modify headerid to also work on HTML files #59
3 changed files with 194 additions and 25 deletions
|
|
@ -5,12 +5,28 @@ This plugin adds an anchor to each heading so you can deep-link to headers.
|
||||||
It is intended for formats such as reStructuredText that do not natively
|
It is intended for formats such as reStructuredText that do not natively
|
||||||
generate these anchors.
|
generate these anchors.
|
||||||
|
|
||||||
The ``HEADERID_LINK_CHAR`` config can be set to use a different char from ``*``
|
|
||||||
for anchor text.
|
|
||||||
|
|
||||||
For Markdown, this plugin is less relevant since the Python-Markdown library
|
For Markdown, this plugin is less relevant since the Python-Markdown library
|
||||||
includes a Table of Contents extension that will generate link anchors.
|
includes a Table of Contents extension that will generate link anchors.
|
||||||
To enable the ``toc`` extension, add a line similar to the following example
|
To enable the ``toc`` extension, add a line similar to the following example
|
||||||
to your Pelican settings file::
|
to your Pelican settings file::
|
||||||
|
|
||||||
MD_EXTENSIONS = ["codehilite(css_class=highlight)", "extra", "toc"]
|
MD_EXTENSIONS = ["codehilite(css_class=highlight)", "extra", "toc"]
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
Set parameters in ``pelicanconf.py`` config file.
|
||||||
|
|
||||||
|
HEADERID_LINK_CHAR: str
|
||||||
|
The ``HEADERID_LINK_CHAR`` config can be set to use a different char from ``*``
|
||||||
|
for anchor text.
|
||||||
|
|
||||||
|
GENERATE_IDS: bool
|
||||||
|
If ``True``, the plugin will create IDs for headings
|
||||||
|
that have neither ``id`` nor ``name`` attribute.
|
||||||
|
The IDs are derived from the heading text and serve as the targets of the link anchors.
|
||||||
|
|
||||||
|
SUBTREE: str
|
||||||
|
Only parse a subtree of the entire page.
|
||||||
|
Set this to the value of the ``id`` attribute of the element to use as the root.
|
||||||
|
Sets the root of the parsing process from ``<html>``
|
||||||
|
to the first tag with a matching ``id`` attribute.
|
||||||
|
|
@ -1,31 +1,180 @@
|
||||||
from pelican import readers
|
|
||||||
from pelican.readers import PelicanHTMLTranslator
|
|
||||||
from pelican import signals
|
from pelican import signals
|
||||||
from docutils import nodes
|
from bs4 import BeautifulSoup as bs
|
||||||
|
import re
|
||||||
|
import copy
|
||||||
|
|
||||||
|
# parameter defaults
# These module-level values are rebound by `init_headerid` from the Pelican
# settings and read by the anchor-generation helpers.
|
||||||
# Text placed inside every generated permalink anchor.
LINK_CHAR = '*'
|
LINK_CHAR = '*'
|
||||||
|
# When true, headings without `id`/`name` get an id generated for them.
GENERATE_IDS = False
|
||||||
|
# Optional `id` of the element whose subtree is searched for headings;
# None means the whole document is searched.
SUBTREE = None
|
||||||
|
|
||||||
|
|
||||||
def init_headerid(sender):
    """Load the plugin parameters from the Pelican settings.

    Is registered as callback function to the `initialized` signal.

    Parameters
    ----------
    sender: pelican.Pelican
        Pelican object; only its ``settings`` mapping is read here.

    Returns
    -------
    None

    Notes
    -----
    Rebinds the module-level ``LINK_CHAR``, ``GENERATE_IDS`` and ``SUBTREE``
    on every call. A setting that is absent falls back to its default, so a
    value from an earlier initialisation never lingers.
    """
    global LINK_CHAR
    global GENERATE_IDS
    global SUBTREE

    settings = sender.settings

    char = settings.get('HEADERID_LINK_CHAR')
    # The configured value may contain HTML tags, so it is parsed once here;
    # without a configured value the default '*' is restored.
    LINK_CHAR = bs(char, 'html.parser') if char else '*'

    GENERATE_IDS = bool(settings.get('GENERATE_IDS'))
    SUBTREE = settings.get('SUBTREE')
|
||||||
|
|
||||||
|
|
||||||
|
def headerid_main(page_generator, content):
    """Add permalink anchors to the headings of a page.

    Callback for the `page_generator_write_page` signal: the page's HTML is
    passed through `_add_link_anchors` and the result is stored back on the
    page object.

    Parameters
    ----------
    page_generator: pelican.generators.PagesGenerator
        Not used; accepted only because the signal passes it.

    content: pelican.contents.Page
        Page object whose ``_content`` HTML string is replaced.

    Returns
    -------
    None
    """
    html_with_anchors = _add_link_anchors(content._content)
    content._content = html_with_anchors
|
||||||
|
|
||||||
|
|
||||||
|
def _add_link_anchors(raw_html: str) -> str:
    r"""Append a permalink anchor to every heading that can be referenced.

    The anchor added to each heading (``<h1>`` .. ``<h6>``) looks like::

        <a class="headerlink" href="#{anchor_target}"
           title="Permalink to this headline">{LINK_CHAR}</a>

    Headings that already contain an ``<a>`` with an ``href`` starting with
    ``#`` are skipped, as are headings for which `_get_target` returns no
    target.

    HTML parsing and tree modifications are done via `BeautifulSoup`.

    Parameters
    ----------
    raw_html: str
        Python string representing DOM tree

    Returns
    -------
    str
        Input DOM tree with the added link anchors. Empty input is returned
        unchanged without being parsed.

    Notes
    -----
    Makes use of global `SUBTREE` and `LINK_CHAR` variables.
    See `README` for further information.
    """
    if not raw_html:
        # Nothing to parse; also avoids handing None to the parser.
        return raw_html

    soup = bs(raw_html, 'html.parser')

    # Only real heading tags; anchored so names such as "h2-foo" do not match.
    heading_name = re.compile(r'^h[1-6]$')

    # Search below the configured element when SUBTREE is set, otherwise
    # search the whole document.
    root = soup.find(id=SUBTREE) if SUBTREE else soup
    # A configured SUBTREE that is missing from this page yields no headings.
    headers = root.find_all(heading_name) if root is not None else []

    # add in link anchor
    for tag in headers:
        if _has_anchor(str(tag)):
            continue

        anchor_target = _get_target(tag)
        if anchor_target is None:
            continue

        anchor = soup.new_tag("a", attrs={"href": f"#{anchor_target}",
                                          "title": "Permalink to this headline",
                                          "class": "headerlink"})

        # LINK_CHAR may be parsed markup; every anchor needs its own copy
        # because a node can only live at one place in the tree.
        anchor.append(copy.deepcopy(LINK_CHAR))
        tag.append(anchor)

    return str(soup)
|
||||||
|
|
||||||
|
|
||||||
|
def _has_anchor(inp: str) -> bool:
    """
    Take in an HTML string and check for an anchor-tag
    with href-Attribute starting with `#`.

    Parameters
    ----------
    inp: str
        `inp` is interpreted as HTML and parsed via `BeautifulSoup`

    Returns
    -------
    bool
        True if at least one ``<a>`` tag has an ``href`` starting with ``#``.
        ``<a>`` tags without an ``href`` attribute are ignored.
    """
    soup = bs(inp, 'html.parser')
    # .get() instead of ['href']: an <a> without href (e.g. a named anchor)
    # must not raise KeyError.
    return any(
        (link.get('href') or '').startswith('#')
        for link in soup.find_all('a')
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _get_target(tag) -> "str | None":
    """Get identifier for given HTML tag

    Take a tag and extract something an anchor-tag can reference
    like ID or name.

    If neither is present and the GENERATE_IDS flag is set, create an ID
    from the tag's text (all non-word characters removed) and set it on
    the tag.

    Parameters
    ----------
    tag: bs4.element.Tag
        HTML tag to get target for

    Returns
    -------
    str or None
        Referenceable ID or name. None if the tag has neither and no ID is
        generated (flag unset, or the text contains no word characters).

    Notes
    -----
    Makes use of the global `GENERATE_IDS` flag.
    See `README` for further information.
    """
    existing = tag.attrs.get('id') or tag.attrs.get('name')
    if existing:
        return existing

    if not GENERATE_IDS:
        return None

    slug = re.sub(r'\W+', '', tag.text)
    if not slug:
        # An empty id would yield the useless href "#".
        return None

    # Climb to the top of the tree so the uniqueness check sees the whole
    # document; `root` stays `tag` when the tag has no parents.
    root = tag
    for root in tag.parents:
        pass

    # Headings with identical text must not share an id, otherwise every
    # permalink would resolve to the first of them.
    candidate = slug
    suffix = 1
    while root.find(id=candidate) is not None:
        candidate = f"{slug}-{suffix}"
        suffix += 1

    tag['id'] = candidate
    return candidate
|
||||||
|
|
||||||
|
|
||||||
def register():
    """Hook the plugin's callbacks up to Pelican's signals.

    `init_headerid` runs on the `initialized` signal and `headerid_main` on
    the `page_generator_write_page` signal. No other signal is connected
    here, so only content announced by that page signal is processed.
    """
    # Each callback is executed when Pelican issues the matching signal.
    hooks = (
        (signals.initialized, init_headerid),
        (signals.page_generator_write_page, headerid_main),
    )
    for signal, callback in hooks:
        signal.connect(callback)
|
||||||
|
|
||||||
# Translator subclass that appends a permalink anchor while a title is closed.
# NOTE(review): these rows have no counterpart on the other diff side, so this
# is presumably the removed implementation; indentation/nesting is not
# recoverable from this view - confirm against the repository.
class HeaderIDPatchedPelicanHTMLTranslator(PelicanHTMLTranslator):
|
|
||||||
def depart_title(self, node):
|
|
||||||
# last entry of the translator's context stack; checked against '</h' below
close_tag = self.context[-1]
|
|
||||||
parent = node.parent
|
|
||||||
# only titles whose parent is a docutils section carrying at least one id
if isinstance(parent, nodes.section) and parent.hasattr('ids') and parent['ids']:
|
|
||||||
# the first id of the section becomes the link target
anchor_name = parent['ids'][0]
|
|
||||||
# add permalink anchor
|
|
||||||
if close_tag.startswith('</h'):
|
|
||||||
self.body.append(
|
|
||||||
'<a class="headerlink" href="#%s" title="Permalink to this headline">%s</a>' %
|
|
||||||
(anchor_name, LINK_CHAR))
|
|
||||||
# hand over to the base class implementation of depart_title
PelicanHTMLTranslator.depart_title(self, node)
|
|
||||||
# replace the translator class exposed by pelican.readers with the patched one
readers.PelicanHTMLTranslator = HeaderIDPatchedPelicanHTMLTranslator
|
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ LOCALE = 'en_US.UTF-8'
|
||||||
#
|
#
|
||||||
PATH = 'content'
|
PATH = 'content'
|
||||||
PLUGIN_PATHS = ['pelican-plugins']
|
PLUGIN_PATHS = ['pelican-plugins']
|
||||||
PLUGINS = ['tipue_search', 'sitemap']
|
PLUGINS = ['tipue_search', 'sitemap', 'headerid']
|
||||||
SITEMAP = { 'format': 'xml' }
|
SITEMAP = { 'format': 'xml' }
|
||||||
|
|
||||||
THEME = 'theme'
|
THEME = 'theme'
|
||||||
|
|
@ -41,8 +41,6 @@ EXTRA_PATH_METADATA = {
|
||||||
"static/site.webmanifest": {'path': ''},
|
"static/site.webmanifest": {'path': ''},
|
||||||
}
|
}
|
||||||
|
|
||||||
#HEADERID_LINK_CHAR = '<i class="icon-link"></i>'
|
|
||||||
|
|
||||||
FEED_ALL_ATOM = None
|
FEED_ALL_ATOM = None
|
||||||
AUTHOR_SAVE_AS = False
|
AUTHOR_SAVE_AS = False
|
||||||
|
|
||||||
|
|
@ -55,3 +53,9 @@ MENUITEMS = ( ('About', 'about.html'),
|
||||||
('Explore', 'explore.html'),
|
('Explore', 'explore.html'),
|
||||||
('Publications', 'publications.html'),
|
('Publications', 'publications.html'),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
#
|
||||||
|
# headerid parameters
|
||||||
|
#
|
||||||
|
# Anchor text placed inside each heading's permalink (instead of the
# plugin default '*').
HEADERID_LINK_CHAR = ' #'
|
||||||
|
# Let the plugin create an id for headings that have neither `id` nor `name`.
GENERATE_IDS = True
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue