modify headerid to also work on HTML files #59
3 changed files with 194 additions and 25 deletions
|
|
@ -5,12 +5,28 @@ This plugin adds an anchor to each heading so you can deep-link to headers.
|
|||
It is intended for formats such as reStructuredText that do not natively
|
||||
generate these anchors.
|
||||
|
||||
The ``HEADERID_LINK_CHAR`` config can be set to use a different char from ``*``
|
||||
for anchor text.
|
||||
|
||||
For Markdown, this plugin is less relevant since the Python-Markdown library
|
||||
includes a Table of Contents extension that will generate link anchors.
|
||||
To enable the ``toc`` extension, add a line similar to the following example
|
||||
to your Pelican settings file::
|
||||
|
||||
MD_EXTENSIONS = ["codehilite(css_class=highlight)", "extra", "toc"]
|
||||
|
||||
Parameters
|
||||
----------
|
||||
Set parameters in ``pelicanconf.py`` config file.
|
||||
|
||||
HEADERID_LINK_CHAR: str
|
||||
The ``HEADERID_LINK_CHAR`` config can be set to use a different char from ``*``
|
||||
for anchor text. The value is parsed as HTML, so markup such as ``<i class="icon-link"></i>`` can be used as well.
|
||||
|
||||
GENERATE_IDS: bool
|
||||
If ``True``, the plugin will create IDs for headings
|
||||
that have neither ``id`` nor ``name`` attribute.
|
||||
Used by link-anchors to reference headings.
|
||||
|
||||
SUBTREE: str
|
||||
Only parse a subtree of the entire page.
|
||||
Set this to the value of the ``id`` attribute of an element in the page.
|
||||
Sets the root of the parsing process from ``<html>``
|
||||
to the first tag with a matching ``id`` attribute.
|
||||
|
|
@ -1,31 +1,180 @@
|
|||
from pelican import readers
|
||||
from pelican.readers import PelicanHTMLTranslator
|
||||
from pelican import signals
|
||||
from docutils import nodes
|
||||
from bs4 import BeautifulSoup as bs
|
||||
import re
|
||||
import copy
|
||||
|
||||
# parameter defaults
# These module-level values are overwritten by `init_headerid` with the
# values found in the Pelican settings.

# Anchor text placed inside every generated permalink anchor.
LINK_CHAR = '*'
# When true, `_get_target` creates an `id` for headings that have neither
# an `id` nor a `name` attribute.
GENERATE_IDS = False
# `id` of the element whose subtree is searched for headings; a falsy value
# means the whole document is searched.
SUBTREE = None
|
||||
|
||||
|
||||
def init_headerid(sender):
    """Parse pelican settings to get parameters from `pelicanconf.py`.

    Is registered as callback function to the `initialized` signal.

    Parameters
    ----------
    sender: pelican.Pelican
        Pelican object containing process meta information.
        Only its ``settings`` mapping is read.

    Returns
    -------
    None

    Notes
    -----
    Overwrites the module-level `LINK_CHAR`, `GENERATE_IDS` and `SUBTREE`
    variables. `LINK_CHAR` keeps its current value when the
    ``HEADERID_LINK_CHAR`` setting is missing or empty.
    """
    global LINK_CHAR
    global GENERATE_IDS
    global SUBTREE

    settings = sender.settings

    char = settings.get('HEADERID_LINK_CHAR')
    if char:
        # Parse the setting once so that HTML markup in it ends up as real
        # tags inside the anchor. (The raw string used to be assigned first
        # and was immediately overwritten by this parsed value.)
        LINK_CHAR = bs(char, 'html.parser')

    GENERATE_IDS = bool(settings.get('GENERATE_IDS'))
    SUBTREE = settings.get('SUBTREE')
|
||||
|
||||
|
||||
def headerid_main(page_generator, content):
    """Entry point of the `headerid` plugin.

    Takes the HTML stored on the given page and replaces it with the
    version returned by `_add_link_anchors`, i.e. with a link anchor added
    to every heading tag that has an identifier.

    Connected to the `page_generator_write_page` signal as callback.

    Parameters
    ----------
    page_generator: pelican.generators.PagesGenerator
        Not used by this function; only present because the signal
        passes it to the callback.

    content: pelican.contents.Page
        Pelican Page object whose ``_content`` HTML string is rewritten
        in place.

    Returns
    -------
    None
    """
    original_html = content._content
    content._content = _add_link_anchors(original_html)
|
||||
|
||||
|
||||
def _add_link_anchors(raw_html: str) -> str:
    r"""
    Adds the following link anchor (1) to every headline-tag (2)
    that has an identifier.

    - (1) <a class="headerlink" href="#{anchor_target}"
          title="Permalink to this headline">
            {LINK_CHAR}
          </a>
    - (2) The HTML heading tags ``h1`` to ``h6``,
          i.e. tag names that match `^h[1-6]$` (Python RegEx)

    HTML parsing and tree modifications are done via `BeautifulSoup`.

    Parameters
    ----------
    raw_html: str
        Python string representing DOM tree

    Returns
    -------
    str
        Input DOM tree with the added link anchors (1).
        Empty or missing input is returned unchanged.

    Notes
    -----
    Makes use of global `SUBTREE` and `LINK_CHAR` variables.
    See `README` for further information.

    Headings that already contain an anchor pointing to a fragment
    (see `_has_anchor`) and headings for which `_get_target` yields no
    identifier are left untouched. If `SUBTREE` is set but no element with
    that ``id`` exists, no heading is modified.
    """
    if not raw_html:
        # Nothing to parse; hand back the input as it came in.
        return raw_html

    soup = bs(raw_html, 'html.parser')

    # Compiled once and anchored on both ends so that only real heading
    # tags match, not names like `h7` or `h1foo`.
    heading_name = re.compile(r'^h[1-6]$')

    # Search the whole document unless a subtree root was configured.
    search_root = soup.find(id=SUBTREE) if SUBTREE else soup
    headers = search_root.find_all(heading_name) if search_root is not None else []

    # add in link anchor
    for tag in headers:
        if _has_anchor(str(tag)):
            continue

        anchor_target = _get_target(tag)
        if anchor_target is None:
            continue

        anchor = soup.new_tag("a", attrs={"href": f"#{anchor_target}",
                                          "title": "Permalink to this headline",
                                          "class": "headerlink"})

        # LINK_CHAR may be a parsed HTML fragment; every anchor needs its own
        # copy because a node can only live at one place in the tree.
        anchor.append(copy.deepcopy(LINK_CHAR))
        tag.append(anchor)

    return str(soup)
|
||||
|
||||
|
||||
def _has_anchor(inp: str) -> bool:
    """
    Take in an HTML string and check for an anchor-tag
    with href-Attribute starting with `#`.

    Parameters
    ----------
    inp: str
        `inp` is interpreted as HTML and parsed via `BeautifulSoup`

    Returns
    -------
    bool
        ``True`` if at least one ``<a>`` tag has an ``href`` that starts
        with ``#``, ``False`` otherwise.
    """
    soup = bs(inp, 'html.parser')
    # `<a>` tags without an `href` (e.g. `<a name="...">`) are legal HTML;
    # fall back to an empty string instead of raising `KeyError` on them.
    return any(
        link.get('href', '').startswith('#') for link in soup.find_all('a')
    )
|
||||
|
||||
|
||||
def _get_target(tag) -> str:
    """Get identifier for given HTML tag

    Take a tag and extract something an anchor-tag can reference
    like ID or name.

    If neither present && GENERATE_IDS flag,
    create and set ID based on tag contents.

    Parameters
    ----------
    tag: bs4.element.Tag
        HTML tag to get target for

    Returns
    -------
    str
        Referenceable ID or name.
        ``None`` if the tag has neither and no ID could be generated
        (`GENERATE_IDS` is off, or the tag text contains no word
        characters).

    Notes
    -----
    Makes use of the global `GENERATE_IDS` flag.
    See `README` for further information.

    A generated ID is the tag text with every run of non-word characters
    removed; it is also written to the tag's ``id`` attribute.
    NOTE(review): headings with identical text get identical generated IDs -
    confirm whether that matters for the pages this is used on.
    """
    # An existing `id` wins over `name`; empty values count as missing.
    existing = tag.attrs.get('id') or tag.attrs.get('name')
    if existing:
        return existing

    if not GENERATE_IDS:
        return None

    generated = re.sub(r'\W+', '', tag.text)
    if not generated:
        # Text without any word character would give an empty `id` and a
        # bare `#` link, so treat the heading as not referenceable.
        return None

    tag['id'] = generated
    return generated
|
||||
|
||||
|
||||
def register():
    """Hook the plugin into Pelican.

    Connects `init_headerid` to the `initialized` signal, replaces
    `readers.PelicanHTMLTranslator` with a subclass that appends a
    permalink anchor to section titles, and connects `headerid_main` to the
    `page_generator_write_page` signal.

    Returns
    -------
    None
    """
    # Connect CallBack function to signal.
    # Function gets executed when pelican issues the signal.
    signals.initialized.connect(init_headerid)

    class HeaderIDPatchedPelicanHTMLTranslator(PelicanHTMLTranslator):
        """Translator that adds a permalink anchor when leaving a title."""

        def depart_title(self, node):
            closing = self.context[-1]
            section = node.parent
            is_identified_section = (isinstance(section, nodes.section)
                                     and section.hasattr('ids')
                                     and section['ids'])
            if is_identified_section:
                target = section['ids'][0]
                # add permalink anchor, but only when a heading tag is
                # about to be closed
                if closing.startswith('</h'):
                    # NOTE(review): after `init_headerid` ran, LINK_CHAR may
                    # be a BeautifulSoup object; `%s` then renders it via
                    # str() - confirm this is intended.
                    self.body.append(
                        '<a class="headerlink" href="#%s" title="Permalink to this headline">%s</a>' %
                        (target, LINK_CHAR))
            PelicanHTMLTranslator.depart_title(self, node)

    readers.PelicanHTMLTranslator = HeaderIDPatchedPelicanHTMLTranslator
    signals.page_generator_write_page.connect(headerid_main)
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ LOCALE = 'en_US.UTF-8'
|
|||
#
|
||||
PATH = 'content'
|
||||
PLUGIN_PATHS = ['pelican-plugins']
|
||||
PLUGINS = ['tipue_search', 'sitemap']
|
||||
PLUGINS = ['tipue_search', 'sitemap', 'headerid']
|
||||
SITEMAP = { 'format': 'xml' }
|
||||
|
||||
THEME = 'theme'
|
||||
|
|
@ -41,8 +41,6 @@ EXTRA_PATH_METADATA = {
|
|||
"static/site.webmanifest": {'path': ''},
|
||||
}
|
||||
|
||||
#HEADERID_LINK_CHAR = '<i class="icon-link"></i>'
|
||||
|
||||
FEED_ALL_ATOM = None
|
||||
AUTHOR_SAVE_AS = False
|
||||
|
||||
|
|
@ -55,3 +53,9 @@ MENUITEMS = ( ('About', 'about.html'),
|
|||
('Explore', 'explore.html'),
|
||||
('Publications', 'publications.html'),
|
||||
)
|
||||
|
||||
#
|
||||
# headerid parameters
|
||||
#
|
||||
HEADERID_LINK_CHAR = ' #'
|
||||
GENERATE_IDS = True
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue