feat: add internal link parsing

This commit is contained in:
thomasabishop 2024-11-01 16:11:16 +00:00
parent b5954bf529
commit baeb348f6f
3 changed files with 34 additions and 7 deletions

View file

@ -36,3 +36,10 @@ Most modern programming languages are Turing Complete in that they can, in
theory, be used to compute anything that is computable.
What about Universal Turing Machines eh?
Within the [hierarchy of the OS](./Basic_model_of_the_operating_system.md), the
kernel acts as the primary mediator between the hardware (CPU, memory) and
[user](./User_Space.md) [processes](Processes.md). Let's look at each of its
responsibilities in greater depth:

View file

@ -9,10 +9,11 @@ from services.parse_markdown_service import ParseMarkdownService
class Entry(TypedDict):
    """Typed record describing one parsed Markdown entry."""

    title: str
    last_modified: str
    # NOTE(review): declared as str; presumably the formatted output of the
    # size() helper applied to st_size -- confirm against that helper.
    size: str
    tags: List[str]
    # Link targets collected from the entry by the Markdown parsing service.
    links: List[str]
    body: str
class ParseFileService: class ParseFileService:
@ -33,5 +34,6 @@ class ParseFileService:
), ),
"size": size(self.info.st_size), "size": size(self.info.st_size),
"tags": markdown_data.get("tags", []), "tags": markdown_data.get("tags", []),
"links": markdown_data.get("links", []),
"body": markdown_data.get("body", []), "body": markdown_data.get("body", []),
} }

View file

@ -1,16 +1,34 @@
import os
import re
import frontmatter import frontmatter
class ParseMarkdownService:
    """Extract tags, internal links and body text from Markdown entries."""

    # Inline Markdown link whose target ends in ".md"; group 1 is the target.
    _LINK_RGX = re.compile(r"\[.*?\]\(([^)]+\.md)\)")
    # A target that starts with a URL scheme ("https://", "ftp://", ...)
    # points outside the local notes and is therefore not an internal link.
    _EXTERNAL_RGX = re.compile(r"^[A-Za-z][A-Za-z0-9+.-]*://")

    def __init__(self, file):
        """Store the path of the Markdown file to parse.

        Args:
            file: Path to the Markdown file.
        """
        self.file = file

    def __get_internal_links(self):
        """Return the basenames of all internal ``.md`` links in the file.

        The file is scanned line by line, so a link must sit on a single
        line to be detected. Links are returned in order of appearance and
        duplicates are kept. Targets carrying a URL scheme are skipped.

        Returns:
            A flat list of link target basenames, e.g. ``["Processes.md"]``.
        """
        internal_links = []
        with open(self.file, "r", encoding="utf-8") as f:
            # Iterate the handle directly instead of buffering every line.
            for line in f:
                internal_links.extend(
                    os.path.basename(target)
                    for target in self._LINK_RGX.findall(line)
                    if not self._EXTERNAL_RGX.match(target)
                )
        return internal_links

    def parse(self):
        """Parse the Markdown file into tags, body text and internal links.

        Returns:
            A dict with keys ``"tags"`` (front matter tags, default ``[]``),
            ``"body"`` (content after the front matter, default ``""``) and
            ``"links"`` (basenames of internal ``.md`` links).
        """
        with open(self.file, encoding="utf-8") as f:
            metadata, content = frontmatter.parse(f.read())
        return {
            "tags": metadata.get("tags", []),
            "body": content or "",
            "links": self.__get_internal_links(),
        }