feat: add internal link parsing

This commit is contained in:
thomasabishop 2024-11-01 16:11:16 +00:00
parent b5954bf529
commit baeb348f6f
3 changed files with 34 additions and 7 deletions

View file

@ -36,3 +36,10 @@ Most modern programming languages are Turing Complete in that they can, in
theory, be used to compute anything that is computable.
What about Universal Turing Machines eh?
Within the [hierarchy of the OS](./Basic_model_of_the_operating_system.md), the
kernel acts as the primary mediator between the hardware (CPU, memory) and
[user](./User_Space.md) [processes](Processes.md). Let's look at each of its
responsibilities in greater depth:

View file

@ -9,10 +9,11 @@ from services.parse_markdown_service import ParseMarkdownService
class Entry(TypedDict):
    """Typed shape of one parsed entry record.

    Each key is declared exactly once. ``size`` is a ``str`` rather than an
    ``int`` because the record is populated with the result of a ``size(...)``
    formatting call on the file's byte count, not with the raw count itself
    (NOTE(review): confirm that helper returns a string).
    """

    title: str
    last_modified: str
    # Formatted size string, not a raw byte count.
    size: str
    # Tags read from the entry's metadata; an empty list when there are none.
    tags: List[str]
    # Link targets collected by the Markdown parser; empty when there are none.
    links: List[str]
    # Markdown body text of the entry.
    body: str
class ParseFileService:
@ -33,5 +34,6 @@ class ParseFileService:
),
"size": size(self.info.st_size),
"tags": markdown_data.get("tags", []),
"links": markdown_data.get("links", []),
"body": markdown_data.get("body", []),
}

View file

@ -1,16 +1,34 @@
import os
import re
import frontmatter
class ParseMarkdownService:
    """Extract tags, internal links and body text from Markdown entries."""

    # Captures the target of an inline Markdown link whose target ends in
    # ".md", e.g. "[user](./User_Space.md)" -> "./User_Space.md".
    # Compiled once here instead of on every call.
    _INTERNAL_LINK_RGX = re.compile(r"\[.*?\]\(([^)]+\.md)\)")

    def __init__(self, file):
        """Remember the path of the Markdown file to parse.

        Args:
            file: Path to the Markdown file; passed straight to ``open``.
        """
        self.file = file

    def __get_internal_links(self):
        """Return the file names of every ``.md`` link target in the file.

        The file is scanned line by line, so a link must sit on a single
        line to be picked up. Directory components are dropped with
        ``os.path.basename``; order of appearance and duplicates are kept.

        Returns:
            A flat list of file names, empty when the file has no such links.
        """
        with open(self.file, "r", encoding="utf-8") as f:
            return [
                os.path.basename(target)
                for line in f
                for target in self._INTERNAL_LINK_RGX.findall(line)
            ]

    def parse(self):
        """Parse the file into its tags, body text and internal links.

        Returns:
            A dict with keys ``"tags"`` (the front matter ``tags`` value, or
            an empty list), ``"body"`` (the content after the front matter,
            or an empty string) and ``"links"`` (see
            ``__get_internal_links``).
        """
        with open(self.file, encoding="utf-8") as f:
            metadata, content = frontmatter.parse(f.read())

        # The handle is closed before the link scan reopens the same file.
        return {
            "tags": metadata.get("tags", []),
            "body": content or "",
            "links": self.__get_internal_links(),
        }