From baeb348f6f2f2b487c1b52bb39dfe47d6207ef74 Mon Sep 17 00:00:00 2001 From: thomasabishop Date: Fri, 1 Nov 2024 16:11:16 +0000 Subject: [PATCH] feat: add internal link parsing --- dev-data/Turing_completeness.md | 7 +++++++ src/services/parse_file_service.py | 8 +++++--- src/services/parse_markdown_service.py | 26 ++++++++++++++++++++++---- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/dev-data/Turing_completeness.md b/dev-data/Turing_completeness.md index 5e3665a..f2de6f5 100644 --- a/dev-data/Turing_completeness.md +++ b/dev-data/Turing_completeness.md @@ -36,3 +36,10 @@ Most modern programming languages are Turing Complete in that they can, in theory, be used to compute anything that is computable. What about Universal Turing Machines eh? + + +Within the [hierarchy of the OS](./Basic_model_of_the_operating_system.md), the +kernel acts as the primary mediator between the hardware (CPU, memory) and +[user](./User_Space.md) [processes](Processes.md). Let's look at each of its +responsibilities in greater depth: + diff --git a/src/services/parse_file_service.py b/src/services/parse_file_service.py index bc66b18..ca4f491 100644 --- a/src/services/parse_file_service.py +++ b/src/services/parse_file_service.py @@ -9,10 +9,11 @@ from services.parse_markdown_service import ParseMarkdownService class Entry(TypedDict): title: str - tags: List[str] - body: str last_modified: str - size: int + size: str + tags: List[str] + links: List[str] + body: str class ParseFileService: @@ -33,5 +34,6 @@ class ParseFileService: ), "size": size(self.info.st_size), "tags": markdown_data.get("tags", []), + "links": markdown_data.get("links", []), "body": markdown_data.get("body", []), } diff --git a/src/services/parse_markdown_service.py b/src/services/parse_markdown_service.py index f2b6611..021f671 100644 --- a/src/services/parse_markdown_service.py +++ b/src/services/parse_markdown_service.py @@ -1,16 +1,34 @@ +import os +import re + import frontmatter class ParseMarkdownService: - """Extract tags, links and body text from Markdown entries""" + """Extract tags, internal links and body text from Markdown entries""" - def __init__(self, eolas_file): - self.eolas_file = eolas_file + def __init__(self, file): + self.file = file + + def __get_internal_links(self): + link_rgx = r"\[.*?\]\(([^)]+\.md)\)" + with open(self.file, "r") as f: + internal_links = [] + lines = f.readlines() + for line in lines: + internal_link = re.findall(link_rgx, line) + if internal_link: + # internal_links.append(internal_link) + internal_links.append( + [os.path.basename(link) for link in internal_link] + ) + return [item for row in internal_links for item in row] def parse(self): - with open(self.eolas_file) as f: + with open(self.file) as f: metadata, content = frontmatter.parse(f.read()) return { "tags": metadata.get("tags", []), "body": content or "", + "links": self.__get_internal_links() or [], }