feat: enhance content extraction and documentation

Added a docstring to the `get_content` function that documents its argument and return value. Reformatted the URL-rewriting logic for article links (line wrapping only; the behavior is unchanged) to make the code easier to read. Added a one-line docstring to the `main` function describing its purpose. Together, these changes improve code maintainability and readability.
2024-08-27 16:39:20 +02:00 · 2024-08-27 16:39:20 +02:00 · 51e31fc8fe
commit 51e31fc8fe
parent 5f710d8a31
1 changed file with 15 additions and 2 deletions
--- a/src/ducksforducks/app.py
+++ b/src/ducksforducks/app.py
@@ -94,6 +94,14 @@ def article_page(path):


 def get_content(soup: BeautifulSoup) -> BeautifulSoup:
+    """Extracts the article content from the soup.
+
+    Args:
+        soup (BeautifulSoup): The soup of the article page.
+
+    Returns:
+        BeautifulSoup: The article content.
+    """
    article_content = soup.find("div", {"class": "a-wrapper"}).find("article")

    for img in article_content.find_all("img"):
@@ -107,9 +115,13 @@ def get_content(soup: BeautifulSoup) -> BeautifulSoup:
            ad.decompose()

    for link in article_content.find_all("a"):
-        if link.get("href") and link["href"].startswith("https://www.geeksforgeeks.org/"):
+        if link.get("href") and link["href"].startswith(
+            "https://www.geeksforgeeks.org/"
+        ):
            if not link["href"].startswith("https://www.geeksforgeeks.org/user/"):
-                link["href"] = f"/{link['href'].replace('https://www.geeksforgeeks.org/', '')}"
+                link["href"] = (
+                    f"/{link['href'].replace('https://www.geeksforgeeks.org/', '')}"
+                )

            else:
                classes = link.get("class", [])
@@ -125,6 +137,7 @@ def get_content(soup: BeautifulSoup) -> BeautifulSoup:


 def main():
+    """Runs the app."""
    port = int(os.getenv("PORT", 8113))
    debug = bool(os.getenv("DEBUG", False))
    app.run(port=port, debug=debug)