From 51e31fc8fe32b8bf0ee3c662708e7008f5dd2046 Mon Sep 17 00:00:00 2001 From: Kumi Date: Tue, 27 Aug 2024 16:39:20 +0200 Subject: [PATCH] feat: enhance content extraction and documentation Added a docstring to the get_content function to improve code documentation and clarify its intent. Reformatted the URL rewriting logic for article links (line wrapping only, no behavior change) to keep the code readable and consistently styled. Included a docstring for the main function to outline its purpose. Improves code maintainability and readability. --- src/ducksforducks/app.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/ducksforducks/app.py b/src/ducksforducks/app.py index f468405..938ccd9 100644 --- a/src/ducksforducks/app.py +++ b/src/ducksforducks/app.py @@ -94,6 +94,14 @@ def article_page(path): def get_content(soup: BeautifulSoup) -> BeautifulSoup: + """Extracts the article content from the soup. + + Args: + soup (BeautifulSoup): The soup of the article page. + + Returns: + BeautifulSoup: The article content. + """ article_content = soup.find("div", {"class": "a-wrapper"}).find("article") for img in article_content.find_all("img"): @@ -107,9 +115,13 @@ def get_content(soup: BeautifulSoup) -> BeautifulSoup: ad.decompose() for link in article_content.find_all("a"): - if link.get("href") and link["href"].startswith("https://www.geeksforgeeks.org/"): + if link.get("href") and link["href"].startswith( + "https://www.geeksforgeeks.org/" + ): if not link["href"].startswith("https://www.geeksforgeeks.org/user/"): - link["href"] = f"/{link['href'].replace('https://www.geeksforgeeks.org/', '')}" + link["href"] = ( + f"/{link['href'].replace('https://www.geeksforgeeks.org/', '')}" + ) else: classes = link.get("class", []) @@ -125,6 +137,7 @@ def get_content(soup: BeautifulSoup) -> BeautifulSoup: def main(): + """Runs the app.""" port = int(os.getenv("PORT", 8113)) debug = bool(os.getenv("DEBUG", False)) app.run(port=port, debug=debug)