feat: enhance content extraction and documentation
Added a docstring to the get_content function to improve code documentation and clarify its intent. Enhanced URL processing logic for article links to ensure more readable and consistent URLs within the application. Included a docstring for the main function to outline its purpose. These changes improve code maintainability and readability.
This commit is contained in:
parent
5f710d8a31
commit
51e31fc8fe
1 changed file with 15 additions and 2 deletions
|
@@ -94,6 +94,14 @@ def article_page(path):
|
||||||
|
|
||||||
|
|
||||||
def get_content(soup: BeautifulSoup) -> BeautifulSoup:
|
def get_content(soup: BeautifulSoup) -> BeautifulSoup:
|
||||||
|
"""Extracts the article content from the soup.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
soup (BeautifulSoup): The soup of the article page.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
BeautifulSoup: The article content.
|
||||||
|
"""
|
||||||
article_content = soup.find("div", {"class": "a-wrapper"}).find("article")
|
article_content = soup.find("div", {"class": "a-wrapper"}).find("article")
|
||||||
|
|
||||||
for img in article_content.find_all("img"):
|
for img in article_content.find_all("img"):
|
||||||
|
@@ -107,9 +115,13 @@ def get_content(soup: BeautifulSoup) -> BeautifulSoup:
|
||||||
ad.decompose()
|
ad.decompose()
|
||||||
|
|
||||||
for link in article_content.find_all("a"):
|
for link in article_content.find_all("a"):
|
||||||
if link.get("href") and link["href"].startswith("https://www.geeksforgeeks.org/"):
|
if link.get("href") and link["href"].startswith(
|
||||||
|
"https://www.geeksforgeeks.org/"
|
||||||
|
):
|
||||||
if not link["href"].startswith("https://www.geeksforgeeks.org/user/"):
|
if not link["href"].startswith("https://www.geeksforgeeks.org/user/"):
|
||||||
link["href"] = f"/{link['href'].replace('https://www.geeksforgeeks.org/', '')}"
|
link["href"] = (
|
||||||
|
f"/{link['href'].replace('https://www.geeksforgeeks.org/', '')}"
|
||||||
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
classes = link.get("class", [])
|
classes = link.get("class", [])
|
||||||
|
@@ -125,6 +137,7 @@ def get_content(soup: BeautifulSoup) -> BeautifulSoup:
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
"""Runs the app."""
|
||||||
port = int(os.getenv("PORT", 8113))
|
port = int(os.getenv("PORT", 8113))
|
||||||
debug = bool(os.getenv("DEBUG", False))
|
debug = bool(os.getenv("DEBUG", False))
|
||||||
app.run(port=port, debug=debug)
|
app.run(port=port, debug=debug)
|
||||||
|
|
Loading…
Reference in a new issue