feat: add proxy service for GeeksforGeeks articles
Set up initial project structure and dependencies for Ducks for Ducks proxy service. - Added .gitignore to exclude virtual environments, __pycache__, and build directories. - Included MIT License for the project. - Created README.md for project documentation. - Configured pyproject.toml to use Hatchling as build system, specified project metadata and dependencies. - Implemented Flask app for proxying GeeksforGeeks articles: - Routes to serve static files, index page, proxy images, and render articles. - Function to fetch and clean article content. - Added custom CSS for styling differences between internal and external links. - Included Bootstrap CSS for base styling. - Created HTML templates for base layout, index, and article pages. This change sets up the core functionality of proxying GeeksforGeeks content through the Flask application.
This commit is contained in:
commit
5f710d8a31
11 changed files with 277 additions and 0 deletions
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
venv/
|
||||
.venv/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
/dist/
|
19
LICENSE
Normal file
19
LICENSE
Normal file
|
@ -0,0 +1,19 @@
|
|||
Copyright (c) 2024 Private.coffee Team <support@private.coffee>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
0
README.md
Normal file
0
README.md
Normal file
26
pyproject.toml
Normal file
26
pyproject.toml
Normal file
|
@ -0,0 +1,26 @@
|
|||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "ducksforducks"
|
||||
version = "0.1.0"
|
||||
authors = [{ name = "Private.coffee Team", email = "support@private.coffee" }]
|
||||
description = "A simple frontend for GeeksforGeeks"
|
||||
readme = "README.md"
|
||||
license = { file = "LICENSE" }
|
||||
requires-python = ">=3.10"
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
]
|
||||
# "bs4" on PyPI is only a placeholder that pulls in Beautiful Soup; depend on
# the real distribution name so the dependency resolves directly.
dependencies = ["flask", "beautifulsoup4"]
|
||||
|
||||
[project.scripts]
|
||||
ducksforducks = "ducksforducks.app:main"
|
||||
|
||||
[project.urls]
|
||||
"Homepage" = "https://git.private.coffee/privatecoffee/ducksforducks"
|
||||
"Bug Tracker" = "https://git.private.coffee/privatecoffee/ducksforducks/issues"
|
||||
"Source Code" = "https://git.private.coffee/privatecoffee/ducksforducks"
|
0
src/ducksforducks/__init__.py
Normal file
0
src/ducksforducks/__init__.py
Normal file
134
src/ducksforducks/app.py
Normal file
134
src/ducksforducks/app.py
Normal file
|
@ -0,0 +1,134 @@
|
|||
import logging
import os
import urllib
import urllib.error
import urllib.parse
import urllib.request
from typing import Text

from bs4 import BeautifulSoup
from flask import (
    Flask,
    request,
    send_from_directory,
    render_template,
    Response,
)
|
||||
|
||||
app = Flask(__name__)

# Route everything from DEBUG upwards through the root logger.
logging.basicConfig(level=logging.DEBUG)

# Module-level logger used by the view functions below.
logger = logging.getLogger(__name__)

handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)
logger.addHandler(handler)

# Remove the default Flask logger
# The handler list can be empty (Flask does not attach its own handler when
# another handler in the logger hierarchy already covers the level), so guard
# the index instead of crashing at import time with an IndexError.
# NOTE(review): if app.logger and `logger` resolve to the same logger name,
# handlers[0] is the StreamHandler added above - confirm which handler is
# actually meant to be removed here.
if app.logger.handlers:
    app.logger.removeHandler(app.logger.handlers[0])
|
||||
|
||||
|
||||
@app.route("/static/<path:path>")
def static_files(path: str) -> Response:
    """Serve a file from the ``static`` directory.

    Args:
        path (str): Path of the requested file, relative to ``static``.

    Returns:
        Response: The response produced by ``send_from_directory``.
    """
    static_dir = "static"
    return send_from_directory(static_dir, path)
|
||||
|
||||
|
||||
@app.route("/")
def index() -> Text:
    """Render the landing page.

    The host of the current request is handed to the template so it can show
    usage examples that point at this instance.

    Returns:
        Text: The rendered ``index.html`` template.
    """
    return render_template("index.html", host=request.host)
|
||||
|
||||
|
||||
@app.route("/proxy")
def proxy() -> Response:
    """Proxy an image hosted on media.geeksforgeeks.org.

    The target is read from the ``url`` query parameter and must start with
    ``https://media.geeksforgeeks.org/``; anything else is rejected so the
    endpoint cannot be used to fetch arbitrary URLs.

    Returns:
        Response: The upstream body together with its ``Content-Type`` on
        success, a 400 response when the URL is missing or not allowed, or a
        response carrying the upstream status code when the media server
        answers with an HTTP error.
    """
    url = request.args.get("url")

    if not url or not url.startswith("https://media.geeksforgeeks.org/"):
        logger.error(f"Invalid URL for proxying: {url}")
        # Report the rejection as a client error instead of a 200 page.
        return Response("Invalid URL", status=400)

    logger.debug(f"Proxying {url}")

    try:
        with urllib.request.urlopen(url) as upstream:
            data = upstream.read()
            # Forward the media type; otherwise Flask labels the bytes as HTML.
            content_type = upstream.headers.get(
                "Content-Type", "application/octet-stream"
            )
    except urllib.error.HTTPError as error:
        # urlopen raises for 4xx/5xx answers; pass the status on to the client
        # rather than letting the exception surface as a 500.
        logger.error(f"Upstream returned {error.code} for {url}")
        return Response("Upstream error", status=error.code)

    return Response(data, content_type=content_type)
|
||||
|
||||
|
||||
@app.route("/<path:path>")
def article_page(path: str) -> Text | tuple[Text, int]:
    """Fetch a GeeksforGeeks article and render it through the local template.

    Args:
        path (str): The path of the article on www.geeksforgeeks.org.

    Returns:
        Text | tuple[Text, int]: The rendered article page, or the rendered
        error page paired with the upstream status code when the upstream
        request did not answer with 200.
    """
    # Flask passes the path already decoded; quote it again so spaces and
    # non-ASCII characters still form a valid request URL.
    url = f"https://www.geeksforgeeks.org/{urllib.parse.quote(path)}"

    try:
        # The context manager closes the upstream connection once parsed.
        with urllib.request.urlopen(url) as response:
            status = response.getcode()
            soup = BeautifulSoup(response, "html.parser") if status == 200 else None
    except urllib.error.HTTPError as error:
        # urlopen raises for 4xx/5xx instead of returning a response object,
        # so error statuses have to be picked up here.
        logger.error(f"Upstream returned {error.code} for {url}")
        status, soup = error.code, None

    if soup is None:
        # NOTE(review): error.html is not among the templates visible in this
        # change - confirm it exists.
        return render_template("error.html", code=status), status

    content = get_content(soup)

    # Fall back to the requested path when the page has no title element.
    title_tag = content.find(class_="article-title")
    title = title_tag.text if title_tag is not None else path

    return render_template("article.html", content=content, title=title)
|
||||
|
||||
|
||||
def get_content(soup: BeautifulSoup) -> BeautifulSoup:
    """Extract the article markup from a GeeksforGeeks page and clean it up.

    The ``<article>`` element inside ``div.a-wrapper`` is modified in place:

    * image sources are routed through the local ``/proxy`` endpoint,
    * ``<script>`` and ``<style>`` elements are removed,
    * elements whose ``id`` starts with ``GFG_AD_`` are removed,
    * links to www.geeksforgeeks.org (except ``/user/`` pages) are rewritten
      to local paths; ``/user/`` links get the ``gfg-link`` class and every
      other link gets the ``external-link`` class.

    Args:
        soup (BeautifulSoup): The parsed upstream page.

    Returns:
        BeautifulSoup: The cleaned ``<article>`` element.

    Raises:
        AttributeError: If the page lacks the ``div.a-wrapper`` / ``article``
            structure (the corresponding ``find`` call returns ``None``).
    """
    gfg_prefix = "https://www.geeksforgeeks.org/"
    gfg_user_prefix = "https://www.geeksforgeeks.org/user/"

    article_content = soup.find("div", {"class": "a-wrapper"}).find("article")

    for img in article_content.find_all("img"):
        src = img.get("src")
        if src:
            # Percent-encode the target so "&" or "?" inside it cannot be
            # mistaken for additional parameters of the /proxy request.
            img["src"] = f"/proxy?url={urllib.parse.quote(src, safe='')}"

    for element in article_content.find_all(["script", "style"]):
        element.decompose()

    def is_ad_id(value):
        # Called with the element's id attribute, or None when it has none.
        return bool(value) and value.startswith("GFG_AD_")

    # Search again after every removal: decompose() also destroys the
    # element's descendants, and a destroyed element must not be used again.
    while (ad := article_content.find(id=is_ad_id)) is not None:
        ad.decompose()

    for link in article_content.find_all("a"):
        href = link.get("href") or ""

        if href.startswith(gfg_prefix) and not href.startswith(gfg_user_prefix):
            # Strip only the leading site prefix; the same text further along
            # the URL (for example in a query string) stays untouched.
            link["href"] = "/" + href.removeprefix(gfg_prefix)
            continue

        marker = "gfg-link" if href.startswith(gfg_prefix) else "external-link"
        classes = link.get("class", [])
        classes.append(marker)
        link["class"] = classes

    return article_content
|
||||
|
||||
|
||||
def main() -> None:
    """Start the Flask development server.

    Configuration is read from the environment:

    * ``PORT``: port to listen on (default 8113).
    * ``DEBUG``: enables Flask debug mode unless it is unset, empty, or one of
      ``0``, ``false``, ``no``, ``off`` (case-insensitive).
    """
    port = int(os.getenv("PORT", 8113))

    # bool() on the raw string would treat "0" and "false" as enabled, so the
    # explicit "off" spellings are checked instead.
    debug_setting = os.getenv("DEBUG", "").strip().lower()
    debug = debug_setting not in {"", "0", "false", "no", "off"}

    app.run(port=port, debug=debug)


if __name__ == "__main__":
    main()
|
23
src/ducksforducks/static/css/style.css
Normal file
23
src/ducksforducks/static/css/style.css
Normal file
|
@ -0,0 +1,23 @@
|
|||
.onopen-discussion-panel,
|
||||
.article_bottom_suggestion_wrapper,
|
||||
.article-meta-author-details-follow-button,
|
||||
.three_dot_dropdown,
|
||||
#myDropdown {
|
||||
display: none;
|
||||
}
|
||||
|
||||
a.gfg-link::after {
|
||||
content: " (opens on geeksforgeeks.org)";
|
||||
font-size: 0.8em;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
a.external-link::after {
|
||||
content: " (external link)";
|
||||
font-size: 0.8em;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
.text {
|
||||
margin-top: 2em;
|
||||
}
|
7
src/ducksforducks/static/dist/css/bootstrap.min.css
vendored
Normal file
7
src/ducksforducks/static/dist/css/bootstrap.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
10
src/ducksforducks/templates/article.html
Normal file
10
src/ducksforducks/templates/article.html
Normal file
|
@ -0,0 +1,10 @@
|
|||
{% extends "base.html" %} {% block title %}{{ title }} - Ducks for Ducks {%
|
||||
endblock %} {% block content %}
|
||||
<div class="row">
|
||||
<div class="col-md-9">
|
||||
<article>
|
||||
{{ content | safe }}
|
||||
</article>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
36
src/ducksforducks/templates/base.html
Normal file
36
src/ducksforducks/templates/base.html
Normal file
|
@ -0,0 +1,36 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>{% block title %}Ducks for Ducks{% endblock %}</title>
|
||||
<link
|
||||
href="{{ url_for('static', filename='dist/css/bootstrap.min.css') }}"
|
||||
rel="stylesheet"
|
||||
/>
|
||||
<link
|
||||
rel="stylesheet"
|
||||
href="{{ url_for('static', filename='css/style.css') }}"
|
||||
/>
|
||||
</head>
|
||||
<body>
|
||||
<nav class="navbar navbar-dark bg-dark">
|
||||
<div class="container">
|
||||
<a class="navbar-brand" href="{{ url_for('index') }}"
|
||||
>Ducks for Ducks</a
|
||||
>
|
||||
</div>
|
||||
</nav>
|
||||
<div class="container mt-4">{% block content %}{% endblock %}</div>
|
||||
<footer class="footer mt-4 py-3 bg-dark text-light">
|
||||
<div class="container">
|
||||
<span class="text-muted"
|
||||
>Ducks for Ducks is brought to you by
|
||||
<a href="https://git.private.coffee/PrivateCoffee/ducksforducks"
|
||||
>Private.coffee</a
|
||||
>
|
||||
</span>
|
||||
</div>
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
17
src/ducksforducks/templates/index.html
Normal file
17
src/ducksforducks/templates/index.html
Normal file
|
@ -0,0 +1,17 @@
|
|||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Ducks for Ducks - Home{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="jumbotron">
|
||||
<h1 class="display-4">Welcome to Ducks for Ducks!</h1>
|
||||
<p class="lead">Ducks for Ducks is a simple proxy service to geeksforgeeks.org</p>
|
||||
<hr class="my-4">
|
||||
<h3>Usage</h3>
|
||||
<p>To use this service, simply replace <code>geeksforgeeks.org</code> with <code>{{ host }}</code> in the site URL, like this:</p>
|
||||
<a href="https://{{ host }}/the-fox-the-duck-and-a-circular-pond/"><code>https://{{ host }}/the-fox-the-duck-and-a-circular-pond/</code></a>
|
||||
<hr class="my-4">
|
||||
<h3>Work in progress</h3>
|
||||
<p>Fetching the content of Geeks for Geeks' home page is still in the works. Also, user profiles and other features are Coming Soon™.</p>
|
||||
</div>
|
||||
{% endblock %}
|
Loading…
Reference in a new issue