feat: add proxy service for GeeksforGeeks articles
Set up initial project structure and dependencies for Ducks for Ducks proxy service. - Added .gitignore to exclude virtual environments, __pycache__, and build directories. - Included MIT License for the project. - Created README.md for project documentation. - Configured pyproject.toml to use Hatchling as build system, specified project metadata and dependencies. - Implemented Flask app for proxying GeeksforGeeks articles: - Routes to serve static files, index page, proxy images, and render articles. - Function to fetch and clean article content. - Added custom CSS for styling differences between internal and external links. - Included Bootstrap CSS for base styling. - Created HTML templates for base layout, index, and article pages. This change sets up the core functionality of proxying GeeksforGeeks content through the Flask application.
This commit is contained in:
commit
5f710d8a31
11 changed files with 277 additions and 0 deletions
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
venv/
|
||||||
|
.venv/
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
/dist/
|
19
LICENSE
Normal file
19
LICENSE
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
Copyright (c) 2024 Private.coffee Team <support@private.coffee>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
0
README.md
Normal file
0
README.md
Normal file
26
pyproject.toml
Normal file
26
pyproject.toml
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
[build-system]
|
||||||
|
requires = ["hatchling"]
|
||||||
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "ducksforducks"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = [{ name = "Private.coffee Team", email = "support@private.coffee" }]
|
||||||
|
description = "A simple frontend for GeeksforGeeks"
|
||||||
|
readme = "README.md"
|
||||||
|
license = { file = "LICENSE" }
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
classifiers = [
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"License :: OSI Approved :: MIT License",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
|
]
|
||||||
|
# "bs4" on PyPI is only a placeholder that pulls in beautifulsoup4; depend on
# the real distribution directly (the import name stays "bs4").
dependencies = ["flask", "beautifulsoup4"]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
ducksforducks = "ducksforducks.app:main"
|
||||||
|
|
||||||
|
[project.urls]
|
||||||
|
"Homepage" = "https://git.private.coffee/privatecoffee/ducksforducks"
|
||||||
|
"Bug Tracker" = "https://git.private.coffee/privatecoffee/ducksforducks/issues"
|
||||||
|
"Source Code" = "https://git.private.coffee/privatecoffee/ducksforducks"
|
0
src/ducksforducks/__init__.py
Normal file
0
src/ducksforducks/__init__.py
Normal file
134
src/ducksforducks/app.py
Normal file
134
src/ducksforducks/app.py
Normal file
|
@ -0,0 +1,134 @@
|
||||||
|
import logging
import os
import urllib
import urllib.error
import urllib.parse
import urllib.request
from typing import Text

from bs4 import BeautifulSoup
from flask import (
    Flask,
    request,
    send_from_directory,
    render_template,
    Response,
)
from flask.logging import default_handler
|
||||||
|
|
||||||
|
# The Flask application object; the routes below are registered on it.
app = Flask(__name__)

# Configure the root logger once. Records from this module's logger propagate
# to the handler installed here, so no second StreamHandler is attached to
# ``logger`` (that would print every record twice).
logging.basicConfig(level=logging.DEBUG)

logger = logging.getLogger(__name__)

# Remove Flask's default handler by identity instead of by position.
# ``app.logger.handlers[0]`` is whatever handler happens to be first (or
# nothing at all, raising IndexError); ``removeHandler`` with the documented
# ``default_handler`` is a harmless no-op when Flask did not attach it.
app.logger.removeHandler(default_handler)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/static/<path:path>")
def static_files(path: str) -> Response:
    """Send a file from the ``static`` directory.

    Args:
        path (str): Path of the requested file below ``static``.

    Returns:
        Response: The response produced by ``send_from_directory``.
    """
    static_dir = "static"
    return send_from_directory(static_dir, path)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/")
def index() -> Text:
    """Render the landing page.

    The host of the current request is handed to the template as ``host``.

    Returns:
        Text: The rendered ``index.html`` template.
    """
    context = {"host": request.host}
    return render_template("index.html", **context)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/proxy")
def proxy() -> Response:
    """Proxy a single image from media.geeksforgeeks.org.

    The target is read from the ``url`` query parameter and must start with
    ``https://media.geeksforgeeks.org/``; anything else is rejected.

    Returns:
        Response: The upstream body with the upstream ``Content-Type``;
        a 400 response for a missing or disallowed URL; the upstream status
        code when the upstream answers with an HTTP error; 502 when the
        upstream cannot be reached.
    """
    url = request.args.get("url")

    if not url or not url.startswith("https://media.geeksforgeeks.org/"):
        logger.error(f"Invalid URL for proxying: {url}")
        # Report the rejection as a client error instead of a 200 page.
        return Response("Invalid URL", status=400, mimetype="text/plain")

    logger.debug(f"Proxying {url}")

    try:
        # The timeout keeps a stalled upstream from blocking this worker.
        with urllib.request.urlopen(url, timeout=10) as upstream:
            data = upstream.read()
            content_type = upstream.headers.get(
                "Content-Type", "application/octet-stream"
            )
    except urllib.error.HTTPError as err:
        logger.error(f"Upstream returned HTTP {err.code} for {url}")
        return Response(
            f"Upstream returned HTTP {err.code}",
            status=err.code,
            mimetype="text/plain",
        )
    except OSError as err:
        # URLError and socket timeouts are both OSError subclasses.
        logger.error(f"Failed to fetch {url}: {err}")
        return Response("Upstream request failed", status=502, mimetype="text/plain")

    # Forward the upstream media type; a bare bytes return value would be
    # served with Flask's default text/html content type.
    return Response(data, content_type=content_type)
|
||||||
|
|
||||||
|
|
||||||
|
def _render_error(code: int):
    """Build the response for a failed upstream fetch.

    Args:
        code (int): The HTTP status code to report to the client.

    Returns:
        The rendered ``error.html`` page together with ``code``, or a
        plain-text response with the same status when that template cannot
        be loaded.
    """
    try:
        return render_template("error.html", code=code), code
    except LookupError:
        # jinja2.TemplateNotFound is a LookupError subclass. Fall back to a
        # plain response so a missing template does not turn an upstream
        # error into a 500.
        return Response(
            f"Upstream returned HTTP {code}", status=code, mimetype="text/plain"
        )


@app.route("/<path:path>")
def article_page(path):
    """Fetch a GeeksforGeeks article and render it through ``article.html``.

    Args:
        path (str): The article path below ``https://www.geeksforgeeks.org/``.

    Returns:
        The rendered article page, or an error response carrying the upstream
        status code (502 when the upstream is unreachable or the page does
        not contain an article).
    """
    # Flask hands over the decoded path; quote it again so spaces and
    # non-ASCII characters form a valid request URL.
    upstream_url = "https://www.geeksforgeeks.org/" + urllib.parse.quote(path)

    try:
        with urllib.request.urlopen(upstream_url, timeout=10) as response:
            status = response.getcode()
            # Parse while the connection is still open; the ``with`` block
            # closes it afterwards.
            soup = BeautifulSoup(response, "html.parser") if status == 200 else None
    except urllib.error.HTTPError as err:
        # urlopen raises for 4xx/5xx instead of returning the response, so
        # the status has to be taken from the exception.
        logger.error(f"Upstream returned HTTP {err.code} for {upstream_url}")
        return _render_error(err.code)
    except OSError as err:
        # URLError and socket timeouts are both OSError subclasses.
        logger.error(f"Failed to fetch {upstream_url}: {err}")
        return _render_error(502)

    if soup is None:
        return _render_error(status)

    try:
        content = get_content(soup)
    except (AttributeError, ValueError):
        # Raised when the page lacks the expected article markup.
        logger.exception(f"No article content found at {upstream_url}")
        return _render_error(502)

    heading = content.find(class_="article-title")
    # Fall back to the path when the article carries no title element.
    title = heading.text if heading is not None else path.strip("/")
    return render_template("article.html", content=content, title=title)
|
||||||
|
|
||||||
|
|
||||||
|
def get_content(soup: BeautifulSoup) -> BeautifulSoup:
    """Extract and clean the article element of a GeeksforGeeks page.

    The ``<article>`` inside ``div.a-wrapper`` is modified in place:

    * image sources are pointed at the local ``/proxy`` route,
    * ``<script>`` and ``<style>`` elements are removed,
    * elements whose id starts with ``GFG_AD_`` are removed,
    * links to www.geeksforgeeks.org are made site-relative, except
      ``/user/`` links, which get the ``gfg-link`` class,
    * links to other hosts get the ``external-link`` class.

    Args:
        soup (BeautifulSoup): The parsed upstream page.

    Returns:
        BeautifulSoup: The cleaned ``<article>`` element.

    Raises:
        ValueError: If the page has no ``div.a-wrapper`` or no ``<article>``
            inside it.
    """
    wrapper = soup.find("div", {"class": "a-wrapper"})
    article_content = wrapper.find("article") if wrapper is not None else None
    if article_content is None:
        raise ValueError("Page does not contain an article")

    for img in article_content.find_all("img"):
        src = img.get("src")
        if src:
            # Percent-encode the target so "&", "#" or "?" inside it cannot
            # truncate or split the ``url`` query parameter.
            img["src"] = f"/proxy?url={urllib.parse.quote(src, safe='')}"

    for element in article_content.find_all(["script", "style"]):
        element.decompose()

    def is_ad_id(value):
        return bool(value) and value.startswith("GFG_AD_")

    # Search again after every removal: decompose() also destroys the
    # descendants of the removed tag, and a decomposed tag must not be
    # inspected afterwards.
    while (ad := article_content.find(id=is_ad_id)) is not None:
        ad.decompose()

    gfg_prefix = "https://www.geeksforgeeks.org/"
    for link in article_content.find_all("a"):
        href = link.get("href")
        if not href or not urllib.parse.urlsplit(href).netloc:
            # Anchors without a target, fragments and relative links stay on
            # this site; they are neither GfG-only nor external.
            continue

        if href.startswith(gfg_prefix):
            if not href.startswith(gfg_prefix + "user/"):
                # Strip the origin only at the start of the URL.
                link["href"] = "/" + href.removeprefix(gfg_prefix)
                continue
            marker = "gfg-link"
        else:
            marker = "external-link"

        classes = link.get("class") or []
        if isinstance(classes, str):
            classes = classes.split()
        link["class"] = [*classes, marker]

    return article_content
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the Flask development server.

    Environment variables:
        PORT: Port to listen on (default 8113).
        DEBUG: Enables Flask debug mode unless it is unset, empty, or one of
            ``0``, ``false``, ``no``, ``off`` (case-insensitive).
    """
    port = int(os.getenv("PORT", 8113))
    # bool() of any non-empty string is True, so "0" or "false" would have
    # switched debug mode on; compare against the usual "off" spellings.
    debug_value = os.getenv("DEBUG", "").strip().lower()
    debug = debug_value not in {"", "0", "false", "no", "off"}
    app.run(port=port, debug=debug)


if __name__ == "__main__":
    main()
|
23
src/ducksforducks/static/css/style.css
Normal file
23
src/ducksforducks/static/css/style.css
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
.onopen-discussion-panel,
|
||||||
|
.article_bottom_suggestion_wrapper,
|
||||||
|
.article-meta-author-details-follow-button,
|
||||||
|
.three_dot_dropdown,
|
||||||
|
#myDropdown {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
a.gfg-link::after {
|
||||||
|
content: " (opens on geeksforgeeks.org)";
|
||||||
|
font-size: 0.8em;
|
||||||
|
color: #888;
|
||||||
|
}
|
||||||
|
|
||||||
|
a.external-link::after {
|
||||||
|
content: " (external link)";
|
||||||
|
font-size: 0.8em;
|
||||||
|
color: #888;
|
||||||
|
}
|
||||||
|
|
||||||
|
.text {
|
||||||
|
margin-top: 2em;
|
||||||
|
}
|
7
src/ducksforducks/static/dist/css/bootstrap.min.css
vendored
Normal file
7
src/ducksforducks/static/dist/css/bootstrap.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
10
src/ducksforducks/templates/article.html
Normal file
10
src/ducksforducks/templates/article.html
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
{% extends "base.html" %} {% block title %}{{ title }} - Ducks for Ducks {%
|
||||||
|
endblock %} {% block content %}
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-md-9">
|
||||||
|
<article>
|
||||||
|
{{ content | safe }}
|
||||||
|
</article>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
36
src/ducksforducks/templates/base.html
Normal file
36
src/ducksforducks/templates/base.html
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
|
<title>{% block title %}Ducks for Ducks{% endblock %}</title>
|
||||||
|
<link
|
||||||
|
href="{{ url_for('static', filename='dist/css/bootstrap.min.css') }}"
|
||||||
|
rel="stylesheet"
|
||||||
|
/>
|
||||||
|
<link
|
||||||
|
rel="stylesheet"
|
||||||
|
href="{{ url_for('static', filename='css/style.css') }}"
|
||||||
|
/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<nav class="navbar navbar-dark bg-dark">
|
||||||
|
<div class="container">
|
||||||
|
<a class="navbar-brand" href="{{ url_for('index') }}"
|
||||||
|
>Ducks for Ducks</a
|
||||||
|
>
|
||||||
|
</div>
|
||||||
|
</nav>
|
||||||
|
<div class="container mt-4">{% block content %}{% endblock %}</div>
|
||||||
|
<footer class="footer mt-4 py-3 bg-dark text-light">
|
||||||
|
<div class="container">
|
||||||
|
<span class="text-muted"
|
||||||
|
>Ducks for Ducks is brought to you by
|
||||||
|
<a href="https://git.private.coffee/PrivateCoffee/ducksforducks"
|
||||||
|
>Private.coffee</a
|
||||||
|
>
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</footer>
|
||||||
|
</body>
|
||||||
|
</html>
|
17
src/ducksforducks/templates/index.html
Normal file
17
src/ducksforducks/templates/index.html
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %}Ducks for Ducks - Home{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="jumbotron">
|
||||||
|
<h1 class="display-4">Welcome to Ducks for Ducks!</h1>
|
||||||
|
<p class="lead">Ducks for Ducks is a simple proxy service to geeksforgeeks.org</p>
|
||||||
|
<hr class="my-4">
|
||||||
|
<h3>Usage</h3>
|
||||||
|
<p>To use this service, simply replace <code>geeksforgeeks.org</code> with <code>{{ host }}</code> in the site URL, like this:</p>
|
||||||
|
<a href="https://{{ host }}/the-fox-the-duck-and-a-circular-pond/"><code>https://{{ host }}/the-fox-the-duck-and-a-circular-pond/</code></a>
|
||||||
|
<hr class="my-4">
|
||||||
|
<h3>Work in progress</h3>
|
||||||
|
<p>Fetching the content of Geeks for Geeks' home page is still in the works. Also, user profiles and other features are Coming Soon™.</p>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
Loading…
Reference in a new issue