feat: Caching proxied content
Some linting
This commit is contained in:
parent dd043bd397
commit e86c24251d
8 changed files with 358 additions and 42 deletions
@@ -14,20 +14,24 @@ An open source alternative front-end to Instructables. This is a fork of <a href
## Instances

<!-- START_INSTANCE_LIST type:eq=clearnet -->

| URL | Provided by | Country | Notes |
| ---------------------------------------------------------------------- | ---------------------------------------------- | ---------------- | ------------- |
| [structables.private.coffee](https://structables.private.coffee) | [Private.coffee](https://private.coffee) | Austria 🇦🇹 🇪🇺 | Main instance |
| [structables.bloat.cat](https://structables.bloat.cat) | [Bloat.cat](https://bloat.cat) | Germany 🇩🇪 🇪🇺 | |
| [structables.darkness.services](https://structables.darkness.services) | [Darkness.services](https://darkness.services) | United States 🇺🇸 | |

<!-- END_INSTANCE_LIST -->

### Tor Hidden Services

<!-- START_INSTANCE_LIST type:eq=onion -->

| URL | Provided by | Country | Notes |
| --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------- | ---------------- | ------------- |
| [structables.coffee2m3bjsrrqqycx6ghkxrnejl2q6nl7pjw2j4clchjj6uk5zozad.onion](http://structables.coffee2m3bjsrrqqycx6ghkxrnejl2q6nl7pjw2j4clchjj6uk5zozad.onion) | [Private.coffee](https://private.coffee) | Austria 🇦🇹 🇪🇺 | Main instance |
| [structables.darknessrdor43qkl2ngwitj72zdavfz2cead4t5ed72bybgauww5lyd.onion](http://structables.darknessrdor43qkl2ngwitj72zdavfz2cead4t5ed72bybgauww5lyd.onion) | [Darkness.services](https://darkness.services) | United States 🇺🇸 | |

<!-- END_INSTANCE_LIST -->

### Adding Your Instance
@@ -86,6 +90,11 @@ Structables supports the use of the following environment variables for configur
- `STRUCTABLES_PRIVACY_FILE`: The path to a text file or Markdown file (with .md suffix) to use for the Privacy Policy page (if unset, try `privacy.txt` or `privacy.md` in the working directory, or fall back to a generic message)
- `STRUCTABLES_DEBUG`: If set, log additional debug information to stdout
- `STRUCTABLES_THEME`: Allows selecting a theme for the frontend. Currently, only `dark` and `light` are supported. If not set, it will be automatically detected based on the user's system settings, and a toggle will be provided in the header.
+- `STRUCTABLES_CACHE_ENABLED`: Whether to enable caching of proxied content (default: true). Set to "false" or "0" to disable caching.
+- `STRUCTABLES_CACHE_DIR`: The directory to use for caching proxied content (default: `structables_cache` within the temporary directory as returned by `tempfile.gettempdir()`)
+- `STRUCTABLES_CACHE_MAX_AGE`: The maximum age of cached content in seconds before it's considered stale (default: 604800 seconds, or 1 week)
+- `STRUCTABLES_CACHE_MAX_SIZE`: The maximum size of the cache directory in bytes (default: 1073741824 bytes, or 1GB)
+- `STRUCTABLES_CACHE_CLEANUP_INTERVAL`: How often to run the cache cleanup process in seconds (default: 3600 seconds, or 1 hour)

## License
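For quick reference, the five new cache settings combine like this; a minimal sketch with made-up values (the specific values and the in-process use of `os.environ` are illustrative, not part of this commit, and must be set before `Config` is imported since the class body reads the environment at import time):

```python
import os

# Illustrative values only, not the defaults from this commit.
# Any of "false", "0", "no", "off", "n" (case-insensitive) disables caching.
os.environ["STRUCTABLES_CACHE_ENABLED"] = "true"
os.environ["STRUCTABLES_CACHE_DIR"] = "/var/cache/structables"
os.environ["STRUCTABLES_CACHE_MAX_AGE"] = str(60 * 60 * 24 * 7)     # 1 week
os.environ["STRUCTABLES_CACHE_MAX_SIZE"] = str(1024 * 1024 * 1024)  # 1 GB
os.environ["STRUCTABLES_CACHE_CLEANUP_INTERVAL"] = str(60 * 60)     # 1 hour
```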
@@ -2,3 +2,5 @@ ruff
black
isort
mypy
+types-beautifulsoup4
+types-colorama
@@ -1,4 +1,8 @@
import os
+import tempfile
+
+from .utils.helpers import get_typesense_api_key


class Config:
    DEBUG = os.environ.get("FLASK_DEBUG", os.environ.get("STRUCTABLES_DEBUG", False))
@@ -8,6 +12,32 @@ class Config:
    UNSAFE = os.environ.get("STRUCTABLES_UNSAFE", False)
    PRIVACY_FILE = os.environ.get("STRUCTABLES_PRIVACY_FILE")
    THEME = os.environ.get("STRUCTABLES_THEME", "auto")
+    TYPESENSE_API_KEY = get_typesense_api_key()
+
+    # Cache settings
+    CACHE_ENABLED = os.environ.get("STRUCTABLES_CACHE_ENABLED", "true").lower() not in (
+        "false",
+        "0",
+        "no",
+        "off",
+        "n",
+    )
+    CACHE_DIR = os.environ.get("STRUCTABLES_CACHE_DIR")
+
+    if CACHE_DIR is None:
+        CACHE_DIR = os.path.join(
+            tempfile.gettempdir(), "structables_cache"
+        )
+
+    CACHE_MAX_AGE = int(
+        os.environ.get("STRUCTABLES_CACHE_MAX_AGE", 60 * 60 * 24 * 7)
+    )  # 1 week default
+    CACHE_MAX_SIZE = int(
+        os.environ.get("STRUCTABLES_CACHE_MAX_SIZE", 1024 * 1024 * 1024)
+    )  # 1GB default
+    CACHE_CLEANUP_INTERVAL = int(
+        os.environ.get("STRUCTABLES_CACHE_CLEANUP_INTERVAL", 60 * 60)
+    )  # 1 hour default

    @staticmethod
    def init_app(app):
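As a sanity check on the `CACHE_ENABLED` expression above, a standalone sketch (not code from this commit) of the same truthiness rule:

```python
# Mirrors Config.CACHE_ENABLED: every value outside this tuple enables caching.
FALSY = ("false", "0", "no", "off", "n")

def cache_enabled(raw: str = "true") -> bool:
    return raw.lower() not in FALSY

assert cache_enabled("true") and cache_enabled("1") and cache_enabled("yes")
assert not cache_enabled("FALSE") and not cache_enabled("Off")
```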
@@ -8,21 +8,19 @@ import logging
from .config import Config
from .routes import init_routes
from .utils.data import update_data
-from .utils.helpers import get_typesense_api_key
+from .routes.proxy import start_cache_cleanup_thread

# Configure logging
logger = logging.getLogger(__name__)

app = Flask(__name__, template_folder="templates", static_folder="static")
app.config.from_object(Config)
-app.typesense_api_key = get_typesense_api_key()

logger.debug("Initializing routes")
init_routes(app)
logger.debug("Performing initial data update")
update_data(app)


def background_update_data(app):
    """Runs the update_data function every 5 minutes.
@@ -38,31 +36,30 @@ def background_update_data(app):
        logger.debug("Data update complete, sleeping for 5 minutes")
        time.sleep(300)


def main():
    if app.config["DEBUG"]:
        logging.basicConfig(
            level=logging.DEBUG,
-            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
    else:
        logging.basicConfig(
            level=logging.INFO,
-            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )

    logger.debug("Starting background update thread")
    threading.Thread(target=background_update_data, args=(app,), daemon=True).start()

-    logger.info(
-        f"Starting Structables on {app.config['LISTEN_HOST']}:{app.config['PORT']}"
-    )
+    # Start the cache cleanup thread
+    start_cache_cleanup_thread(app)
+
+    logger.info(f"Starting Structables on {app.config['LISTEN_HOST']}:{app.config['PORT']}")
    app.run(
        port=app.config["PORT"],
        host=app.config["LISTEN_HOST"],
        debug=app.config["DEBUG"],
    )


if __name__ == "__main__":
    main()
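The background updater and the new cache cleanup both use the same daemon-thread pattern, so neither blocks interpreter shutdown; a minimal standalone sketch of that pattern (names are illustrative, not from this commit):

```python
import threading
import time

def periodic_worker():
    while True:
        # do the periodic work here (update_data or cache_cleanup in this commit)
        time.sleep(300)  # then sleep until the next run

# daemon=True lets the process exit without joining the thread.
threading.Thread(target=periodic_worker, daemon=True).start()
```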
@@ -74,8 +74,8 @@ def init_contest_routes(app):
        )

    def get_entries(contest):
-        base_url = f"https://www.instructables.com/api_proxy/search/collections/projects/documents/search"
-        headers = {"x-typesense-api-key": app.typesense_api_key}
+        base_url = "https://www.instructables.com/api_proxy/search/collections/projects/documents/search"
+        headers = {"x-typesense-api-key": app.config["TYPESENSE_API_KEY"]}
        page, per_page = 1, 100
        all_entries = []

@@ -177,7 +177,7 @@ def init_contest_routes(app):
                "https://www.instructables.com/json-api/getCurrentContests?limit=50&offset=0"
            )
            data = json.loads(response.read().decode())
-            logger.debug(f"Received current contests data")
+            logger.debug("Received current contests data")
        except HTTPError as e:
            logger.error(f"HTTP error fetching current contests: {e.code}")
            abort(e.code)
@@ -134,7 +134,7 @@ def init_main_routes(app):
            f"https://www.instructables.com/json-api/showInstructableModel?urlString={article}"
        )
        data = json.loads(data.read().decode())
-        logger.debug(f"Successfully fetched article data")
+        logger.debug("Successfully fetched article data")
    except HTTPError as e:
        logger.error(f"HTTP error fetching article: {e.code}")
        abort(e.code)
@@ -4,10 +4,210 @@ from urllib.parse import unquote
from urllib.error import HTTPError
from urllib.request import urlopen
import logging
+import os
+import hashlib
+import time
+import threading
+import shutil

logger = logging.getLogger(__name__)

+# Cache cleanup thread reference
+cache_cleanup_thread = None
+
+
+def get_cache_path(app, url):
+    """Generate a cache file path for a URL.
+
+    Args:
+        app: The Flask app instance.
+        url (str): The URL to cache.
+
+    Returns:
+        str: The path to the cache file.
+    """
+    # Create a hash of the URL to use as the filename
+    url_hash = hashlib.sha256(url.encode()).hexdigest()
+    cache_dir = app.config["CACHE_DIR"]
+    return os.path.join(cache_dir, url_hash)
+
+
+def is_cached(app, url):
+    """Check if a URL is cached and not expired.
+
+    Args:
+        app: The Flask app instance.
+        url (str): The URL to check.
+
+    Returns:
+        bool: True if the URL is cached and not expired, False otherwise.
+    """
+    # If caching is disabled, always return False
+    if not app.config["CACHE_ENABLED"]:
+        return False
+
+    cache_path = get_cache_path(app, url)
+
+    # Check if the file exists
+    if not os.path.exists(cache_path):
+        return False
+
+    # Check if the cache has expired
+    cache_time = os.path.getmtime(cache_path)
+    max_age = app.config["CACHE_MAX_AGE"]
+    if time.time() - cache_time > max_age:
+        # Cache has expired, remove it
+        try:
+            os.remove(cache_path)
+            # Also remove metadata file if it exists
+            meta_path = cache_path + ".meta"
+            if os.path.exists(meta_path):
+                os.remove(meta_path)
+            return False
+        except OSError:
+            logger.warning(f"Failed to remove expired cache file: {cache_path}")
+            return False
+
+    # Cache exists and is not expired
+    return True
+
+
+def get_content_type(cache_path):
+    """Get the content type from a cache file.
+
+    Args:
+        cache_path (str): The path to the cache file.
+
+    Returns:
+        str: The content type, or 'application/octet-stream' if not found.
+    """
+    meta_path = cache_path + ".meta"
+    if os.path.exists(meta_path):
+        try:
+            with open(meta_path, "r") as f:
+                return f.read().strip()
+        except OSError:
+            logger.warning(
+                f"Failed to read content type from cache metadata: {meta_path}"
+            )
+
+    return "application/octet-stream"
+
+
+def cache_cleanup(app):
+    """Clean up the cache directory to stay within size limits.
+
+    This function removes the oldest files first until the cache size
+    is below the maximum size.
+
+    Args:
+        app: The Flask app instance.
+    """
+    # If caching is disabled, don't do anything
+    if not app.config["CACHE_ENABLED"]:
+        return
+
+    logger.debug("Starting cache cleanup")
+
+    try:
+        cache_dir = app.config["CACHE_DIR"]
+        max_size = app.config["CACHE_MAX_SIZE"]
+
+        # Get all cache files with their modification times
+        cache_files = []
+        total_size = 0
+
+        for filename in os.listdir(cache_dir):
+            file_path = os.path.join(cache_dir, filename)
+            if os.path.isfile(file_path):
+                file_size = os.path.getsize(file_path)
+                file_time = os.path.getmtime(file_path)
+                total_size += file_size
+                cache_files.append((file_path, file_time, file_size))
+
+        logger.debug(f"Current cache size: {total_size / (1024 * 1024):.2f} MB")
+
+        # If we're over the size limit, remove oldest files first
+        if total_size > max_size:
+            logger.debug("Cache size exceeds limit, cleaning up")
+            # Sort by modification time (oldest first)
+            cache_files.sort(key=lambda x: x[1])
+
+            # Remove files until we're under the limit
+            for file_path, _, file_size in cache_files:
+                if total_size <= max_size:
+                    break
+
+                try:
+                    os.remove(file_path)
+                    # Also remove metadata file if it exists
+                    meta_path = file_path + ".meta"
+                    if os.path.exists(meta_path):
+                        os.remove(meta_path)
+
+                    total_size -= file_size
+                    logger.debug(f"Removed cache file: {file_path}")
+                except OSError:
+                    logger.warning(f"Failed to remove cache file: {file_path}")
+
+            logger.debug(
+                f"Cache cleanup complete. New size: {total_size / (1024 * 1024):.2f} MB"
+            )
+
+    except Exception as e:
+        logger.error(f"Error during cache cleanup: {str(e)}")
+
+
+def start_cache_cleanup_thread(app):
+    """Start a background thread to periodically clean up the cache.
+
+    Args:
+        app: The Flask app instance.
+    """
+    global cache_cleanup_thread
+
+    # If thread is already running, don't start another one
+    if cache_cleanup_thread is not None and cache_cleanup_thread.is_alive():
+        return
+
+    # If caching is disabled, don't start the thread
+    if not app.config["CACHE_ENABLED"]:
+        logger.debug("Caching is disabled, not starting cache cleanup thread")
+        return
+
+    def cleanup_worker():
+        while True:
+            try:
+                with app.app_context():
+                    cache_cleanup(app)
+                cleanup_interval = app.config["CACHE_CLEANUP_INTERVAL"]
+                time.sleep(cleanup_interval)
+            except Exception as e:
+                logger.error(f"Error in cache cleanup worker: {str(e)}")
+                # Sleep a bit to avoid tight loop in case of recurring errors
+                time.sleep(60)
+
+    cache_cleanup_thread = threading.Thread(target=cleanup_worker, daemon=True)
+    cache_cleanup_thread.start()
+    logger.debug("Started cache cleanup background thread")
+
+
def init_proxy_routes(app):
+    # Create cache directory if it doesn't exist and caching is enabled
+    if app.config["CACHE_ENABLED"]:
+        cache_dir = app.config["CACHE_DIR"]
+        os.makedirs(cache_dir, exist_ok=True)
+        logger.debug(f"Cache directory: {cache_dir}")
+        logger.debug(f"Cache max age: {app.config['CACHE_MAX_AGE']} seconds")
+        logger.debug(
+            f"Cache max size: {app.config['CACHE_MAX_SIZE'] / (1024 * 1024):.2f} MB"
+        )
+        logger.debug(
+            f"Cache cleanup interval: {app.config['CACHE_CLEANUP_INTERVAL']} seconds"
+        )
+    else:
+        logger.debug("Caching is disabled")
+
    @app.route("/proxy/")
    def route_proxy():
        url = request.args.get("url")
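The cache layout introduced above is a flat directory of files named by the SHA-256 digest of the proxied URL, with a `.meta` sidecar holding the Content-Type. A short illustration (the URL and directory are made-up examples):

```python
import hashlib
import os

url = "https://content.instructables.com/FABC123/example.jpg"  # hypothetical URL
cache_dir = "/tmp/structables_cache"                            # example CACHE_DIR

# Same derivation as get_cache_path(): hash the exact unquoted URL.
cache_path = os.path.join(cache_dir, hashlib.sha256(url.encode()).hexdigest())
print(cache_path)            # .../<64 hex chars> holds the body
print(cache_path + ".meta")  # sidecar holds the Content-Type
```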
@@ -20,27 +220,102 @@ def init_proxy_routes(app):
            "https://content.instructables.com/"
        ):
            logger.debug(f"Valid proxy URL: {url}")
+            unquoted_url = unquote(url)

-            def generate():
-                # Subfunction to allow streaming the data instead of
-                # downloading all of it at once
+            # Check if the content is already cached
+            if is_cached(app, unquoted_url):
+                logger.debug(f"Serving cached content for: {unquoted_url}")
+                cache_path = get_cache_path(app, unquoted_url)
+                content_type = get_content_type(cache_path)
+
+                def generate_from_cache():
+                    with open(cache_path, "rb") as f:
+                        while True:
+                            chunk = f.read(1024 * 1024)
+                            if not chunk:
+                                break
+                            yield chunk
+
+                headers = dict()
+                if filename is not None:
+                    headers["Content-Disposition"] = (
+                        f'attachment; filename="{filename}"'
+                    )
+
+                return Response(
+                    generate_from_cache(),
+                    content_type=content_type,
+                    headers=headers,
+                )
+
+            # Content is not cached or caching is disabled, fetch it
+            def generate_and_maybe_cache():
                try:
-                    logger.debug(f"Opening connection to {url}")
-                    with urlopen(unquote(url)) as data:
+                    logger.debug(f"Opening connection to {unquoted_url}")
+                    with urlopen(unquoted_url) as data:
                        logger.debug("Connection established, streaming data")
+
+                        # If caching is enabled, cache the content
+                        if app.config["CACHE_ENABLED"]:
+                            cache_path = get_cache_path(app, unquoted_url)
+                            temp_path = cache_path + ".tmp"
+                            with open(temp_path, "wb") as f:
+                                while True:
+                                    chunk = data.read(1024 * 1024)
+                                    if not chunk:
+                                        break
+                                    f.write(chunk)
+                                    yield chunk
+
+                            # Save the content type
+                            try:
+                                content_type = data.headers["content-type"]
+                                with open(cache_path + ".meta", "w") as f:
+                                    f.write(content_type)
+                            except (KeyError, OSError):
+                                logger.warning(
+                                    f"Failed to save content type for: {unquoted_url}"
+                                )
+
+                            # Rename the temporary file to the final cache file
+                            try:
+                                os.rename(temp_path, cache_path)
+                                logger.debug(
+                                    f"Successfully cached content for: {unquoted_url}"
+                                )
+                            except OSError:
+                                logger.warning(
+                                    f"Failed to rename temporary cache file: {temp_path}"
+                                )
+                                # Try to copy and delete instead
+                                try:
+                                    shutil.copy2(temp_path, cache_path)
+                                    os.remove(temp_path)
+                                    logger.debug(
+                                        f"Successfully cached content using copy method: {unquoted_url}"
+                                    )
+                                except OSError:
+                                    logger.error(
+                                        f"Failed to cache content: {unquoted_url}"
+                                    )
+                        else:
+                            # If caching is disabled, just stream the data
                            while True:
                                chunk = data.read(1024 * 1024)
                                if not chunk:
                                    break
                                yield chunk
-                    logger.debug("Finished streaming data")
                except HTTPError as e:
                    logger.error(f"HTTP error during streaming: {e.code}")
                    abort(e.code)
+                except Exception as e:
+                    logger.error(f"Error fetching content: {str(e)}")
+                    abort(500)

            try:
-                logger.debug(f"Getting content type for {url}")
-                with urlopen(unquote(url)) as data:
+                logger.debug(f"Getting content type for {unquoted_url}")
+                with urlopen(unquoted_url) as data:
                    content_type = data.headers["content-type"]
                    logger.debug(f"Content type: {content_type}")
            except HTTPError as e:
@@ -51,14 +326,17 @@ def init_proxy_routes(app):
                raise InternalServerError()

            headers = dict()

            if filename is not None:
                headers["Content-Disposition"] = (
                    f'attachment; filename="{filename}"'
                )
                logger.debug(f"Added Content-Disposition header for {filename}")

-            return Response(generate(), content_type=content_type, headers=headers)
+            return Response(
+                generate_and_maybe_cache(),
+                content_type=content_type,
+                headers=headers,
+            )
        else:
            logger.warning(f"Invalid proxy URL: {url}")
            raise BadRequest()
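From a client's point of view the route is unchanged: the first request fetches and caches, later requests stream from `CACHE_DIR`. A hypothetical request against a local instance (host, port, and the image path are assumptions, not from this commit):

```python
from urllib.parse import quote
from urllib.request import urlopen

# /proxy/ only accepts URLs under https://content.instructables.com/;
# anything else raises BadRequest (HTTP 400).
target = "https://content.instructables.com/FABC123/example.jpg"      # hypothetical
proxy = "http://localhost:8002/proxy/?url=" + quote(target, safe="")  # port depends on STRUCTABLES_PORT

with urlopen(proxy) as resp:
    body = resp.read()
    print(resp.headers["content-type"], len(body))
```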
@@ -519,7 +519,7 @@ def projects_search(

    logger.debug(f"Searching projects: query='{query}', filter='{filter_by}', page={page}, per_page={per_page}")

-    projects_headers = {"x-typesense-api-key": app.typesense_api_key}
+    projects_headers = {"x-typesense-api-key": app.config["TYPESENSE_API_KEY"]}

    request_args = {
        "q": query,