feat: Refactor background tasks
This commit is contained in:
parent
7e3e88cb74
commit
37dde40cbf
4 changed files with 95 additions and 82 deletions
|
@ -1,14 +1,12 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
from flask import Flask
|
||||
import threading
|
||||
import time
|
||||
import logging
|
||||
import time
|
||||
|
||||
from .config import Config
|
||||
from .routes import init_routes
|
||||
from .utils.data import update_data
|
||||
from .routes.proxy import start_cache_cleanup_thread
|
||||
from .utils.data import maybe_update_data
|
||||
|
||||
# Configure logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -19,47 +17,30 @@ app.config.from_object(Config)
|
|||
logger.debug("Initializing routes")
|
||||
init_routes(app)
|
||||
logger.debug("Performing initial data update")
|
||||
update_data(app)
|
||||
maybe_update_data(app)
|
||||
|
||||
def background_update_data(app, interval=300):
    """Run ``update_data`` forever, pausing ``interval`` seconds between runs.

    This replaces the need for a cron job to update the data. The function
    never returns, so it is intended to be the target of a daemon thread.

    Args:
        app (Flask): The Flask app instance.
        interval (float): Seconds to sleep between two updates. Defaults to
            300 (5 minutes).
    """
    logger.debug("Starting background update thread")
    while True:
        logger.debug("Running scheduled data update")
        try:
            update_data(app)
        except Exception:
            # An uncaught exception would end this thread and silently stop
            # every later refresh; log it and try again on the next cycle.
            logger.exception("Scheduled data update failed")
        else:
            logger.debug(
                "Data update complete, sleeping for %s seconds", interval
            )
        time.sleep(interval)
|
||||
|
||||
def main():
    """Configure logging, start the background workers and run the server.

    Log verbosity follows ``app.config["DEBUG"]``: DEBUG level when it is
    truthy, INFO otherwise. The listen host, port and debug flag passed to
    ``app.run`` are read from the app configuration as well.
    """
    # Both modes share the same format string; only the level differs, so a
    # single basicConfig call with exactly one ``format`` keyword is enough.
    logging.basicConfig(
        level=logging.DEBUG if app.config["DEBUG"] else logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    logger.debug("Starting background update thread")
    # Daemon thread: it must not keep the process alive once the server exits.
    threading.Thread(target=background_update_data, args=(app,), daemon=True).start()

    # Start the cache cleanup thread
    start_cache_cleanup_thread(app)

    # Announce the listen address once, right before handing over to Flask.
    logger.info(
        f"Starting Structables on {app.config['LISTEN_HOST']}:{app.config['PORT']}"
    )
    app.run(
        port=app.config["PORT"],
        host=app.config["LISTEN_HOST"],
        debug=app.config["DEBUG"],
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Start the server only when run as a script. One call is enough: a
    # second call would start the server again after the first one returns.
    main()
|
||||
|
|
|
@ -6,13 +6,16 @@ from urllib.parse import quote
|
|||
from werkzeug.exceptions import InternalServerError
|
||||
from markdown2 import Markdown
|
||||
from traceback import print_exc
|
||||
|
||||
import pathlib
|
||||
import json
|
||||
import logging
|
||||
import random
|
||||
|
||||
from ..utils.data import update_data
|
||||
from ..utils.helpers import explore_lists, proxy
|
||||
from .category import project_list
|
||||
from ..utils.data import maybe_update_data
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -22,6 +25,10 @@ def init_main_routes(app):
|
|||
def route_explore():
|
||||
logger.debug("Rendering explore page")
|
||||
|
||||
# Occasionally trigger data update (every 20th request)
|
||||
if random.randint(1, 20) == 1:
|
||||
maybe_update_data(app)
|
||||
|
||||
try:
|
||||
logger.debug("Fetching data from instructables.com")
|
||||
data = urlopen("https://www.instructables.com/")
|
||||
|
|
|
@ -7,14 +7,12 @@ import logging
|
|||
import os
|
||||
import hashlib
|
||||
import time
|
||||
import threading
|
||||
import shutil
|
||||
import random
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache cleanup thread reference
|
||||
cache_cleanup_thread = None
|
||||
# Track last cache cleanup time
|
||||
last_cache_cleanup = 0
|
||||
|
||||
|
||||
def get_cache_path(app, url):
|
||||
|
@ -95,24 +93,35 @@ def get_content_type(cache_path):
|
|||
return "application/octet-stream"
|
||||
|
||||
|
||||
def cache_cleanup(app):
|
||||
"""Clean up the cache directory to stay within size limits.
|
||||
|
||||
This function removes the oldest files first until the cache size
|
||||
is below the maximum size.
|
||||
def maybe_cleanup_cache(app):
|
||||
"""Clean up the cache directory if it's time to do so.
|
||||
|
||||
Args:
|
||||
app: The Flask app instance.
|
||||
"""
|
||||
global last_cache_cleanup
|
||||
|
||||
# If caching is disabled, don't do anything
|
||||
if not app.config["CACHE_ENABLED"]:
|
||||
return
|
||||
|
||||
# Check if it's time to run cleanup
|
||||
current_time = time.time()
|
||||
cleanup_interval = app.config["CACHE_CLEANUP_INTERVAL"]
|
||||
|
||||
if current_time - last_cache_cleanup < cleanup_interval:
|
||||
logger.debug(
|
||||
f"Cache cleanup skipped. Time since last cleanup: {current_time - last_cache_cleanup:.2f} seconds"
|
||||
)
|
||||
return
|
||||
|
||||
logger.debug("Starting cache cleanup")
|
||||
last_cache_cleanup = current_time
|
||||
|
||||
try:
|
||||
cache_dir = app.config["CACHE_DIR"]
|
||||
max_size = app.config["CACHE_MAX_SIZE"]
|
||||
max_age = app.config["CACHE_MAX_AGE"]
|
||||
|
||||
# Get all cache files with their modification times
|
||||
cache_files = []
|
||||
|
@ -121,6 +130,10 @@ def cache_cleanup(app):
|
|||
for filename in os.listdir(cache_dir):
|
||||
file_path = os.path.join(cache_dir, filename)
|
||||
if os.path.isfile(file_path):
|
||||
# Skip metadata files in the count
|
||||
if file_path.endswith(".meta"):
|
||||
continue
|
||||
|
||||
file_size = os.path.getsize(file_path)
|
||||
file_time = os.path.getmtime(file_path)
|
||||
total_size += file_size
|
||||
|
@ -128,7 +141,34 @@ def cache_cleanup(app):
|
|||
|
||||
logger.debug(f"Current cache size: {total_size / (1024 * 1024):.2f} MB")
|
||||
|
||||
# If we're over the size limit, remove oldest files first
|
||||
# First, remove expired files
|
||||
current_time = time.time()
|
||||
expired_files = [
|
||||
(path, mtime, size)
|
||||
for path, mtime, size in cache_files
|
||||
if current_time - mtime > max_age
|
||||
]
|
||||
|
||||
for file_path, _, file_size in expired_files:
|
||||
try:
|
||||
os.remove(file_path)
|
||||
# Also remove metadata file if it exists
|
||||
meta_path = file_path + ".meta"
|
||||
if os.path.exists(meta_path):
|
||||
os.remove(meta_path)
|
||||
total_size -= file_size
|
||||
logger.debug(f"Removed expired cache file: {file_path}")
|
||||
except OSError:
|
||||
logger.warning(f"Failed to remove expired cache file: {file_path}")
|
||||
|
||||
# Remove files from the list that we've already deleted
|
||||
cache_files = [
|
||||
(path, mtime, size)
|
||||
for path, mtime, size in cache_files
|
||||
if (path, mtime, size) not in expired_files
|
||||
]
|
||||
|
||||
# If we're still over the size limit, remove oldest files first
|
||||
if total_size > max_size:
|
||||
logger.debug("Cache size exceeds limit, cleaning up")
|
||||
# Sort by modification time (oldest first)
|
||||
|
@ -147,7 +187,7 @@ def cache_cleanup(app):
|
|||
os.remove(meta_path)
|
||||
|
||||
total_size -= file_size
|
||||
logger.debug(f"Removed cache file: {file_path}")
|
||||
logger.debug(f"Removed old cache file: {file_path}")
|
||||
except OSError:
|
||||
logger.warning(f"Failed to remove cache file: {file_path}")
|
||||
|
||||
|
@ -159,40 +199,6 @@ def cache_cleanup(app):
|
|||
logger.error(f"Error during cache cleanup: {str(e)}")
|
||||
|
||||
|
||||
def start_cache_cleanup_thread(app):
    """Launch the daemon thread that periodically runs the cache cleanup.

    Does nothing when a cleanup thread is already alive or when
    ``app.config["CACHE_ENABLED"]`` is falsy. Otherwise the new thread is
    stored in the module-level ``cache_cleanup_thread`` and started.

    Args:
        app: The Flask app instance.
    """
    global cache_cleanup_thread

    # A live worker already exists: never run two of them side by side.
    running = cache_cleanup_thread
    if running is not None and running.is_alive():
        return

    # Without caching there is nothing to clean up.
    if not app.config["CACHE_ENABLED"]:
        logger.debug("Caching is disabled, not starting cache cleanup thread")
        return

    def _cleanup_loop():
        # Runs for the lifetime of the process; the thread is a daemon.
        while True:
            try:
                with app.app_context():
                    cache_cleanup(app)
                # Re-read the interval each cycle, as the original loop did.
                time.sleep(app.config["CACHE_CLEANUP_INTERVAL"])
            except Exception as exc:
                logger.error(f"Error in cache cleanup worker: {str(exc)}")
                # Back off for a minute so a recurring error cannot spin.
                time.sleep(60)

    worker = threading.Thread(target=_cleanup_loop, daemon=True)
    cache_cleanup_thread = worker
    worker.start()
    logger.debug("Started cache cleanup background thread")
|
||||
|
||||
|
||||
def init_proxy_routes(app):
|
||||
# Create cache directory if it doesn't exist and caching is enabled
|
||||
if app.config["CACHE_ENABLED"]:
|
||||
|
@ -216,12 +222,8 @@ def init_proxy_routes(app):
|
|||
|
||||
logger.debug(f"Proxy request for URL: {url}, filename: {filename}")
|
||||
|
||||
# Check if the cache cleanup thread is running
|
||||
if cache_cleanup_thread is None:
|
||||
# Use every 100th request to trigger cleanup
|
||||
if random.randint(1, 100) == 1:
|
||||
logger.debug("Triggering cache cleanup")
|
||||
cache_cleanup(app)
|
||||
# Clean up the cache if needed
|
||||
maybe_cleanup_cache(app)
|
||||
|
||||
if url is not None:
|
||||
if url.startswith("https://cdn.instructables.com/") or url.startswith(
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
from urllib.request import urlopen
|
||||
import logging
|
||||
import time
|
||||
from bs4 import BeautifulSoup
|
||||
from .helpers import proxy, projects_search
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Track the last data update time
|
||||
last_data_update = 0
|
||||
|
||||
def update_data(app):
|
||||
"""Update the application's cached data.
|
||||
|
||||
|
@ -73,4 +77,23 @@ def update_data(app):
|
|||
logger.debug(f"Updated global projects list with {len(app.global_ibles['/projects'])} projects")
|
||||
logger.debug("Data update completed successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating data: {str(e)}")
|
||||
logger.error(f"Error updating data: {str(e)}")
|
||||
|
||||
|
||||
def maybe_update_data(app, interval=300):
    """Update the data if at least ``interval`` seconds have passed.

    This replaces the background thread with a request-triggered update.
    The time of the last update is kept in the module-level
    ``last_data_update``.

    Args:
        app (Flask): The Flask app instance.
        interval (float): Minimum number of seconds between two updates.
            Defaults to 300 (5 minutes).

    Raises:
        Exception: Whatever ``update_data`` raises is propagated unchanged.
    """
    global last_data_update

    current_time = time.time()
    previous_update = last_data_update
    if current_time - previous_update < interval:
        return

    # Record the new timestamp *before* the refresh. Otherwise every caller
    # arriving while the refresh is still running also passes the check above
    # and starts a duplicate update.
    last_data_update = current_time
    logger.debug("Running scheduled data update")
    try:
        update_data(app)
    except Exception:
        # Give the slot back so the next call retries instead of waiting a
        # whole interval after a failed refresh.
        last_data_update = previous_update
        raise
    logger.debug("Data update complete")
|
Loading…
Add table
Add a link
Reference in a new issue