"""Mirror broadcasts from configured web pages to a PeerTube channel.

For every configured URL the script scrapes the broadcast's title and date
from the page, downloads the video with yt-dlp, uploads it to PeerTube using
the chunked ``videos/upload-resumable`` endpoint and records the upload in a
SQLite database so that the same broadcast is not processed twice.

Third-party requirements (``requirements.txt``): ``yt-dlp`` and ``bs4``.

Configuration is read from ``settings.ini`` in the working directory.
Template (``settings.dist.ini``)::

    [Pukuthek]
    URLs = [
        "https://tvthek.orf.at/profile/Salzburg-heute/70019/"
        ]

    [PeerTube]
    APIPath = https://kumi.tube/api/v1/
    Username = pukuthek
    Password = password
    ChannelID = 1234

An optional ``Database`` key in ``[Pukuthek]`` overrides the default database
file name ``pukuthek.db``.

Local artefacts that should stay out of version control (``.gitignore``):
``*.mp4``, ``settings.ini``, ``pukuthek.db``, ``venv/``, ``__pycache__/``,
``*.pyc``. The project formats its code with ``black``.
"""

from configparser import ConfigParser
from contextlib import closing
from datetime import datetime
from pathlib import Path
from urllib.error import HTTPError
from urllib.parse import parse_qs, urlencode, urlsplit
from urllib.request import Request, urlopen

import json
import re
import sqlite3
import sys

CONFIG_FILE = "settings.ini"

# Size of one upload chunk in bytes.
CHUNK_SIZE = 1024 * 1024

# Options handed to yt-dlp: concatenate the parts of a playlist, then convert
# the result to MP4 (the upload below always expects an ``.mp4`` file).
PARAMS = {
    "postprocessors": [
        {
            "key": "FFmpegConcat",
            "only_multi_video": False,
            "when": "playlist",
        },
        {
            "key": "FFmpegVideoConvertor",
            "preferedformat": "mp4",
        },
    ],
}

# ``<date><zone abbreviation><time>``, e.g. ``2023-06-01CEST19:00:00``.
_BROADCAST_TIME = re.compile(r"(\d{4}-\d{2}-\d{2})[A-Za-z]+(\d{2}:\d{2}:\d{2})")


def load_settings(path: str = CONFIG_FILE) -> dict:
    """Read the configuration file and return the settings as a dict.

    Keys: ``urls`` (list), ``database``, ``api`` (always ends with ``/``),
    ``username``, ``password`` and ``channel_id``.

    Raises:
        KeyError: if a required section or option is missing.
    """
    config = ConfigParser()
    config.read(path)

    pukuthek = config["Pukuthek"]
    peertube = config["PeerTube"]

    api = peertube["APIPath"]
    if not api.endswith("/"):
        api += "/"

    return {
        "urls": json.loads(pukuthek["URLs"]),
        "database": pukuthek.get("Database", "pukuthek.db"),
        "api": api,
        "username": peertube["Username"],
        "password": peertube["Password"],
        "channel_id": peertube["ChannelID"],
    }


def open_database(path: str) -> sqlite3.Connection:
    """Open the SQLite database at *path* and make sure the table exists."""
    db = sqlite3.connect(path)
    with closing(db.cursor()) as cursor:
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS videos (
                id INTEGER,
                title TEXT,
                date TEXT,
                url TEXT,
                timestamp INTEGER,
                PRIMARY KEY (id, timestamp)
            )
            """
        )
    db.commit()
    return db


def already_uploaded(db: sqlite3.Connection, video_id: str, timestamp: int) -> bool:
    """Return True if this broadcast was recorded by an earlier run."""
    with closing(db.cursor()) as cursor:
        cursor.execute(
            "SELECT 1 FROM videos WHERE id = ? AND timestamp = ?",
            (video_id, timestamp),
        )
        return cursor.fetchone() is not None


def record_upload(
    db: sqlite3.Connection,
    video_id: str,
    title: str,
    date: str,
    url: str,
    timestamp: int,
) -> None:
    """Store a successfully uploaded broadcast in the database."""
    with closing(db.cursor()) as cursor:
        cursor.execute(
            """
            INSERT INTO videos (id, title, date, url, timestamp)
            VALUES (?, ?, ?, ?, ?)
            """,
            (video_id, title, date, url, timestamp),
        )
    db.commit()


def extract_video_id(url: str) -> str:
    """Return the last non-empty path segment of *url*."""
    return url.rstrip("/").rsplit("/", 1)[-1]


def parse_broadcast_time(datetime_field: str) -> datetime:
    """Parse the ``datetime`` attribute of the page's ``<time>`` element.

    The page puts a time-zone abbreviation between date and time
    (``2023-06-01CEST19:00:00``). Any alphabetic abbreviation is accepted
    (presumably ``CET`` outside daylight-saving time) and then ignored: the
    result is a naive datetime, so timestamps derived from it keep matching
    the rows already stored by earlier versions of this script.

    Raises:
        ValueError: if *datetime_field* does not have the expected shape.
    """
    match = _BROADCAST_TIME.fullmatch(datetime_field.strip())
    if match is None:
        raise ValueError(f"Unexpected datetime format: {datetime_field!r}")
    return datetime.strptime(" ".join(match.groups()), "%Y-%m-%d %H:%M:%S")


def extract_upload_id(location: str) -> str:
    """Return the ``upload_id`` carried by an upload ``Location`` header."""
    values = parse_qs(urlsplit(location).query).get("upload_id")
    if values:
        return values[0]
    # No such query parameter: fall back to whatever follows the last "=".
    return location.rsplit("=", 1)[-1]


def content_range(offset: int, length: int, total: int) -> str:
    """Build the ``Content-Range`` value for *length* bytes at *offset*."""
    return f"bytes {offset}-{offset + length - 1}/{total}"


def scrape_metadata(html):
    """Extract ``(datetime_field, date, title)`` from a broadcast page.

    Returns None when one of the expected elements is missing.
    """
    # Deferred so the helpers of this module can be imported (e.g. for
    # testing) without the third-party packages installed.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, "html.parser")
    time_tag = soup.find("time")
    date_tag = soup.find("span", {"class": "date"})
    title_tag = soup.find("span", {"class": "js-profile"})

    if time_tag is None or date_tag is None or title_tag is None:
        return None

    datetime_field = time_tag.get("datetime")
    if not datetime_field:
        return None

    return datetime_field, date_tag.text, title_tag.text


def fetch_oauth_token(api: str, username: str, password: str) -> str:
    """Log in to PeerTube with the password grant and return the access token."""
    with urlopen(api + "oauth-clients/local") as response:
        client = json.loads(response.read())

    credentials = {
        "client_id": client["client_id"],
        "client_secret": client["client_secret"],
        "grant_type": "password",
        "response_type": "code",
        "username": username,
        "password": password,
    }
    with urlopen(api + "users/token", urlencode(credentials).encode("utf-8")) as response:
        return json.loads(response.read())["access_token"]


def abort_on_http_error(error: HTTPError) -> None:
    """Print *error* and the response body, then exit with a failure status."""
    print(error)
    print(error.read())
    sys.exit(1)


def upload_video(api: str, token: str, channel_id: str, video_path: Path, name: str) -> None:
    """Upload *video_path* to PeerTube in chunks of ``CHUNK_SIZE`` bytes.

    The file is streamed from disk instead of being loaded into memory as a
    whole. Any HTTP error other than status 308 on a chunk terminates the
    script with exit status 1.
    """
    total = video_path.stat().st_size
    upload_url = api + "videos/upload-resumable"
    authorization = f"Bearer {token}"

    metadata = {
        "channelId": channel_id,
        "filename": video_path.name,
        "name": name,
        # Privacy level as used by the original script; see the PeerTube API
        # documentation for the meaning of the numeric values.
        "privacy": "3",
    }
    init_request = Request(
        upload_url,
        data=urlencode(metadata).encode("utf-8"),
        headers={
            "content-type": "application/x-www-form-urlencoded",
            "X-Upload-Content-Type": "video/mp4",
            "X-Upload-Content-Length": str(total),
            "Authorization": authorization,
        },
    )

    try:
        with urlopen(init_request) as response:
            location = response.getheader("Location")
    except HTTPError as error:
        abort_on_http_error(error)

    if not location:
        print("Upload initialisation returned no Location header")
        sys.exit(1)

    query = urlencode({"upload_id": extract_upload_id(location)})
    chunk_url = f"{upload_url}?{query}"

    with video_path.open("rb") as video_file:
        for offset in range(0, total, CHUNK_SIZE):
            chunk = video_file.read(CHUNK_SIZE)
            chunk_request = Request(
                chunk_url,
                data=chunk,
                headers={
                    "content-type": "application/octet-stream",
                    "Content-Range": content_range(offset, len(chunk), total),
                    "Authorization": authorization,
                },
                method="PUT",
            )

            print(f"Uploading chunk {offset}...")
            try:
                with urlopen(chunk_request) as response:
                    response.read()
            except HTTPError as error:
                # urllib reports status 308 as an error; the upload treats it
                # as "chunk accepted, continue with the next one".
                if error.code == 308:
                    error.close()
                    continue
                abort_on_http_error(error)


def main() -> None:
    """Download and upload every configured broadcast not yet processed."""
    settings = load_settings()

    with closing(open_database(settings["database"])) as db:
        if not settings["urls"]:
            print("No URLs given")
            return

        api = settings["api"]
        token = fetch_oauth_token(api, settings["username"], settings["password"])

        # Deferred for the same reason as the bs4 import in scrape_metadata.
        from yt_dlp import YoutubeDL

        with YoutubeDL(PARAMS) as ydl:
            for url in settings["urls"]:
                with urlopen(url) as page:
                    metadata = scrape_metadata(page.read())

                if metadata is None:
                    print(f"Skipping {url}: broadcast metadata not found")
                    continue

                datetime_field, date, title = metadata
                video_id = extract_video_id(url)
                # Naive datetime: interpreted in the machine's local time zone.
                timestamp = int(parse_broadcast_time(datetime_field).timestamp())

                if already_uploaded(db, video_id, timestamp):
                    print(f"Skipping {title} ({date})")
                    continue

                ydl.params["outtmpl"]["default"] = f"{timestamp}-%(id)s.%(ext)s"
                ydl.download([url])
                print(f"Downloaded {title} ({date})")

                video_path = Path(f"{timestamp}-{video_id}.mp4")
                upload_video(
                    api,
                    token,
                    settings["channel_id"],
                    video_path,
                    f"{title} ({date})",
                )
                print(f"Uploaded {title} ({date})")

                record_upload(db, video_id, title, date, url, timestamp)
                video_path.unlink()


if __name__ == "__main__":
    main()