# pukuthek/pukuthek.py
#
# 247 lines
# 7.2 KiB
# Python

from yt_dlp import YoutubeDL
from bs4 import BeautifulSoup
from datetime import datetime
from urllib.request import urlopen, Request
from urllib.parse import urlencode
from urllib.error import HTTPError
from configparser import ConfigParser
from contextlib import closing
from pathlib import Path
import sqlite3
import json
# Settings are read from settings.ini in the current working directory.
CONFIG = ConfigParser()
CONFIG.read("settings.ini")

# [Pukuthek] section: JSON-encoded list of page URLs and the SQLite file name.
_pukuthek_settings = CONFIG["Pukuthek"]
URLS = json.loads(_pukuthek_settings["URLs"])
DATABASE_FILE = _pukuthek_settings.get("Database", "pukuthek.db")

# [PeerTube] section: API base path, credentials and target channel.
_peertube_settings = CONFIG["PeerTube"]
PEERTUBE_API = _peertube_settings["APIPath"]
PEERTUBE_USER = _peertube_settings["Username"]
PEERTUBE_PASSWORD = _peertube_settings["Password"]
PEERTUBE_CHANNEL = _peertube_settings["ChannelID"]

# Postprocessor entries handed to yt-dlp: convert to mp4, then concatenate
# at playlist level (also when there is only a single video).
_CONVERT_TO_MP4 = {
    "key": "FFmpegVideoConvertor",
    "preferedformat": "mp4",
}
_CONCAT_PLAYLIST = {
    "key": "FFmpegConcat",
    "only_multi_video": False,
    "when": "playlist",
}

# Options passed to YoutubeDL(); "source_address" is the local address that
# outgoing connections are bound to.
PARAMS = {
    "source_address": "0.0.0.0",
    "postprocessors": [_CONVERT_TO_MP4, _CONCAT_PLAYLIST],
}
####
# Open the bookkeeping database and make sure the "videos" table exists.
# A row is keyed by (id, timestamp); the rest is descriptive metadata.
_VIDEOS_SCHEMA = """
CREATE TABLE IF NOT EXISTS videos (
id INTEGER,
title TEXT,
date TEXT,
url TEXT,
timestamp INTEGER,
PRIMARY KEY (id, timestamp)
)
"""

db = sqlite3.connect(DATABASE_FILE)
with closing(db.cursor()) as cursor:
    cursor.execute(_VIDEOS_SCHEMA)
db.commit()
# Nothing to do without any configured page URLs. Exit status stays 0, as
# before; SystemExit is raised directly because the exit() helper is injected
# by the site module for interactive use and is not available under -S.
if not URLS:
    print("No URLs given")
    raise SystemExit(0)

# Endpoint paths are appended to PEERTUBE_API by plain string concatenation,
# so the base must end with a slash.
if not PEERTUBE_API.endswith("/"):
    PEERTUBE_API += "/"
def _fetch_json(url, data=None):
    """Request *url* and return its JSON-decoded body.

    Sends a POST with *data* (bytes) when given, otherwise a GET. The
    response is always closed. On an HTTP error the status and the error
    body are printed and the script exits with status 1, matching how
    upload failures are reported further down.
    """
    try:
        with urlopen(url, data) as response:
            return json.loads(response.read())
    except HTTPError as error:
        print(error)
        print(error.read())
        raise SystemExit(1) from error


# Step 1: look up the instance's local OAuth client credentials.
oauth_lookup_url = PEERTUBE_API + "oauth-clients/local"
oauth_lookup_data = _fetch_json(oauth_lookup_url)
oauth_client_id = oauth_lookup_data["client_id"]
oauth_client_secret = oauth_lookup_data["client_secret"]

# Step 2: exchange username/password for a bearer token (password grant).
oauth_url = PEERTUBE_API + "users/token"
oauth_data = {
    "client_id": oauth_client_id,
    "client_secret": oauth_client_secret,
    "grant_type": "password",
    "response_type": "code",
    "username": PEERTUBE_USER,
    "password": PEERTUBE_PASSWORD,
}
oauth_result = _fetch_json(oauth_url, urlencode(oauth_data).encode("utf-8"))
oauth_token = oauth_result["access_token"]
# Main pipeline. For every configured page URL:
#   1. fetch and parse the page (title, date, description),
#   2. skip it if it is still live or already recorded in the database,
#   3. download it with yt-dlp,
#   4. upload the resulting mp4 to PeerTube via the resumable-upload endpoint,
#   5. record it in the database and delete the local file.

# Size of each PUT body in the resumable upload (1 MiB).
UPLOAD_CHUNK_SIZE = 1024 * 1024

# The <time datetime="..."> attribute carries a zone abbreviation between the
# date and the time. The CEST form is the one this script has always parsed;
# the CET form is assumed for winter-time pages -- TODO confirm against a
# real page.
BROADCAST_TIME_FORMATS = (
    "%Y-%m-%dCEST%H:%M:%S",
    "%Y-%m-%dCET%H:%M:%S",
)


def _find_text(soup, tag, css_class):
    """Return the text of the first *tag* with class *css_class*, or None."""
    element = soup.find(tag, {"class": css_class})
    return element.text if element is not None else None


def _parse_broadcast_time(value):
    """Return the naive datetime encoded in *value*, or None if unparseable.

    *value* may be None (attribute missing); that also yields None.
    """
    for time_format in BROADCAST_TIME_FORMATS:
        try:
            return datetime.strptime(value, time_format)
        except (TypeError, ValueError):
            continue
    return None


with YoutubeDL(PARAMS) as ydl:
    for url in URLS:
        # --- 1. Fetch and parse the page --------------------------------
        try:
            with urlopen(url) as page:
                content = page.read()
        except HTTPError as error:
            print(error)
            print(error.read())
            continue
        soup = BeautifulSoup(content, "html.parser")

        # The title is looked up first: a page without one is treated as
        # removed, and its other fields cannot be relied on either.
        title = _find_text(soup, "span", "js-profile")
        if title is None:
            title = _find_text(soup, "h2", "js-description-title")
        if title is None:
            print(
                f"Cannot find title for {url} - probably doesn't exist anymore, skipping"
            )
            continue

        time_element = soup.find("time")
        datetime_field = (
            time_element.get("datetime") if time_element is not None else None
        )
        broadcast_time = _parse_broadcast_time(datetime_field)
        date = _find_text(soup, "span", "date")
        if broadcast_time is None or date is None:
            print(f"Cannot determine broadcast date for {url}, skipping")
            continue
        # broadcast_time is naive, so timestamp() interprets it in the
        # local timezone of the machine running this script.
        timestamp = int(broadcast_time.timestamp())

        # --- 2. Skip running livestreams and known videos ---------------
        is_livestream = soup.find("a", {"class": "livestream-link"}) is not None
        if is_livestream and b'"is_livestream_over":true' not in content:
            print(f"{title} ({date}) seems to be live at the moment, skipping")
            continue

        # Last non-empty path segment of the URL. The downloaded file is
        # later looked up under this id, i.e. this presumes yt-dlp's %(id)s
        # expands to the same value -- NOTE(review): confirm.
        video_id = url.rstrip("/").split("/")[-1]

        segment_texts = [
            segment.text
            for segment in soup.find_all("h4", {"class": "segment-title"})
        ]
        description = (
            "\n".join(segment_texts)
            if segment_texts
            else "Keine Beschreibung verfügbar"
        )
        subtitle = _find_text(soup, "p", "js-description-subtitle")
        if subtitle:
            description = f"{subtitle}\n\n{description}"

        with closing(db.cursor()) as cursor:
            cursor.execute(
                """
                SELECT * FROM videos
                WHERE id = ? AND timestamp = ?
                """,
                (video_id, timestamp),
            )
            already_known = cursor.fetchone() is not None
        if already_known:
            print(f"Skipping {title} ({date})")
            continue

        # --- 3. Download -------------------------------------------------
        ydl.params["outtmpl"]["default"] = f"{timestamp}-%(id)s.%(ext)s"
        ydl.download([url])
        print(f"Downloaded {title} ({date})")
        video_path = Path(f"{timestamp}-{video_id}.mp4")
        video_size = video_path.stat().st_size

        # --- 4. Upload (resumable) ---------------------------------------
        # Initialise the upload session; the response's Location header
        # carries the upload_id used for the chunk requests.
        upload_url = PEERTUBE_API + "videos/upload-resumable"
        upload_data = {
            "channelId": PEERTUBE_CHANNEL,
            "filename": video_path.name,
            "name": f"{title} ({date})",
            "privacy": "2",
            "description": description,
        }
        upload_request = Request(
            upload_url,
            data=urlencode(upload_data).encode("utf-8"),
            headers={
                "content-type": "application/x-www-form-urlencoded",
                "X-Upload-Content-Type": "video/mp4",
                "X-Upload-Content-Length": str(video_size),
                "Authorization": f"Bearer {oauth_token}",
            },
        )
        try:
            with urlopen(upload_request) as upload_result:
                location = upload_result.getheader("Location")
        except HTTPError as error:
            print(error)
            print(error.read())
            raise SystemExit(1) from error
        if not location:
            print("Upload initialisation returned no Location header")
            raise SystemExit(1)
        chunk_upload_url = f"{upload_url}?upload_id={location.split('=')[-1]}"

        # Stream the file chunk by chunk instead of holding the whole video
        # in memory.
        with video_path.open("rb") as video_file:
            for offset in range(0, video_size, UPLOAD_CHUNK_SIZE):
                chunk = video_file.read(UPLOAD_CHUNK_SIZE)
                last_byte = offset + len(chunk) - 1
                chunk_upload_request = Request(
                    chunk_upload_url,
                    data=chunk,
                    headers={
                        "content-type": "application/octet-stream",
                        "Content-Range": f"bytes {offset}-{last_byte}/{video_size}",
                        "Authorization": f"Bearer {oauth_token}",
                    },
                    method="PUT",
                )
                percentage = int(offset / video_size * 100)
                print(
                    f"Uploading chunk {offset} / {video_size} ({percentage} %)..."
                )
                try:
                    with urlopen(chunk_upload_request) as chunk_upload_response:
                        chunk_upload_response.read()
                except HTTPError as error:
                    # Status 308 is treated as "chunk accepted, send the
                    # next one"; urllib surfaces it as an HTTPError.
                    if error.code == 308:
                        continue
                    print(error)
                    print(error.read())
                    raise SystemExit(1) from error
        print(f"Uploaded {title} ({date})")

        # --- 5. Record and clean up --------------------------------------
        with closing(db.cursor()) as cursor:
            cursor.execute(
                """
                INSERT INTO videos
                VALUES (?, ?, ?, ?, ?)
                """,
                (video_id, title, date, url, timestamp),
            )
        db.commit()
        video_path.unlink()