# File listing metadata: 247 lines, 7.2 KiB, Python
import json
import sqlite3
import sys
from configparser import ConfigParser
from contextlib import closing
from datetime import datetime
from pathlib import Path
from urllib.error import HTTPError
from urllib.parse import urlencode
from urllib.request import urlopen, Request

from bs4 import BeautifulSoup
from yt_dlp import YoutubeDL

# Load the script's settings from settings.ini in the current working directory.
CONFIG = ConfigParser()
CONFIG.read("settings.ini")

# Section proxies; a missing section or option still raises KeyError.
_pukuthek_settings = CONFIG["Pukuthek"]
_peertube_settings = CONFIG["PeerTube"]

# "URLs" holds a JSON value (iterated later as the list of pages to process).
URLS = json.loads(_pukuthek_settings["URLs"])
# SQLite file that records already processed videos; optional setting.
DATABASE_FILE = _pukuthek_settings.get("Database", "pukuthek.db")

# PeerTube API base path, login credentials and target channel id.
PEERTUBE_API, PEERTUBE_USER, PEERTUBE_PASSWORD, PEERTUBE_CHANNEL = (
    _peertube_settings[option]
    for option in ("APIPath", "Username", "Password", "ChannelID")
)

# Post-processor that converts the download to an mp4 container.
# NOTE(review): "preferedformat" (sic) appears to be the exact key name yt-dlp
# expects - verify against yt-dlp before "correcting" the spelling.
_CONVERT_TO_MP4 = {
    "key": "FFmpegVideoConvertor",
    "preferedformat": "mp4",
}

# Post-processor that concatenates playlist entries; "only_multi_video" is
# switched off and it is scheduled for the "playlist" stage.
_CONCAT_PLAYLIST = {
    "key": "FFmpegConcat",
    "only_multi_video": False,
    "when": "playlist",
}

# Options handed to YoutubeDL.
# NOTE(review): source_address "0.0.0.0" presumably forces IPv4 - confirm.
PARAMS = {
    "source_address": "0.0.0.0",
    "postprocessors": [_CONVERT_TO_MP4, _CONCAT_PLAYLIST],
}

####

# Initialize database

# One row per processed video, keyed by (id, timestamp).
_CREATE_VIDEOS_TABLE = """
    CREATE TABLE IF NOT EXISTS videos (
        id INTEGER,
        title TEXT,
        date TEXT,
        url TEXT,
        timestamp INTEGER,
        PRIMARY KEY (id, timestamp)
    )
"""

db = sqlite3.connect(DATABASE_FILE)

_schema_cursor = db.cursor()
try:
    _schema_cursor.execute(_CREATE_VIDEOS_TABLE)
finally:
    # Release the cursor whether or not the statement succeeded.
    _schema_cursor.close()

db.commit()

# Nothing to do without at least one configured page URL.
if not URLS:
    print("No URLs given")
    # sys.exit() instead of the interactive-shell helper exit(), which the
    # site module only installs for interpreter sessions.
    sys.exit()

# Endpoint paths are appended to the base by plain string concatenation,
# so make sure the base ends with exactly the separator they rely on.
if not PEERTUBE_API.endswith("/"):
    PEERTUBE_API += "/"

# Step 1: ask the instance for its local OAuth client id and secret.
oauth_lookup_url = PEERTUBE_API + "oauth-clients/local"
# The response is closed as soon as its body has been parsed.
with urlopen(oauth_lookup_url) as oauth_lookup_response:
    oauth_lookup_data = json.loads(oauth_lookup_response.read())

oauth_client_id = oauth_lookup_data["client_id"]
oauth_client_secret = oauth_lookup_data["client_secret"]

# Step 2: exchange the configured username/password for an access token.
oauth_url = PEERTUBE_API + "users/token"
oauth_data = {
    "client_id": oauth_client_id,
    "client_secret": oauth_client_secret,
    "grant_type": "password",
    "response_type": "code",
    "username": PEERTUBE_USER,
    "password": PEERTUBE_PASSWORD,
}
# Passing a body makes urlopen issue a POST with form-encoded data.
with urlopen(oauth_url, urlencode(oauth_data).encode("utf-8")) as oauth_response:
    oauth_result = json.loads(oauth_response.read())

# Bearer token sent with every upload request further down.
oauth_token = oauth_result["access_token"]

# Number of bytes sent per PUT request of the resumable upload.
UPLOAD_CHUNK_SIZE = 1024 * 1024


def _element_text(soup, tag, css_class):
    """Return the text of the first ``tag`` element with class ``css_class``.

    Returns None when the page has no such element, so callers can tell a
    missing element apart from one whose text is empty.
    """
    element = soup.find(tag, {"class": css_class})
    return None if element is None else element.text


def _abort_on_http_error(error):
    """Print an HTTPError and its response body, then exit with status 1."""
    print(error)
    print(error.read())
    sys.exit(1)


def _upload_video(video_path, name, description):
    """Upload the file at ``video_path`` to PeerTube via the resumable endpoint.

    A POST announces the upload (metadata plus total size); the file is then
    streamed from disk in ``UPLOAD_CHUNK_SIZE`` pieces with PUT requests, so
    the whole video never has to be held in memory.

    Args:
        video_path: ``pathlib.Path`` of the mp4 file to send.
        name: Display name of the video.
        description: Description text of the video.

    Exits the script with status 1 on any HTTP error other than a 308
    response to a chunk, which is treated as "send the next chunk".
    """
    video_size = video_path.stat().st_size
    upload_url = PEERTUBE_API + "videos/upload-resumable"
    upload_data = {
        "channelId": PEERTUBE_CHANNEL,
        "filename": video_path.name,
        "name": name,
        # NOTE(review): 2 is presumably "Unlisted" in PeerTube's privacy
        # enum - confirm against the API documentation.
        "privacy": "2",
        "description": description,
    }
    upload_request = Request(
        upload_url,
        data=urlencode(upload_data).encode("utf-8"),
        headers={
            "content-type": "application/x-www-form-urlencoded",
            "X-Upload-Content-Type": "video/mp4",
            "X-Upload-Content-Length": str(video_size),
            "Authorization": f"Bearer {oauth_token}",
        },
    )

    try:
        with urlopen(upload_request) as upload_result:
            location = upload_result.getheader("Location")
    except HTTPError as e:
        _abort_on_http_error(e)

    if not location:
        # Without the Location header there is no upload id to continue with.
        print("PeerTube did not return an upload location")
        sys.exit(1)

    # The upload id is whatever follows the last "=" of the Location header;
    # the chunk URL is the same for every chunk, so build it once.
    upload_id = location.split("=")[-1]
    chunk_upload_url = f"{upload_url}?upload_id={upload_id}"

    with video_path.open("rb") as video_file:
        for offset in range(0, video_size, UPLOAD_CHUNK_SIZE):
            chunk = video_file.read(UPLOAD_CHUNK_SIZE)
            last_byte = offset + len(chunk) - 1
            chunk_upload_request = Request(
                chunk_upload_url,
                data=chunk,
                headers={
                    "content-type": "application/octet-stream",
                    "Content-Range": f"bytes {offset}-{last_byte}/{video_size}",
                    "Authorization": f"Bearer {oauth_token}",
                },
                method="PUT",
            )

            percentage = int(offset / video_size * 100)
            print(f"Uploading chunk {offset} / {video_size} ({percentage} %)...")

            try:
                with urlopen(chunk_upload_request) as chunk_upload_response:
                    # Drain the body so the connection is released cleanly.
                    chunk_upload_response.read()
            except HTTPError as e:
                # urllib surfaces the 308 answer to a PUT as an HTTPError;
                # it is not a failure, just move on to the next chunk.
                if e.code != 308:
                    _abort_on_http_error(e)


# Process every configured page: scrape metadata, skip what is live or already
# recorded, download with yt-dlp, upload to PeerTube, then record and clean up.
with YoutubeDL(PARAMS) as ydl:
    for url in URLS:
        try:
            with urlopen(url) as page:
                content = page.read()
        except HTTPError as e:
            print(e)
            print(e.read())
            continue

        soup = BeautifulSoup(content, "html.parser")

        # Prefer the "js-profile" span as title, fall back to the headline.
        title = _element_text(soup, "span", "js-profile")
        if title is None:
            title = _element_text(soup, "h2", "js-description-title")
        if title is None:
            print(
                f"Cannot find title for {url} - probably doesn't exist anymore, skipping"
            )
            continue

        time_element = soup.find("time")
        datetime_field = (
            time_element.get("datetime") if time_element is not None else None
        )
        date = _element_text(soup, "span", "date")
        if datetime_field is None or date is None:
            print(f"Cannot find date for {url}, skipping")
            continue

        # A livestream link without the "over" marker in the raw page means
        # the stream is still running. The marker is plain ASCII, so it is
        # searched in the undecoded bytes.
        is_live = (
            soup.find("a", {"class": "livestream-link"}) is not None
            and b'"is_livestream_over":true' not in content
        )
        if is_live:
            print(f"{title} ({date}) seems to be live at the moment, skipping")
            continue

        # The id is the last path component; tolerate trailing slashes.
        video_id = url.rstrip("/").split("/")[-1]

        segment_texts = [
            segment.text
            for segment in soup.find_all("h4", {"class": "segment-title"})
        ]
        description = (
            "\n".join(segment_texts)
            if segment_texts
            else "Keine Beschreibung verfügbar"
        )

        subtitle_text = _element_text(soup, "p", "js-description-subtitle")
        if subtitle_text:
            description = f"{subtitle_text}\n\n{description}"

        # NOTE(review): the format hard-codes the literal "CEST"; a value
        # carrying another zone label (e.g. "CET") raises ValueError - confirm
        # what the site emits outside summer time.
        published_at = datetime.strptime(datetime_field, "%Y-%m-%dCEST%H:%M:%S")
        timestamp = int(published_at.timestamp())

        with closing(db.cursor()) as cursor:
            cursor.execute(
                """
                SELECT * FROM videos
                WHERE id = ? AND timestamp = ?
                """,
                (video_id, timestamp),
            )
            already_recorded = cursor.fetchone() is not None

        if already_recorded:
            print(f"Skipping {title} ({date})")
            continue

        ydl.params["outtmpl"]["default"] = f"{timestamp}-%(id)s.%(ext)s"
        ydl.download([url])
        print(f"Downloaded {title} ({date})")

        # Assumes yt-dlp's %(id)s equals the id taken from the URL and that
        # the post-processors leave an .mp4 file behind.
        video_path = Path(f"{timestamp}-{video_id}.mp4")

        _upload_video(video_path, f"{title} ({date})", description)
        print(f"Uploaded {title} ({date})")

        # Record the video only after the upload went through.
        with closing(db.cursor()) as cursor:
            cursor.execute(
                """
                INSERT INTO videos
                VALUES (?, ?, ?, ?, ?)
                """,
                (video_id, title, date, url, timestamp),
            )
        db.commit()

        video_path.unlink()

db.close()