From 376c1e9292ff497e389563f5e8bd96e3a4e89cf4 Mon Sep 17 00:00:00 2001
From: mister-monster <38917788+mister-monster@users.noreply.github.com>
Date: Fri, 22 Nov 2019 16:35:22 -0600
Subject: [PATCH] added support for languages, tags, categories

---
 youtube2peertube.py | 253 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 253 insertions(+)

diff --git a/youtube2peertube.py b/youtube2peertube.py
index 72a0c68..c52031e 100644
--- a/youtube2peertube.py
+++ b/youtube2peertube.py
@@ -298,6 +298,259 @@ def run(run_once=True):
             run_steps(conf)
             sleep(poll_frequency)
 
+if __name__ == "__main__":
+    run(run_once=False)  # run_once=False makes run() poll in an endless loop
+
+        # if the channel exists in channels_timestamps, update "published" time in the channel line
+        else:
+            published_int = utils.convert_timestamp(published)
+            ctr_line_list = ctr_line.split(",")
+            line_published_int = utils.convert_timestamp(ctr_line_list[1])
+            if published_int > line_published_int:
+                # update the timestamp in the line for the channel in channels_timestamps,
+                ctr.remove(ctr_line)
+                ctr_line = str(channel_id + "," + published + "," + updated + '\n')
+                ctr.append(ctr_line)
+                # and add current videos to queue.
+                queue.append(i)
+        print(published)
+    # write the new channels and timestamps line to channels_timestamps.csv
+    ct = open(channels_timestamps, "w")
+    for line in ctr:
+        if line != '':
+            ct.write(line + "\n")
+    ct.close()
+    return queue, channel_lang
+
+def download_yt_video(queue_item, dl_dir, channel_conf):
+    """Download the best available stream of a queued YouTube video.
+
+    The file is written to <dl_dir><channel name>/<yt_videoid>.<preferred_extension>.
+    Failures are reported and swallowed so that one bad video does not abort the run.
+    """
+    url = queue_item["link"]
+    dl_dir = dl_dir + channel_conf["name"]
+    try:
+        extension = channel_conf["preferred_extension"]
+        # TODO: implement resolution logic from config, currently downloading best resolution
+        best = pafy.new(url).getbest(preftype=extension)
+        best.download(filepath=dl_dir + "/" + queue_item["yt_videoid"] + "." + extension, quiet=False)
+    except Exception as e:
+        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit stop the program.
+        # TODO: check YT alternate URL for video availability; log exceptions
+        print("failed to download " + url + ": " + str(e))
+
+def save_metadata(queue_item, dl_dir, channel_conf):
+    """Write a video's metadata to <dl_dir><channel name>/<yt_videoid>.txt.
+
+    The file holds semicolon separated, easy to read values followed by the
+    raw str() form of queue_item.
+    """
+    metadata_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + ".txt"
+    readable = ('title: "' + queue_item["title"] + '";\n\nlink: "' + queue_item["link"] +
+                '";\n\nauthor: "' + queue_item["author"] + '";\n\npublished: "' +
+                queue_item["published"] + '";\n\ndescription: "' + queue_item["summary"] + '"\n\n;')
+    # the with block closes the file even when a write fails
+    with open(metadata_file, "w+") as metadata:
+        metadata.write(readable)
+        # save raw metadata string
+        metadata.write(str(queue_item))
+
+def save_thumbnail(queue_item, dl_dir, channel_conf):
+    """Download the video's first thumbnail into the channel directory and return its extension."""
+    channel_dir = dl_dir + channel_conf["name"]
+    thumb_url = str(queue_item["media_thumbnail"][0]["url"])
+    # the extension is whatever follows the last dot of the thumbnail URL
+    extension = thumb_url.rsplit(".", 1)[-1]
+    urlretrieve(thumb_url, channel_dir + "/" + queue_item["yt_videoid"] + "." + extension)
+    return extension
+
+def get_pt_auth(channel_conf):
+    """Log in to the channel's PeerTube instance and return an OAuth access token.
+
+    Reads peertube_instance, peertube_username and peertube_password from channel_conf.
+    """
+    pt_api = channel_conf["peertube_instance"] + "/api/v1"
+    pt_uname = channel_conf["peertube_username"]
+    pt_passwd = channel_conf["peertube_password"]
+    # get client ID and secret from peertube instance; the body is parsed directly,
+    # slicing str(bytes) between quotes broke on apostrophes and escaped characters
+    id_secret = json.loads(requests.get(pt_api + "/oauth-clients/local").content)
+    # construct form data for the post request to get access token
+    auth_json = {'client_id': id_secret["client_id"],
+                 'client_secret': id_secret["client_secret"],
+                 'grant_type': 'password',
+                 'response_type': 'code',
+                 'username': pt_uname,
+                 'password': pt_passwd
+                 }
+    auth_result = json.loads(requests.post(pt_api + "/users/token", data=auth_json).content)
+    return auth_result["access_token"]
+
+def get_pt_channel_id(channel_conf):
+    """Return the "id" field of the configured PeerTube channel, fetched from the instance API."""
+    channel_url = (channel_conf["peertube_instance"] + "/api/v1" +
+                   "/video-channels/" + channel_conf["peertube_channel"] + "/")
+    response = requests.get(channel_url)
+    return json.loads(response.content)["id"]
+
+def get_file(file_path):
+    """Build the (file name, open binary handle, MIME type) tuple used for a multipart file field."""
+    mimetypes.init()  # initialise the mimetypes tables before the types_map lookup below
+    return path.basename(file_path), open(path.abspath(file_path), 'rb'), mimetypes.types_map[path.splitext(file_path)[1]]
+
+def upload_to_pt(dl_dir, channel_conf, queue_item, access_token, thumb_extension):
+    """Upload a downloaded video with its thumbnail to PeerTube and print the response body.
+
+    Adapted from Prismedia https://git.lecygnenoir.info/LecygneNoir/prismedia
+    """
+    api_root = channel_conf["peertube_instance"] + "/api/v1"
+    # video and thumbnail share the path <dl_dir><channel name>/<yt_videoid>.<extension>
+    file_stem = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "."
+    video_path = file_stem + channel_conf["preferred_extension"]
+    thumb_path = file_stem + thumb_extension
+    full_description = "\n\n".join(
+        [channel_conf["description_prefix"], queue_item["summary"], channel_conf["description_suffix"]])
+    pt_channel_id = str(get_pt_channel_id(channel_conf))
+    # The form is a list of tuples because MultipartEncoder does not support list values, see
+    # https://github.com/requests/toolbelt/issues/190 and
+    # https://github.com/requests/toolbelt/issues/205
+    form_fields = [
+        ("name", queue_item["title"]),
+        ("licence", "1"),
+        ("description", full_description),
+        ("nsfw", channel_conf["nsfw"]),
+        ("channelId", pt_channel_id),
+        ("originallyPublishedAt", queue_item["published"]),
+        ("category", channel_conf["pt_channel_category"]),
+        ("language", channel_conf["default_lang"]),
+        ("privacy", str(channel_conf["pt_privacy"])),
+        ("commentsEnabled", channel_conf["comments_enabled"]),
+        ("videofile", get_file(video_path)),
+        ("thumbnailfile", get_file(thumb_path)),
+        ("previewfile", get_file(thumb_path)),
+        ("waitTranscoding", 'false')
+    ]
+    if channel_conf["pt_tags"] == "":
+        print("you have no tags in your configuration file for this channel")
+    else:
+        form_fields.append(("tags", "[" + channel_conf["pt_tags"] + "]"))
+    encoder = MultipartEncoder(form_fields)
+    auth_headers = {'Content-Type': encoder.content_type, 'Authorization': "Bearer " + access_token}
+    print(requests.post(api_root + "/videos/upload", data=encoder, headers=auth_headers).content)
+
+def pt_http_import(dl_dir, channel_conf, queue_item, access_token, thumb_extension, yt_lang):
+    """Ask PeerTube to import a video from its YouTube URL and print the response body.
+
+    Adapted from Prismedia https://git.lecygnenoir.info/LecygneNoir/prismedia
+    """
+    api_root = channel_conf["peertube_instance"] + "/api/v1"
+    source_url = queue_item["link"]
+    # TODO: use the alternate link if video not found error occurs
+    alternate_link = queue_item["links"][0]["href"]
+    thumb_path = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + thumb_extension
+    full_description = "\n\n".join(
+        [channel_conf["description_prefix"], queue_item["summary"], channel_conf["description_suffix"]])
+    pt_channel_id = str(get_pt_channel_id(channel_conf))
+    pt_language = utils.set_pt_lang(yt_lang, channel_conf["default_lang"])
+    pt_category = utils.set_pt_category(channel_conf["pt_channel_category"])
+    # The form is a list of tuples because MultipartEncoder does not support list values, see
+    # https://github.com/requests/toolbelt/issues/190 and
+    # https://github.com/requests/toolbelt/issues/205
+    form_fields = [
+        ("name", queue_item["title"]),
+        ("licence", "1"),
+        ("description", full_description),
+        ("nsfw", channel_conf["nsfw"]),
+        ("channelId", pt_channel_id),
+        ("originallyPublishedAt", queue_item["published"]),
+        ("category", pt_category),
+        ("language", pt_language),
+        ("privacy", str(channel_conf["pt_privacy"])),
+        ("commentsEnabled", channel_conf["comments_enabled"]),
+        ("targetUrl", source_url),
+        ("thumbnailfile", get_file(thumb_path)),
+        ("previewfile", get_file(thumb_path)),
+        ("waitTranscoding", 'false')
+    ]
+    if channel_conf["pt_tags"] == "":
+        print("you have no tags in your configuration file for this channel")
+    else:
+        form_fields.append(("tags[]", channel_conf["pt_tags"]))
+    encoder = MultipartEncoder(form_fields)
+    auth_headers = {'Content-Type': encoder.content_type, 'Authorization': "Bearer " + access_token}
+    print(requests.post(api_root + "/videos/imports", data=encoder, headers=auth_headers).content)
+
+def run_steps(conf):
+    """Mirror the queued videos of every configured channel from YouTube to PeerTube.
+
+    Per channel: download each queued video (skipped when PeerTube HTTP import is on),
+    its thumbnail and optionally its metadata, then upload or import every video and
+    finally delete the channel's download directory if deletion is enabled.
+    """
+    # TODO: logging
+    channel = conf["channel"]
+    global_conf = conf["global"]
+    delete_videos = global_conf["delete_videos"] == "true"
+    use_pt_http_import = global_conf["use_pt_http_import"] == "true"
+    if use_pt_http_import:
+        # this enables the deletion of thumbnails, videos are not downloaded at all
+        delete_videos = True
+    dl_dir = global_conf["video_download_dir"]
+    if not path.exists(dl_dir):
+        mkdir(dl_dir)
+    # run loop for every channel in the configuration file
+    for c in channel:
+        print("\n")
+        # take id and settings from the same entry so they cannot get out of step
+        channel_conf = channel[c]
+        video_data = get_video_data(channel_conf["channel_id"])
+        queue = video_data[0]
+        yt_lang = video_data[1]
+        if len(queue) > 0:
+            if not path.exists(dl_dir + "/" + channel_conf["name"]):
+                mkdir(dl_dir + "/" + channel_conf["name"])
+            thumb_extensions = []  # one per queued video, in queue order; they may differ
+            # download videos, metadata and thumbnails from youtube
+            for queue_item in queue:
+                if not use_pt_http_import:
+                    print("downloading " + queue_item["yt_videoid"] + " from YouTube...")
+                    download_yt_video(queue_item, dl_dir, channel_conf)
+                    print("done.")
+                # TODO: download closest to config specified resolution instead of best resolution
+                thumb_extensions.append(save_thumbnail(queue_item, dl_dir, channel_conf))
+                # only save metadata to text file if archiving videos
+                if not delete_videos:
+                    print("saving video metadata...")
+                    save_metadata(queue_item, dl_dir, channel_conf)
+                    print("done.")
+            access_token = get_pt_auth(channel_conf)
+            # upload videos, metadata and thumbnails to peertube
+            for queue_item, thumb_extension in zip(queue, thumb_extensions):
+                if not use_pt_http_import:
+                    print("uploading " + queue_item["yt_videoid"] + " to Peertube...")
+                    upload_to_pt(dl_dir, channel_conf, queue_item, access_token, thumb_extension)
+                    print("done.")
+                else:
+                    print("mirroring " + queue_item["link"] + " to Peertube using HTTP import...")
+                    pt_http_import(dl_dir, channel_conf, queue_item, access_token, thumb_extension, yt_lang)
+                    print("done.")
+            if delete_videos:
+                print("deleting videos and/or thumbnails...")
+                rmtree(dl_dir + "/" + channel_conf["name"], ignore_errors=True)
+                print("done")
+
+def run(run_once=True):
+    """Read config.toml and run the mirroring steps once, or repeatedly at the configured interval."""
+    conf = utils.read_conf("config.toml")
+    if run_once:
+        run_steps(conf)
+        return
+    while True:  # TODO: turn this into a daemon
+        pause = int(conf["global"]["poll_frequency"]) * 60
+        run_steps(conf)
+        sleep(pause)
+
 if __name__ == "__main__":
     run(run_once=False)