From 376c1e9292ff497e389563f5e8bd96e3a4e89cf4 Mon Sep 17 00:00:00 2001 From: mister-monster <38917788+mister-monster@users.noreply.github.com> Date: Fri, 22 Nov 2019 16:35:22 -0600 Subject: [PATCH] added support for languages, tags, categories --- youtube2peertube.py | 253 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 253 insertions(+) diff --git a/youtube2peertube.py b/youtube2peertube.py index 72a0c68..c52031e 100644 --- a/youtube2peertube.py +++ b/youtube2peertube.py @@ -298,6 +298,259 @@ def run(run_once=True): run_steps(conf) sleep(poll_frequency) +if __name__ == "__main__": + run(run_once=False) + + # if the channel exists in channels_timestamps, update "published" time in the channel line + else: + published_int = utils.convert_timestamp(published) + ctr_line_list = ctr_line.split(",") + line_published_int = utils.convert_timestamp(ctr_line_list[1]) + if published_int > line_published_int: + # update the timestamp in the line for the channel in channels_timestamps, + ctr.remove(ctr_line) + ctr_line = str(channel_id + "," + published + "," + updated + '\n') + ctr.append(ctr_line) + # and add current videos to queue. + queue.append(i) + print(published) + # write the new channels and timestamps line to channels_timestamps.csv + ct = open(channels_timestamps, "w") + for line in ctr: + if line != '': + ct.write(line + "\n") + ct.close() + return queue, channel_lang + +def download_yt_video(queue_item, dl_dir, channel_conf): + url = queue_item["link"] + dl_dir = dl_dir + channel_conf["name"] + try: + video = pafy.new(url) + streams = video.streams + #for s in streams: + #print(s.resolution, s.extension, s.get_filesize, s.url) + best = video.getbest(preftype=channel_conf["preferred_extension"]) + filepath = dl_dir + "/"+ queue_item["yt_videoid"] + "." + channel_conf["preferred_extension"] + #TODO: implement resolution logic from config, currently downloading best resolution + best.download(filepath=filepath, quiet=False) + + except: + pass + # TODO: check YT alternate URL for video availability + # TODO: print and log exceptions + +def save_metadata(queue_item, dl_dir, channel_conf): + dl_dir = dl_dir + channel_conf["name"] + link = queue_item["link"] + title = queue_item["title"] + description = queue_item["summary"] + author = queue_item["author"] + published = queue_item["published"] + metadata_file = dl_dir + "/" + queue_item["yt_videoid"] + ".txt" + metadata = open(metadata_file, "w+") + # save relevant metadata as semicolon separated easy to read values to text file + metadata.write('title: "' + title + '";\n\nlink: "' + link + '";\n\nauthor: "' + author + '";\n\npublished: "' + + published + '";\n\ndescription: "' + description + '"\n\n;') + # save raw metadata JSON string + metadata.write(str(queue_item)) + metadata.close() + +def save_thumbnail(queue_item, dl_dir, channel_conf): + dl_dir = dl_dir + channel_conf["name"] + thumb = str(queue_item["media_thumbnail"][0]["url"]) + extension = thumb.split(".")[-1] + thumb_file = dl_dir + "/" + queue_item["yt_videoid"] + "." + extension + # download the thumbnail + urlretrieve(thumb, thumb_file) + return extension + +def get_pt_auth(channel_conf): + # get variables from channel_conf + pt_api = channel_conf["peertube_instance"] + "/api/v1" + pt_uname = channel_conf["peertube_username"] + pt_passwd = channel_conf["peertube_password"] + # get client ID and secret from peertube instance + id_secret = json.loads(str(requests.get(pt_api + "/oauth-clients/local").content).split("'")[1]) + client_id = id_secret["client_id"] + client_secret = id_secret["client_secret"] + # construct JSON for post request to get access token + auth_json = {'client_id': client_id, + 'client_secret': client_secret, + 'grant_type': 'password', + 'response_type': 'code', + 'username': pt_uname, + 'password': pt_passwd + } + # get access token + auth_result = json.loads(str(requests.post(pt_api + "/users/token", data=auth_json).content).split("'")[1]) + access_token = auth_result["access_token"] + return access_token + +def get_pt_channel_id(channel_conf): + pt_api = channel_conf["peertube_instance"] + "/api/v1" + post_url = pt_api + "/video-channels/" + channel_conf["peertube_channel"] + "/" + returned_json = json.loads(requests.get(post_url).content) + channel_id = returned_json["id"] + return channel_id + +def get_file(file_path): + mimetypes.init() + return (path.basename(file_path), open(path.abspath(file_path), 'rb'), + mimetypes.types_map[path.splitext(file_path)[1]]) + +def upload_to_pt(dl_dir, channel_conf, queue_item, access_token, thumb_extension): + # Adapted from Prismedia https://git.lecygnenoir.info/LecygneNoir/prismedia + pt_api = channel_conf["peertube_instance"] + "/api/v1" + video_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + \ + channel_conf["preferred_extension"] + thumb_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + thumb_extension + description = channel_conf["description_prefix"] + "\n\n" + queue_item["summary"] + "\n\n" + channel_conf["description_suffix"] + channel_id = str(get_pt_channel_id(channel_conf)) + # We need to transform fields into tuple to deal with tags as + # MultipartEncoder does not support list refer + # https://github.com/requests/toolbelt/issues/190 and + # https://github.com/requests/toolbelt/issues/205 + fields = [ + ("name", queue_item["title"]), + ("licence", "1"), + ("description", description), + ("nsfw", channel_conf["nsfw"]), + ("channelId", channel_id), + ("originallyPublishedAt", queue_item["published"]), + ("category", channel_conf["pt_channel_category"]), + ("language", channel_conf["default_lang"]), + ("privacy", str(channel_conf["pt_privacy"])), + ("commentsEnabled", channel_conf["comments_enabled"]), + ("videofile", get_file(video_file)), + ("thumbnailfile", get_file(thumb_file)), + ("previewfile", get_file(thumb_file)), + ("waitTranscoding", 'false') + ] + + if channel_conf["pt_tags"] != "": + fields.append(("tags", "[" + channel_conf["pt_tags"] + "]")) + else: + print("you have no tags in your configuration file for this channel") + multipart_data = MultipartEncoder(fields) + headers = { + 'Content-Type': multipart_data.content_type, + 'Authorization': "Bearer " + access_token + } + print(requests.post(pt_api + "/videos/upload", data=multipart_data, headers=headers).content) + +def pt_http_import(dl_dir, channel_conf, queue_item, access_token, thumb_extension, yt_lang): + # Adapted from Prismedia https://git.lecygnenoir.info/LecygneNoir/prismedia + pt_api = channel_conf["peertube_instance"] + "/api/v1" + yt_video_url = queue_item["link"] + # TODO: use the alternate link if video not found error occurs + alternate_link = queue_item["links"][0]["href"] + thumb_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + thumb_extension + description = channel_conf["description_prefix"] + "\n\n" + queue_item["summary"] + "\n\n" + channel_conf["description_suffix"] + channel_id = str(get_pt_channel_id(channel_conf)) + language = utils.set_pt_lang(yt_lang, channel_conf["default_lang"]) + category = utils.set_pt_category(channel_conf["pt_channel_category"]) + # We need to transform fields into tuple to deal with tags as + # MultipartEncoder does not support list refer + # https://github.com/requests/toolbelt/issues/190 and + # https://github.com/requests/toolbelt/issues/205 + fields = [ + ("name", queue_item["title"]), + ("licence", "1"), + ("description", description), + ("nsfw", channel_conf["nsfw"]), + ("channelId", channel_id), + ("originallyPublishedAt", queue_item["published"]), + ("category", category), + ("language", language), + ("privacy", str(channel_conf["pt_privacy"])), + ("commentsEnabled", channel_conf["comments_enabled"]), + ("targetUrl", yt_video_url), + ("thumbnailfile", get_file(thumb_file)), + ("previewfile", get_file(thumb_file)), + ("waitTranscoding", 'false') + ] + if channel_conf["pt_tags"] != "": + fields.append(("tags[]", channel_conf["pt_tags"])) + else: + print("you have no tags in your configuration file for this channel") + multipart_data = MultipartEncoder(fields) + headers = { + 'Content-Type': multipart_data.content_type, + 'Authorization': "Bearer " + access_token + } + print(requests.post(pt_api + "/videos/imports", data=multipart_data, headers=headers).content) + +def run_steps(conf): + # TODO: logging + channel = conf["channel"] + # run loop for every channel in the configuration file + global_conf = conf["global"] + if conf["global"]["delete_videos"] == "true": + delete_videos = True + else: + delete_videos = False + # The following enables the deletion of thumbnails, videos are not downloaded at all + if conf["global"]["use_pt_http_import"] == "true": + delete_videos = True + use_pt_http_import = True + else: + use_pt_http_import = False + dl_dir = global_conf["video_download_dir"] + if not path.exists(dl_dir): + mkdir(dl_dir) + channel_counter = 0 + for c in channel: + print("\n") + channel_id = channel[c]["channel_id"] + channel_conf = channel[str(channel_counter)] + video_data = get_video_data(channel_id) + queue = video_data[0] + yt_lang = video_data[1] + if len(queue) > 0: + if not path.exists(dl_dir + "/" + channel_conf["name"]): + mkdir(dl_dir + "/" + channel_conf["name"]) + # download videos, metadata and thumbnails from youtube + for queue_item in queue: + if not use_pt_http_import: + print("downloading " + queue_item["yt_videoid"] + " from YouTube...") + download_yt_video(queue_item, dl_dir, channel_conf) + print("done.") + # TODO: download closest to config specified resolution instead of best resolution + thumb_extension = save_thumbnail(queue_item, dl_dir, channel_conf) + # only save metadata to text file if archiving videos + if not delete_videos: + print("saving video metadata...") + save_metadata(queue_item, dl_dir, channel_conf) + print("done.") + access_token = get_pt_auth(channel_conf) + # upload videos, metadata and thumbnails to peertube + for queue_item in queue: + if not use_pt_http_import: + print("uploading " + queue_item["yt_videoid"] + " to Peertube...") + upload_to_pt(dl_dir, channel_conf, queue_item, access_token, thumb_extension) + print("done.") + else: + print("mirroring " + queue_item["link"] + " to Peertube using HTTP import...") + pt_http_import(dl_dir, channel_conf, queue_item, access_token, thumb_extension, yt_lang) + print("done.") + if delete_videos: + print("deleting videos and/or thumbnails...") + rmtree(dl_dir + "/" + channel_conf["name"], ignore_errors=True) + print("done") + channel_counter += 1 + +def run(run_once=True): + #TODO: turn this into a daemon + conf = utils.read_conf("config.toml") + if run_once: + run_steps(conf) + else: + while True: + poll_frequency = int(conf["global"]["poll_frequency"]) * 60 + run_steps(conf) + sleep(poll_frequency) + if __name__ == "__main__": run(run_once=False)