From 117997a5d9f4468cc1037959be5fb870e2b7c8a6 Mon Sep 17 00:00:00 2001 From: Kumi Date: Tue, 18 Jun 2024 17:00:05 +0200 Subject: [PATCH] feat: add initial project setup with Flask and Git integration Introduce the initial project structure for gitcloak, a Python-based private frontend for GitHub repositories. Key changes include: - Added .gitignore to exclude virtual environment, bytecode files, and cache directories. - Added LICENSE file using the MIT License. - Created a basic `pyproject.toml` with project metadata and dependencies. - Implemented Flask-based application to render repository structure and file contents. - Added `Git` class to handle interactions with remote GitHub repositories using Dulwich and requests. - Included HTML template for displaying repo content. - Created sample `test.py` for testing Git class methods. This setup enables foundational project functionality and establishes a clear structure for further development. --- .gitignore | 3 ++ LICENSE | 19 +++++++ README.md | 0 pyproject.toml | 22 ++++++++ src/gitcloak/__init__.py | 0 src/gitcloak/app.py | 73 +++++++++++++++++++++++++++ src/gitcloak/classes/__init__.py | 0 src/gitcloak/classes/git.py | 86 ++++++++++++++++++++++++++++++++ src/gitcloak/templates/path.html | 66 ++++++++++++++++++++++++ test.py | 11 ++++ 10 files changed, 280 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 pyproject.toml create mode 100644 src/gitcloak/__init__.py create mode 100644 src/gitcloak/app.py create mode 100644 src/gitcloak/classes/__init__.py create mode 100644 src/gitcloak/classes/git.py create mode 100644 src/gitcloak/templates/path.html create mode 100644 test.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ee18991 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +venv/ +*.pyc +__pycache__/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..56a4fc2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 
@@ +Copyright (c) 2024 Kumi Mitterer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..81ce80e
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,22 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "gitcloak"
+version = "0.0.1"
+authors = [{ name = "Kumi Mitterer", email = "gitcloak@kumi.email" }]
+description = "Simple Python-based private frontend for GitHub repositories"
+readme = "README.md"
+license = { file = "LICENSE" }
+requires-python = ">=3.10"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
+dependencies = ["requests", "flask", "markdown2", "dulwich"]
+
+[project.urls]
+"Homepage" = "https://git.private.coffee/kumi/gitcloak"
+"Bug Tracker" = "https://git.private.coffee/kumi/gitcloak/issues"
diff --git a/src/gitcloak/__init__.py b/src/gitcloak/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/gitcloak/app.py b/src/gitcloak/app.py
new file mode 100644
index 0000000..9b2f4a1
--- /dev/null
+++ b/src/gitcloak/app.py
@@ -0,0 +1,108 @@
+"""Flask application serving a read-only view of GitHub repositories."""
+
+from flask import Flask, Response, render_template, jsonify, abort
+from .classes.git import Git
+import markdown2
+import logging
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+handler = logging.StreamHandler()
+formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+
+app = Flask(__name__)
+
+
+@app.route("/<owner>/<repo>/", methods=["GET"])
+@app.route("/<owner>/<repo>/tree/main/", methods=["GET"])
+@app.route("/<owner>/<repo>/tree/main/<path:path>", methods=["GET"])
+def get_tree(owner, repo, path=""):
+    """Render the listing of a repository directory, plus its README if any.
+
+    Args:
+        owner: GitHub account that owns the repository.
+        repo: Repository name.
+        path: Directory inside the repository; empty for the root.
+
+    Aborts with 404 when the repository cannot be fetched or read.
+    """
+    repo_url = f"https://github.com/{owner}/{repo}.git"
+    git = Git(repo_url)
+
+    # Match on "dir/" rather than "dir": listing "src" must not pick up
+    # "src2/...", and the remainder must not start with a slash.
+    directory = path.strip("/")
+    prefix = f"{directory}/" if directory else ""
+
+    try:
+        directory_structure = git.get_directory_structure()
+        filtered_structure = [
+            entry[len(prefix) :]
+            for entry in directory_structure
+            if entry.startswith(prefix)
+        ]
+
+        # Separate files and directories
+        directories = sorted(
+            {entry.split("/", 1)[0] for entry in filtered_structure if "/" in entry}
+        )
+        files = [entry for entry in filtered_structure if "/" not in entry]
+
+        # Get README.md content if it exists
+        readme_content = None
+        if "README.md" in files:
+            readme_md = git.get_file_content(f"{prefix}README.md")
+            # The template renders this with |safe, so HTML embedded in the
+            # (untrusted) README has to be escaped here.
+            readme_content = markdown2.markdown(
+                readme_md.decode("utf-8", errors="replace"), safe_mode="escape"
+            )
+
+        return render_template(
+            "path.html",
+            owner=owner,
+            repo=repo,
+            path=path,
+            directories=directories,
+            files=files,
+            readme_content=readme_content,
+        )
+    except Exception as e:
+        logger.error(
+            f"Error getting directory structure for {path} in {owner}/{repo}: {e}"
+        )
+        abort(404, description=str(e))
+
+
+@app.route("/<owner>/<repo>/raw/<path:file_path>", methods=["GET"])
+def get_raw(owner, repo, file_path):
+    """Return the bytes of ``file_path`` at the repository's HEAD.
+
+    Aborts with 404 when the file cannot be fetched.
+    """
+    repo_url = f"https://github.com/{owner}/{repo}.git"
+    git = Git(repo_url)
+    try:
+        file_content = git.get_file_content(file_path)
+    except Exception as e:
+        logger.error(
+            f"Error getting file content for {file_path} in {owner}/{repo}: {e}"
+        )
+        abort(404, description=str(e))
+
+    # Never let the browser interpret repository content as HTML: text is
+    # served as plain text, everything else as an opaque byte stream.
+    try:
+        file_content.decode("utf-8")
+        mimetype = "text/plain"
+    except UnicodeDecodeError:
+        mimetype = "application/octet-stream"
+    response = Response(file_content, mimetype=mimetype)
+    response.headers["X-Content-Type-Options"] = "nosniff"
+    return response
+
+
+if __name__ == "__main__":
+    app.run(debug=True, port=8107)
diff --git a/src/gitcloak/classes/__init__.py b/src/gitcloak/classes/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/gitcloak/classes/git.py b/src/gitcloak/classes/git.py
new file mode 100644
index 0000000..bab038a
--- /dev/null
+++ b/src/gitcloak/classes/git.py
@@ -0,0 +1,123 @@
+"""Read a remote Git repository into memory using Dulwich."""
+
+import stat
+
+import requests
+from dulwich.objects import Tree, Blob
+from dulwich.client import HttpGitClient, get_transport_and_path
+from dulwich.repo import MemoryRepo
+
+
+class InMemoryRepo(MemoryRepo):
+    """MemoryRepo with helpers for walking the tree of a commit."""
+
+    def get_tree(self, commit_sha):
+        """Return the root tree object of the commit ``commit_sha``."""
+        commit = self.get_object(commit_sha)
+        return self.get_object(commit.tree)
+
+    def list_tree(self, tree, prefix=""):
+        """Yield the path of every non-directory entry below ``tree``."""
+        for entry in tree.items():
+            name = entry.path.decode("utf-8")
+            entry_path = f"{prefix}/{name}" if prefix else name
+            # Decide by mode instead of loading the object: a submodule entry
+            # points at a commit that is not part of this repository, so
+            # looking it up would raise KeyError and abort the whole listing.
+            if stat.S_ISDIR(entry.mode):
+                yield from self.list_tree(self.get_object(entry.sha), entry_path)
+            else:
+                yield entry_path
+
+    def get_file_content(self, tree, file_path):
+        """Return the bytes of the blob at ``file_path`` below ``tree``.
+
+        Raises:
+            ValueError: If the path is missing, is not a file, or passes
+                through an entry that is not a directory.
+        """
+        parts = file_path.split("/")
+        for entry in tree.items():
+            if entry.path.decode("utf-8") != parts[0]:
+                continue
+            try:
+                obj = self.get_object(entry.sha)
+            except KeyError:
+                # Object not in this repository (e.g. a submodule commit).
+                obj = None
+            if len(parts) == 1:
+                if isinstance(obj, Blob):
+                    return obj.data
+                raise ValueError(f"Path {file_path} is not a file.")
+            if isinstance(obj, Tree):
+                return self.get_file_content(obj, "/".join(parts[1:]))
+            raise ValueError(f"Path {file_path} is not a directory.")
+        raise ValueError(f"File {file_path} not found in the repository.")
+
+
+class Git:
+    """Read-only access to the HEAD revision of a remote repository."""
+
+    def __init__(self, repo_url):
+        self.repo_url = repo_url.rstrip("/")
+        self.client = HttpGitClient(self.repo_url)
+        self.repo = None
+        self._head_tree = None
+
+    def get_remote_refs(self):
+        """Fetch the remote into ``self.repo`` and return Dulwich's fetch result."""
+        if self.repo is None:
+            self.repo = InMemoryRepo()
+        client, path = get_transport_and_path(self.repo_url)
+        refs = client.fetch(path, self.repo)
+        return refs
+
+    def get_head_commit(self, refs):
+        """Return the commit id that HEAD points at.
+
+        Accepts either a fetch result exposing ``.refs`` or a plain mapping.
+        """
+        refs = getattr(refs, "refs", refs)
+        return refs[b"HEAD"]
+
+    def get_pack_data(self, commit_sha):
+        """POST an upload-pack request for ``commit_sha``; return the raw body."""
+        if isinstance(commit_sha, bytes):
+            commit_sha = commit_sha.decode("ascii")
+        url = f"{self.repo_url}/git-upload-pack"
+        want = (
+            f"want {commit_sha} multi_ack_detailed side-band-64k thin-pack "
+            "ofs-delta agent=git/2.28.0\n"
+        )
+        # A pkt-line starts with its total length (payload + 4) as 4 hex digits.
+        request_body = f"{len(want) + 4:04x}{want}00000009done\n"
+        response = requests.post(
+            url,
+            data=request_body.encode("utf-8"),
+            headers={"Content-Type": "application/x-git-upload-pack-request"},
+            timeout=30,
+        )
+        response.raise_for_status()
+        return response.content
+
+    def _get_head_tree(self):
+        """Fetch the remote on first use and return the root tree of HEAD."""
+        if self._head_tree is None:
+            # Fetch the remote references and objects into a fresh in-memory
+            # repository; later calls on this instance reuse the result.
+            self.repo = InMemoryRepo()
+            refs = self.get_remote_refs()
+            head_commit_hash = self.get_head_commit(refs)
+            self._head_tree = self.repo.get_tree(head_commit_hash)
+        return self._head_tree
+
+    def get_directory_structure(self):
+        """Return the paths of all files in the HEAD commit."""
+        tree = self._get_head_tree()
+        return list(self.repo.list_tree(tree))
+
+    def get_file_content(self, file_path):
+        """Return the bytes of ``file_path`` in the HEAD commit."""
+        file_path = file_path.lstrip("/")
+        tree = self._get_head_tree()
+        return self.repo.get_file_content(tree, file_path)
diff --git a/src/gitcloak/templates/path.html b/src/gitcloak/templates/path.html
new file mode 100644
index 0000000..b722350
--- /dev/null
+++ b/src/gitcloak/templates/path.html
@@ -0,0 +1,66 @@
+ + + + + + {{ owner }}/{{ repo }} - {{ path }} + + + +

{{ owner }}/{{ repo }} - {{ path }}

+
+

Directories

+
+ {% for directory in directories %} + + {% endfor %} +
+
+
+

Files

+
+ {% for file in files %} + + {% endfor %} +
+
+ {% if readme_content %} +
+

README.md

+
{{ readme_content|safe }}
+
+ {% endif %} + + diff --git a/test.py b/test.py new file mode 100644 index 0000000..16a756a --- /dev/null +++ b/test.py @@ -0,0 +1,11 @@ +from gitcloak.classes.git import Git + +repo = "https://github.com/privatecoffee/transfer.coffee" +git = Git(repo) +print(git.get_directory_structure()) + +file_path = "public/dist/js/webtorrent.LICENSE" +content = git.get_file_content(file_path) +print(f"Type of content: {type(content)}") +content = content.decode('utf-8') +print(f"\nContent of {file_path}:\n{content}") \ No newline at end of file