feat: add homepage and improve markdown processing

- Added an index route and template for the homepage
- Moved to a custom markdown processor to handle relative URLs
- Enhanced CSS for better layout structure and styling
- Improved the directory listing logic to support path filtering
- Updated the raw-file endpoint to serve files under the /raw/main/ path
- Added docstrings to the in-memory repository methods (get_tree, list_tree) in the Git module

These changes improve user navigation and content display across the application.
This commit is contained in:
Kumi 2024-06-19 08:31:07 +02:00
parent 6edc966f45
commit 621022dfb8
Signed by: kumi
GPG key ID: ECBCC9082395383F
7 changed files with 160 additions and 31 deletions

View file

@ -1,6 +1,6 @@
from flask import Flask, render_template, abort, send_from_directory
from .classes.git import Git
import markdown2
from .classes.markdown import RelativeURLRewriter
import logging
from pathlib import Path
@ -14,6 +14,11 @@ logger.addHandler(handler)
app = Flask(__name__)
@app.route("/")
def index():
    """Render the homepage from the ``index.html`` template."""
    homepage = render_template("index.html")
    return homepage
@app.route("/assets/<path:path>")
def send_assets(path):
    """Serve a file from the ``assets`` directory next to this module.

    Args:
        path: Path of the requested asset, relative to the assets directory.
    """
    assets_dir = Path(__file__).parent / "assets"
    return send_from_directory(assets_dir, path)
@ -29,19 +34,23 @@ def get_tree(owner, repo, path=""):
repo_url = f"https://github.com/{owner}/{repo}.git"
git = Git(repo_url)
try:
directory_structure = git.get_directory_structure()
filtered_structure = [
entry[len(path) :].lstrip("/")
for entry in directory_structure
if entry.startswith(path)
]
directory_structure = git.get_directory_structure(path)
filtered_structure = directory_structure
logger.debug(f"Filtered structure: {filtered_structure}")
# Separate files and directories
directories = sorted(list(set(
[entry.split("/")[0] for entry in filtered_structure if "/" in entry]
)))
directories = sorted(
list(
set(
[
entry.split("/")[0]
for entry in filtered_structure
if "/" in entry
]
)
)
)
files = [entry for entry in filtered_structure if "/" not in entry]
# Get README.md content if it exists
@ -49,7 +58,10 @@ def get_tree(owner, repo, path=""):
if f"README.md" in files:
readme_md = git.get_file_content(f"{path}/README.md")
readme_content = markdown2.markdown(readme_md.decode("utf-8"))
base_url = f"/{owner}/{repo}/raw/main/{path}".rstrip("/")
readme_content = RelativeURLRewriter(base_url).convert(
readme_md.decode("utf-8")
)
return render_template(
"path.html",
@ -67,7 +79,7 @@ def get_tree(owner, repo, path=""):
abort(404, description=str(e))
@app.route("/<owner>/<repo>/raw/<path:file_path>", methods=["GET"])
@app.route("/<owner>/<repo>/raw/main/<path:file_path>", methods=["GET"])
def get_raw(owner, repo, file_path):
repo_url = f"https://github.com/{owner}/{repo}.git"
git = Git(repo_url)

View file

@ -5,6 +5,15 @@ body {
height: 100%;
}
body {
display: flex;
flex-direction: column;
}
.container {
flex: 1;
}
.directory a,
.file a {
text-decoration: none;
@ -33,6 +42,9 @@ body {
.icon {
margin-right: 5px;
width: 1em;
height: 1em;
vertical-align: middle;
}
.list-group-item {
@ -44,3 +56,19 @@ body {
font-size: 1.5rem;
padding: 0 10px;
}
.footer {
position: fixed;
bottom: 0;
width: 100%;
text-align: center;
}
.text-white {
color: white;
}
.text-white a {
color: white;
text-decoration: underline;
}

View file

@ -1,26 +1,67 @@
import requests
from dulwich.objects import Tree, Blob
import logging
from dulwich.objects import Tree, Blob, ShaFile, Tree
from dulwich.client import HttpGitClient, get_transport_and_path
from dulwich.repo import MemoryRepo
class InMemoryRepo(MemoryRepo):
def get_tree(self, commit_sha):
def get_tree(self, commit_sha: bytes) -> Tree:
"""Return the tree object for the given commit.
Args:
commit_sha (bytes): The commit hash.
Returns:
ShaFile: The tree object.
"""
commit = self.get_object(commit_sha)
return self.get_object(commit.tree)
def list_tree(self, tree, prefix=""):
def list_tree(self, tree, path="", prefix=""):
"""List the directory structure of the tree object.
Args:
tree (Tree): The tree object.
path (str): The path within the tree object.
prefix (str): The prefix to be added to the path.
Yields:
str: The path of the file or directory.
"""
logging.debug(f"Listing tree {tree.sha()} with path {path}, prefix {prefix}")
for entry in tree.items():
entry_path = f"{prefix}/{entry.path.decode('utf-8')}" if prefix else entry.path.decode('utf-8')
entry_path = (
f"{prefix}/{entry.path.decode('utf-8')}"
if prefix
else entry.path.decode("utf-8")
)
if path:
path_parts = path.split("/")
if path_parts[0] != entry.path.decode("utf-8"):
continue
if isinstance(self.get_object(entry.sha), Tree):
for _ in self.list_tree(self.get_object(entry.sha), entry_path):
yield(_)
if path:
for _ in self.list_tree(
self.get_object(entry.sha), path="/".join(path_parts[1:]), prefix="/".join(path_parts[1:])
):
yield (_)
else:
yield(entry_path)
for _ in self.list_tree(
self.get_object(entry.sha), prefix=entry_path
):
yield (_)
else:
yield (entry_path)
def get_file_content(self, tree, file_path):
parts = file_path.split('/')
parts = file_path.split("/")
for entry in tree.items():
entry_name = entry.path.decode('utf-8')
entry_name = entry.path.decode("utf-8")
if entry_name == parts[0]:
if len(parts) == 1:
file_obj = self.get_object(entry.sha)
@ -30,14 +71,17 @@ class InMemoryRepo(MemoryRepo):
raise ValueError(f"Path {file_path} is not a file.")
else:
if isinstance(self.get_object(entry.sha), Tree):
return self.get_file_content(self.get_object(entry.sha), '/'.join(parts[1:]))
return self.get_file_content(
self.get_object(entry.sha), "/".join(parts[1:])
)
else:
raise ValueError(f"Path {file_path} is not a directory.")
raise ValueError(f"File {file_path} not found in the repository.")
class Git:
def __init__(self, repo_url):
self.repo_url = repo_url.rstrip('/')
self.repo_url = repo_url.rstrip("/")
self.client = HttpGitClient(self.repo_url)
def get_remote_refs(self):
@ -46,16 +90,16 @@ class Git:
return refs
def get_head_commit(self, refs):
return refs[b'HEAD']
return refs[b"HEAD"]
def get_pack_data(self, commit_sha):
url = f"{self.repo_url}/git-upload-pack"
request_body = f"0032want {commit_sha} multi_ack_detailed side-band-64k thin-pack ofs-delta agent=git/2.28.0\n00000009done\n"
response = requests.post(url, data=request_body.encode('utf-8'))
response = requests.post(url, data=request_body.encode("utf-8"))
response.raise_for_status()
return response.content
def get_directory_structure(self):
def get_directory_structure(self, path=""):
# Initialize an in-memory repository
self.repo = InMemoryRepo()
@ -67,10 +111,10 @@ class Git:
tree = self.repo.get_tree(head_commit_hash)
# List the directory structure
return list(self.repo.list_tree(tree))
return list(self.repo.list_tree(tree, path=path))
def get_file_content(self, file_path):
file_path = file_path.lstrip('/')
file_path = file_path.lstrip("/")
# Initialize an in-memory repository
self.repo = InMemoryRepo()

View file

@ -0,0 +1,20 @@
import re
import markdown2
class RelativeURLRewriter(markdown2.Markdown):
    """Markdown converter that prefixes relative ``src``/``href`` URLs.

    After the Markdown has been converted, every double-quoted ``src`` or
    ``href`` attribute whose URL is relative gets ``base_url`` prepended, so
    that images and links resolve against the given base instead of the
    page they are rendered on.

    Args:
        base_url (str): Prefix for relative URLs. A trailing slash is
            stripped so the joined URL never contains a double slash.
        *args, **kwargs: Passed through unchanged to ``markdown2.Markdown``.
    """

    # Group 1 is the attribute name, group 2 the URL. Capturing the name lets
    # the replacement re-emit the same attribute it matched.
    _URL_ATTRIBUTE_RE = re.compile(r'(src|href)="([^"]+)"')

    # URLs starting with these are never rewritten: root-relative paths and
    # in-page anchors. "md5-" is presumably markdown2's internal placeholder
    # for hashed spans (TODO confirm); it is skipped exactly as before.
    _SKIPPED_PREFIXES = ("/", "#", "md5-")

    def __init__(self, base_url, *args, **kwargs):
        self.base_url = base_url.rstrip("/")
        super().__init__(*args, **kwargs)

    def postprocess(self, text):
        """Rewrite relative ``src`` and ``href`` URLs in the converted text.

        Args:
            text (str): The HTML produced by the Markdown conversion.

        Returns:
            str: The HTML with relative URLs prefixed by ``self.base_url``.
        """

        def replace_url(match):
            attribute, url = match.groups()
            # A colon means a scheme (https:, mailto:, data:, ...), i.e. the
            # URL is not relative; the skipped prefixes are handled likewise.
            if ":" in url or url.startswith(self._SKIPPED_PREFIXES):
                return match.group(0)
            # Keep the matched attribute name: emitting a fixed 'src' here
            # would turn every relative link (href) into a src attribute.
            return f'{attribute}="{self.base_url}/{url}"'

        return self._URL_ATTRIBUTE_RE.sub(replace_url, text)

View file

@ -8,11 +8,16 @@
<link href="/assets/css/style.css" rel="stylesheet">
</head>
<body>
<nav class="navbar navbar-expand-lg navbar-dark bg-dark">
<nav class="navbar navbar-dark bg-dark">
<a class="navbar-brand" href="/">GitCloak</a>
</nav>
<div class="container mt-4">
{% block content %}{% endblock %}
</div>
<footer class="footer mt-auto py-3 bg-dark text-white">
<div class="container">
<span>GitCloak is brought to you by <a href="https://private.coffee">Private.coffee</a>.</span>
</div>
</footer>
</body>
</html>

View file

@ -0,0 +1,20 @@
{% extends 'base.html' %}
{% block title %}Welcome to GitCloak{% endblock %}
{% block content %}
<div class="jumbotron">
<h1 class="display-8">Welcome to GitCloak!</h1>
<p class="lead">GitCloak is a simple web frontend that lets you browse public GitHub repositories and view their contents.</p>
<p class="lead">Unlike other GitHub interfaces, GitCloak does not use APIs or screen scraping, needs no credentials and does not use any JavaScript.</p>
<hr class="my-4">
<p>To get started, simply replace <code>github.com</code> in the URL - the following paths are already available:</p>
<ul>
<li><code>/&lt;owner&gt;/&lt;repo&gt;/</code> - View the root directory of the repository</li>
<li><code>/&lt;owner&gt;/&lt;repo&gt;/tree/main/&lt;path&gt;</code> - View a specific directory</li>
<li><code>/&lt;owner&gt;/&lt;repo&gt;/raw/main/&lt;file_path&gt;</code> - View the raw content of a specific file</li>
</ul>
<p>For example, to see the contents of the <code>PrivateCoffee/transfer.coffee</code> repository, simply visit <a href="/PrivateCoffee/transfer.coffee/">/PrivateCoffee/transfer.coffee/</a>.</p>
<p>Other features are still in development, so stay tuned!</p>
</div>
{% endblock %}

View file

@ -2,7 +2,7 @@ from gitcloak.classes.git import Git
repo = "https://github.com/privatecoffee/transfer.coffee"
git = Git(repo)
print(git.get_directory_structure())
print(git.get_directory_structure("public/"))
file_path = "public/dist/js/webtorrent.LICENSE"
content = git.get_file_content(file_path)