feat: Enhance stability and configurability

- Extended copyright to reflect the current year.
- Incremented project version to indicate new features and fixes.
- Added a new script entry for easier execution, increasing utility and accessibility.
- Updated project URLs for better alignment with current infrastructure.
- Refactored settings path for simplicity and consistency across deployments.
- Improved code readability and maintenance across several modules by cleaning up redundant code, adding missing type annotations, and ensuring consistent code formatting.
- Enhanced logging capabilities and error handling to improve diagnostics and troubleshooting, supporting more robust error recovery mechanisms.
- Implemented more graceful handling of termination signals to ensure clean shutdown and resource cleanup, enhancing the robustness of the application in production environments.
- Introduced command-line argument parsing for configuration file path customization, improving flexibility in different runtime environments.

These changes collectively improve the project's maintainability, reliability, and user experience, laying a stronger foundation for future development.
This commit is contained in:
Kumi 2024-04-22 16:39:33 +02:00
parent 6df9d7b9dc
commit e82ccb2701
Signed by: kumi
GPG key ID: ECBCC9082395383F
18 changed files with 398 additions and 203 deletions

View file

@ -1,4 +1,4 @@
Copyright (c) 2021-2022 Kumi Systems e.U. <office@kumi.systems>
Copyright (c) 2021-2024 Kumi Systems e.U. <office@kumi.systems>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View file

@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "contentmonster"
version = "0.0.6"
version = "0.0.7"
authors = [
{ name="Kumi Systems e.U.", email="office@kumi.systems" },
]
@ -22,6 +22,10 @@ dependencies = [
"watchdog"
]
[project.scripts]
contentmonster = "contentmonster.worker:main"
[project.urls]
"Homepage" = "https://kumig.it/kumisystems/contentmonster"
"Bug Tracker" = "https://kumig.it/kumisystems/contentmonster/issues"
"Homepage" = "https://git.private.coffee/kumisystems/contentmonster"
"Bug Tracker" = "https://git.private.coffee/kumisystems/contentmonster/issues"
"Source Code" = "https://git.private.coffee/kumisystems/contentmonster"

View file

@ -15,8 +15,8 @@ class Chunk:
chunksize (int): Size of each chunk in bytes
Returns:
classes.chunk.Chunk: A Chunk object containing the portion of the
File object beginning at (count * chunksize) bytes and ending at
classes.chunk.Chunk: A Chunk object containing the portion of the
File object beginning at (count * chunksize) bytes and ending at
((count + 1) * chunksize - 1) bytes
"""
return fileobj.getChunk(count, chunksize)
@ -26,7 +26,7 @@ class Chunk:
Args:
fileobj (classes.file.File): The file object from local storage
count (int): Position of the current chunk in the list of total
count (int): Position of the current chunk in the list of total
chunks (first index: 0) or -1 to get the complete file
data (bytes): Content of the chunk
"""
@ -49,3 +49,6 @@ class Chunk:
str: SHA256 hash of Chunk.data
"""
return hashlib.sha256(self.data).hexdigest()
def getFileSize(self) -> int:
return len(self.data)

View file

@ -41,17 +41,14 @@ class MonsterConfig:
for section in parser.sections():
# Read Directories from the config file
if section.startswith("Directory"):
self.directories.append(
Directory.fromConfig(parser[section]))
self.directories.append(Directory.fromConfig(parser[section]))
# Read Vessels from the config file
elif section.startswith("Vessel"):
self.vessels.append(
Vessel.fromConfig(parser[section], dbclass))
self.vessels.append(Vessel.fromConfig(parser[section], dbclass))
def __init__(self) -> None:
"""Initialize a new (empty) MonsterConfig object
"""
"""Initialize a new (empty) MonsterConfig object"""
self.directories = []
self.vessels = []
self.chunksize = 10485760 # Default: 10 MiB

View file

@ -9,10 +9,11 @@ from typing import Union, Optional
import errno
import stat
from .logger import Logger
class Connection:
"""Class representing an SSH/SFTP connection to a Vessel
"""
"""Class representing an SSH/SFTP connection to a Vessel"""
def __init__(self, vessel):
"""Initialize a new Connection to a Vessel
@ -24,12 +25,20 @@ class Connection:
self._client = SSHClient()
self._client.load_system_host_keys()
self._client.set_missing_host_key_policy(WarningPolicy)
self._client.connect(vessel.address, vessel.port, vessel.username,
vessel.password, timeout=vessel.timeout,
passphrase=vessel.passphrase)
self._client.connect(
vessel.address,
vessel.port,
vessel.username,
vessel.password,
timeout=vessel.timeout,
passphrase=vessel.passphrase,
auth_timeout=vessel.timeout,
banner_timeout=vessel.timeout,
)
self._transport = self._client.get_transport()
self._transport.set_keepalive(10)
self._sftp = self._client.open_sftp()
self._logger = Logger()
def _exists(self, path: Union[str, Path]) -> bool:
"""Check if a path exists on the Vessel. Symlinks are not followed.
@ -97,20 +106,35 @@ class Connection:
"""
return self._sftp.remove(str(path))
def _size(self, path: Union[str, Path]) -> int:
"""Get the size of a file on the Vessel
Args:
path (str, pathlib.Path): Path of the file to check
Returns:
int: Size of the file in bytes
Raises:
FileNotFoundError: Raised if no file is found at the given path
"""
return self._sftp.stat(str(path)).st_size
def assertTempDirectory(self) -> None:
"""Make sure that the temp directory exists on the Vessel
Raises:
ValueError: Raised if the path is already in use on the vessel but
is not a directory.
IOError: Raised if the directory does not exist but cannot be
IOError: Raised if the directory does not exist but cannot be
created.
"""
if not self._exists(self._vessel.tempdir):
self._mkdir(self._vessel.tempdir)
elif not self._isdir(self._vessel.tempdir):
raise ValueError(
f"{self._vessel.tempdir} exists but is not a directory on Vessel {self._vessel.name}!")
f"{self._vessel.tempdir} exists but is not a directory on Vessel {self._vessel.name}!"
)
def assertDirectories(self, directory) -> None:
"""Make sure that destination and temp directories exist on the Vessel
@ -122,7 +146,7 @@ class Connection:
Raises:
ValueError: Raised if a path is already in use on the vessel but
not a directory.
IOError: Raised if a directory that does not exist cannot be
IOError: Raised if a directory that does not exist cannot be
created.
"""
for d in [directory.location, self._vessel.tempdir]:
@ -130,15 +154,18 @@ class Connection:
self._mkdir(d)
elif not self._isdir(d):
raise ValueError(
f"{d} exists but is not a directory on Vessel {self._vessel.name}!")
f"{d} exists but is not a directory on Vessel {self._vessel.name}!"
)
def assertChunkComplete(self, chunk, path: Optional[Union[str, Path]] = None) -> bool:
def assertChunkComplete(
self, chunk: "Chunk", path: Optional[Union[str, Path]] = None
) -> bool:
"""Check if a Chunk has been uploaded correctly
Args:
chunk (classes.chunk.Chunk): Chunk object to verify upload for
path (str, pathlib.Path, optional): Optional path at which to
check. If None, will get default path from Chunk object.
path (str, pathlib.Path, optional): Optional path at which to
check. If None, will get default path from Chunk object.
Defaults to None.
Returns:
@ -154,10 +181,29 @@ class Connection:
o.channel.recv_exit_status()
# Remove the file if it is broken
if not o.readline().split()[0] == chunk.getHash():
complete_output = o.readlines()
filehash = complete_output[0].split()[0]
chunkhash = chunk.getHash()
if filehash != chunkhash:
self._logger.error(
f"Hash mismatch for {chunk.file.uuid} chunk {chunk.count}: {filehash} != {chunkhash}"
)
filesize = self._size(path)
chunksize = chunk.getFileSize()
self._logger.debug(
f"File size: {filesize} bytes, expected chunk size: {chunksize} bytes"
)
self._logger.debug("\n".join(complete_output))
self._remove(path)
else:
self._logger.debug(
f"Hash match for {chunk.file.uuid} chunk {chunk.count}: {filehash} == {chunkhash}"
)
return True
return False
@ -185,16 +231,30 @@ class Connection:
numchunks = remotefile.getStatus() + 1
# Get files in correct order to concatenate
files = " ".join(
[str(self._vessel.tempdir / f"{remotefile.file.uuid}_{i}.part") for i in range(numchunks)])
files = [
str(self._vessel.tempdir / f"{remotefile.file.uuid}_{i}.part")
for i in range(numchunks)
]
for filepath in files:
if not self._exists(filepath):
self._logger.error(f"{filepath} does not exist...?")
return False
completefile = remotefile.file.getChunk(-1)
outname = completefile.getTempName()
outpath = self._vessel.tempdir / outname
_, o, _ = self._client.exec_command(f"cat {files} > {outpath}")
# Block for command to complete
o.channel.recv_exit_status()
if self._exists(outpath):
self._remove(outpath)
for f in files:
_, o, _ = self._client.exec_command(f"cat {f} >> {outpath}")
# Block for command to complete
o.channel.recv_exit_status()
return True
def assertComplete(self, remotefile, allow_retry: bool = False) -> bool:
"""Check if File has been reassembled from Chunks correctly
@ -230,7 +290,7 @@ class Connection:
"""Moves reassembled file to output destination
Args:
remotefile (classes.remotefile.RemoteFile): RemoteFile object
remotefile (classes.remotefile.RemoteFile): RemoteFile object
describing the uploaded File.
Returns:
@ -239,7 +299,8 @@ class Connection:
completefile = remotefile.file.getChunk(-1)
destination = remotefile.file.getFullPath()
self._sftp.posix_rename(
str(self._vessel.tempdir / completefile.getTempName()), str(destination))
str(self._vessel.tempdir / completefile.getTempName()), str(destination)
)
# Make sure that file has actually been created at destination
self._sftp.stat(str(destination))
@ -255,12 +316,10 @@ class Connection:
return f.split("_")[0]
def clearTempDir(self) -> None:
"""Clean up the temporary directory on the Vessel
"""
"""Clean up the temporary directory on the Vessel"""
for f in self._listdir(self._vessel.tempdir):
self._remove(self._vessel.tempdir / f)
def __del__(self):
"""Close SSH connection when ending Connection
"""
"""Close SSH connection when ending Connection"""
self._client.close()

View file

@ -4,10 +4,11 @@ import uuid
from typing import Union, Optional
from .logger import Logger
class Database:
"""Class wrapping sqlite3 database connection
"""
"""Class wrapping sqlite3 database connection"""
def __init__(self, filename: Optional[Union[str, pathlib.Path]] = None):
"""Initialize a new Database object
@ -17,22 +18,34 @@ class Database:
database to use. If None, use "database.sqlite3" in project base
directory. Defaults to None.
"""
filename = filename or pathlib.Path(
__file__).parent.parent.absolute() / "database.sqlite3"
filename = (
filename
or pathlib.Path(__file__).parent.parent.absolute() / "database.sqlite3"
)
self._con = sqlite3.connect(filename)
self.migrate()
def _execute(self, query: str, parameters: Optional[tuple] = None) -> None:
def _execute(
self, query: str, parameters: Optional[tuple] = None, retry: bool = True
) -> None:
"""Execute a query on the database
Args:
query (str): SQL query to execute
parameters (tuple, optional): Parameters to use to replace
placeholders in the query, if any. Defaults to None.
retry (bool, optional): Whether to retry the query if it fails due to
a locked database. Defaults to True.
"""
cur = self.getCursor()
cur.execute(query, parameters)
self.commit() # Instantly commit after every (potential) write action
try:
cur = self.getCursor()
cur.execute(query, parameters)
self.commit() # Instantly commit after every (potential) write action
except sqlite3.OperationalError as e:
self._logger.error(f"An error occurred while writing to the database: {e}")
if retry:
return self._execute(query, parameters, False)
raise
def commit(self) -> None:
"""Commit the current database transaction
@ -60,7 +73,8 @@ class Database:
cur = self.getCursor()
try:
cur.execute(
"SELECT value FROM contentmonster_settings WHERE key = 'dbversion'")
"SELECT value FROM contentmonster_settings WHERE key = 'dbversion'"
)
assert (version := cur.fetchone())
return int(version[0])
except (sqlite3.OperationalError, AssertionError):
@ -78,8 +92,10 @@ class Database:
hash = fileobj.getHash()
cur = self.getCursor()
cur.execute("SELECT uuid, checksum FROM contentmonster_file WHERE directory = ? AND name = ?",
(fileobj.directory.name, fileobj.name))
cur.execute(
"SELECT uuid, checksum FROM contentmonster_file WHERE directory = ? AND name = ?",
(fileobj.directory.name, fileobj.name),
)
fileuuid = None
@ -102,7 +118,7 @@ class Database:
Args:
fileobj (classes.file.File): File object to add to database
hash (str, optional): Checksum of the file, if already known.
hash (str, optional): Checksum of the file, if already known.
Defaults to None.
Returns:
@ -110,8 +126,10 @@ class Database:
"""
hash = hash or fileobj.getHash()
fileuuid = str(uuid.uuid4())
self._execute("INSERT INTO contentmonster_file(uuid, directory, name, checksum) VALUES (?, ?, ?, ?)",
(fileuuid, fileobj.directory.name, fileobj.name, hash))
self._execute(
"INSERT INTO contentmonster_file(uuid, directory, name, checksum) VALUES (?, ?, ?, ?)",
(fileuuid, fileobj.directory.name, fileobj.name, hash),
)
return fileuuid
def getFileByUUID(self, fileuuid: str) -> Optional[tuple[str, str, str]]:
@ -129,20 +147,24 @@ class Database:
"""
cur = self.getCursor()
cur.execute(
"SELECT directory, name, checksum FROM contentmonster_file WHERE uuid = ?", (fileuuid,))
if (result := cur.fetchone()):
"SELECT directory, name, checksum FROM contentmonster_file WHERE uuid = ?",
(fileuuid,),
)
if result := cur.fetchone():
return result
def removeFile(self, directory, name: str) -> None:
"""Remove a File from the database based on Directory and filename
Args:
directory (classes.directory.Directory): Directory object
directory (classes.directory.Directory): Directory object
containing the File to remove
name (str): Filename of the File to remove
"""
self._execute(
"DELETE FROM contentmonster_file WHERE directory = ? AND name = ?", (directory.name, name))
"DELETE FROM contentmonster_file WHERE directory = ? AND name = ?",
(directory.name, name),
)
def removeFileByUUID(self, fileuuid: str) -> None:
"""Remove a File from the database based on UUID
@ -150,19 +172,20 @@ class Database:
Args:
fileuuid (str): The UUID of the File to remove from the database
"""
self._execute(
"DELETE FROM contentmonster_file WHERE uuid = ?", (fileuuid,))
self._execute("DELETE FROM contentmonster_file WHERE uuid = ?", (fileuuid,))
def logCompletion(self, file, vessel):
"""Log the completion of a File upload
Args:
file (classes.file.File): The File object that has been uploaded
vessel (classes.vessel.Vessel): The Vessel the File has been
vessel (classes.vessel.Vessel): The Vessel the File has been
uploaded to
"""
self._execute(
"INSERT INTO contentmonster_file_log(file, vessel) VALUES(?, ?)", (file.uuid, vessel.name))
"INSERT INTO contentmonster_file_log(file, vessel) VALUES(?, ?)",
(file.uuid, vessel.name),
)
def getCompletionForVessel(self, vessel) -> list[Optional[str]]:
"""Get completed uploads for a vessel
@ -176,37 +199,44 @@ class Database:
"""
cur = self.getCursor()
cur.execute(
"SELECT file FROM contentmonster_file_log WHERE vessel = ?", (vessel.name,))
"SELECT file FROM contentmonster_file_log WHERE vessel = ?", (vessel.name,)
)
return [f[0] for f in cur.fetchall()]
def getCompletionByFileUUID(self, fileuuid: str) -> list[Optional[str]]:
cur = self.getCursor()
cur.execute("SELECT vessel FROM contentmonster_file_log WHERE file = ?", (fileuuid,))
cur.execute(
"SELECT vessel FROM contentmonster_file_log WHERE file = ?", (fileuuid,)
)
return [v[0] for v in cur.fetchall()]
def migrate(self) -> None:
"""Apply database migrations
"""
"""Apply database migrations"""
cur = self.getCursor()
if self.getVersion() == 0:
cur.execute(
"CREATE TABLE IF NOT EXISTS contentmonster_settings(key VARCHAR(64) PRIMARY KEY, value TEXT)")
"CREATE TABLE IF NOT EXISTS contentmonster_settings(key VARCHAR(64) PRIMARY KEY, value TEXT)"
)
cur.execute(
"INSERT INTO contentmonster_settings(key, value) VALUES ('dbversion', '1')")
"INSERT INTO contentmonster_settings(key, value) VALUES ('dbversion', '1')"
)
self.commit()
if self.getVersion() == 1:
cur.execute(
"CREATE TABLE IF NOT EXISTS contentmonster_file(uuid VARCHAR(36) PRIMARY KEY, directory VARCHAR(128), name VARCHAR(128), checksum VARCHAR(64))")
cur.execute("CREATE TABLE IF NOT EXISTS contentmonster_file_log(file VARCHAR(36), vessel VARCHAR(128), PRIMARY KEY (file, vessel), FOREIGN KEY (file) REFERENCES contentmonster_files(uuid) ON DELETE CASCADE)")
"CREATE TABLE IF NOT EXISTS contentmonster_file(uuid VARCHAR(36) PRIMARY KEY, directory VARCHAR(128), name VARCHAR(128), checksum VARCHAR(64))"
)
cur.execute(
"UPDATE contentmonster_settings SET value = '2' WHERE key = 'dbversion'")
"CREATE TABLE IF NOT EXISTS contentmonster_file_log(file VARCHAR(36), vessel VARCHAR(128), PRIMARY KEY (file, vessel), FOREIGN KEY (file) REFERENCES contentmonster_files(uuid) ON DELETE CASCADE)"
)
cur.execute(
"UPDATE contentmonster_settings SET value = '2' WHERE key = 'dbversion'"
)
self.commit()
def __del__(self):
"""Close database connection on removal of the Database object
"""
"""Close database connection on removal of the Database object"""
self._con.close()

View file

@ -8,14 +8,14 @@ from typing import Union, Optional
class Directory:
"""Class representing a Directory on the local filesystem
"""
"""Class representing a Directory on the local filesystem"""
@classmethod
def fromConfig(cls, config: SectionProxy):
"""Create Directory object from a Directory section in the Config file
Args:
config (configparser.SectionProxy): Configuration section defining
config (configparser.SectionProxy): Configuration section defining
a Directory
Raises:
@ -28,8 +28,11 @@ class Directory:
if "Location" in config.keys():
return cls(config.name.split()[1], config["Location"])
else:
raise ValueError("Definition for Directory " +
config.name.split()[1] + " does not contain Location!")
raise ValueError(
"Definition for Directory "
+ config.name.split()[1]
+ " does not contain Location!"
)
def __init__(self, name: str, location: Union[str, pathlib.Path]):
"""Initialize a new Directory object
@ -39,7 +42,7 @@ class Directory:
location (str, pathlib.Path): Filesystem location of the Directory
Raises:
ValueError: Raised if passed location does not exist or is not a
ValueError: Raised if passed location does not exist or is not a
directory
"""
self.name = name
@ -50,7 +53,8 @@ class Directory:
else:
location = str(location)
raise ValueError(
f"Location {location} for Directory {name} does not exist or is not a directory.")
f"Location {location} for Directory {name} does not exist or is not a directory."
)
@property
def completeddir(self):
@ -59,7 +63,10 @@ class Directory:
def assertCompletedDirectory(self):
if not os.path.isdir(self.completeddir):
if os.path.isfile(self.completeddir):
raise FileExistsError("Cannot create directory %s - path exists but is not a directory!" % str(self.completeddir))
raise FileExistsError(
"Cannot create directory %s - path exists but is not a directory!"
% str(self.completeddir)
)
os.mkdir(self.completeddir)
@ -71,8 +78,11 @@ class Directory:
Returns:
list: List of File objects for files within the Directory
"""
files = [f for f in os.listdir(self.location) if os.path.isfile(
self.location / f) and os.path.getsize(self.location / f)]
files = [
f
for f in os.listdir(self.location)
if os.path.isfile(self.location / f) and os.path.getsize(self.location / f)
]
return [File(f, self) for f in files]
def getFile(self, name: str) -> Optional[File]:

View file

@ -1,6 +1,11 @@
from watchdog.events import (FileSystemEventHandler, FileSystemEvent,
FileCreatedEvent, FileDeletedEvent,
FileModifiedEvent, FileMovedEvent)
from watchdog.events import (
FileSystemEventHandler,
FileSystemEvent,
FileCreatedEvent,
FileDeletedEvent,
FileModifiedEvent,
FileMovedEvent,
)
from multiprocessing import Queue
@ -11,8 +16,7 @@ import time
class DogHandler(FileSystemEventHandler):
"""Class implementing a watchdog event handler
"""
"""Class implementing a watchdog event handler"""
def __init__(self, directory, queue: Queue, *args, **kwargs) -> None:
"""Initialize a new DogHandler object
@ -71,7 +75,8 @@ class DogHandler(FileSystemEventHandler):
file (source and destination)
"""
self._logger.debug(
f"Detected move event of {event.src_path} to {event.dest_path}")
f"Detected move event of {event.src_path} to {event.dest_path}"
)
self._queue.put((self._directory, os.path.basename(event.src_path)))
self._queue.put((self._directory, os.path.basename(event.dest_path)))

View file

@ -8,10 +8,11 @@ import os.path
class File:
"""Object representing a file found in a local Directory
"""
"""Object representing a file found in a local Directory"""
def __init__(self, name: str, directory, uuid: Optional[str] = None, dbclass: type = Database) -> None:
def __init__(
self, name: str, directory, uuid: Optional[str] = None, dbclass: type = Database
) -> None:
"""Initialize new File object
Args:
@ -31,7 +32,9 @@ class File:
self.dbclass = dbclass
if not self.exists():
raise FileNotFoundError(f"File {self.name} does not exist in {self.directory.name}!")
raise FileNotFoundError(
f"File {self.name} does not exist in {self.directory.name}!"
)
self.uuid = uuid or self.getUUID()
@ -90,7 +93,8 @@ class File:
"""
if count != -1 and not size:
raise ValueError(
"A Chunk size needs to be passed to getChunk() if not using the complete file (-1)!")
"A Chunk size needs to be passed to getChunk() if not using the complete file (-1)!"
)
with open(self.getFullPath(), "rb") as binary:
binary.seek((count * size) if count > 0 else 0)

View file

@ -3,4 +3,7 @@ from logging import Logger as PyLogger
class Logger(PyLogger):
def __init__(self, name="contentmonster"):
super().__init__(name)
super().__init__(name)
def trace(self, msg, *args, **kwargs) -> None:
pass

View file

@ -2,8 +2,7 @@ from ..const import STATUS_COMPLETE, STATUS_START
class RemoteFile:
"""Class describing the transfer status of a File to a Vessel
"""
"""Class describing the transfer status of a File to a Vessel"""
def __init__(self, fileobj, vessel, chunksize: int) -> None:
"""Initialize a new RemoteFile object
@ -29,8 +28,7 @@ class RemoteFile:
# Get all files in the vessel's tempdir
ls = self.vessel.connection._listdir(self.vessel.tempdir)
files = [f for f in ls if f.startswith(
self.file.uuid) and f.endswith(".part")]
files = [f for f in ls if f.startswith(self.file.uuid) and f.endswith(".part")]
# Find the file with the largest chunk number
@ -39,11 +37,14 @@ class RemoteFile:
for f in files:
part = f.split("_")[1].split(".")[0]
if part == "complete": # If a reassembled file is found
if self.validateComplete(True): # and it is not broken
return STATUS_COMPLETE # the upload is complete
if self.validateComplete(True): # and it is not broken
return STATUS_COMPLETE # the upload is complete
# Else save the chunk number if it is larger than the previous
count = max(count, int(part))
try:
count = max(count, int(part))
except:
pass
# Find and return the largest non-corrupt chunk
while count >= 0:
@ -80,8 +81,7 @@ class RemoteFile:
return self.vessel.connection.assertComplete(self, allow_retry)
def compileComplete(self) -> None:
"""Reassemble a complete File from the uploaded Chunks
"""
"""Reassemble a complete File from the uploaded Chunks"""
self.vessel.connection.compileComplete(self)
def getChunk(self, count: int):
@ -91,7 +91,7 @@ class RemoteFile:
count (int): Number of the Chunk to generate
Returns:
classes.chunk.Chunk: A Chunk object containing the portion of the
classes.chunk.Chunk: A Chunk object containing the portion of the
File object beginning at (count * chunksize) bytes and ending at
((count + 1) * chunksize - 1) bytes, with chunksize taken from the
RemoteFile initialization value
@ -99,7 +99,6 @@ class RemoteFile:
return self.file.getChunk(count, self.chunksize)
def finalizeUpload(self) -> None:
"""Move complete file to its final destination and clean up
"""
"""Move complete file to its final destination and clean up"""
self.vessel.connection.moveComplete(self)
self.vessel.connection.clearTempDir()
self.vessel.connection.clearTempDir()

View file

@ -5,8 +5,8 @@ from .logger import Logger
class retry:
"""Decorator used to automatically retry operations throwing exceptions
"""
"""Decorator used to automatically retry operations throwing exceptions"""
def __init__(self, exceptions: tuple[BaseException] = None):
"""Initializing the retry decorator
@ -16,10 +16,14 @@ class retry:
paramiko.ssh_exception.SSHException/NoValidConnectionsError and
socket.timeout/TimeoutError. Defaults to None.
"""
self.exceptions = exceptions or (SSHException, NoValidConnectionsError,
timeout, TimeoutError)
self.exceptions = exceptions or (
SSHException,
NoValidConnectionsError,
timeout,
TimeoutError,
)
self._logger = Logger()
def __call__(self, f):
"""Return a function through the retry decorator
@ -29,6 +33,7 @@ class retry:
Returns:
function: Function wrapping the passed function
"""
def wrapped_f(*args, **kwargs):
while True:
try:
@ -36,4 +41,4 @@ class retry:
except self.exceptions as e:
self._logger.info("Caught expected exception: " + repr(e))
return wrapped_f
return wrapped_f

View file

@ -15,8 +15,8 @@ import os.path
class ShoreThread(Process):
"""Thread handling the discovery of shore-side file changes
"""
"""Thread handling the discovery of shore-side file changes"""
def __init__(self, state: dict, dbclass: type = Database) -> None:
"""Create a new ShoreThread object
@ -48,14 +48,12 @@ class ShoreThread(Process):
return files
def clearFiles(self) -> None:
"""Clear the files variable in the application state
"""
"""Clear the files variable in the application state"""
self._logger.debug("Clearing global files variable")
del self._state["files"][:]
def monitor(self) -> None:
"""Initialize monitoring of Directories specified in configuration
"""
"""Initialize monitoring of Directories specified in configuration"""
for directory in self._state["config"].directories:
self._logger.debug("Creating dog for " + str(directory.location))
handler = DogHandler(directory, self.queue)
@ -65,8 +63,7 @@ class ShoreThread(Process):
self._dogs.append(dog)
def run(self) -> NoReturn:
"""Launch the ShoreThread and start monitoring for file changes
"""
"""Launch the ShoreThread and start monitoring for file changes"""
self._logger.info("Launched Shore Thread")
for f in self.getAllFiles():
@ -79,8 +76,7 @@ class ShoreThread(Process):
self.processQueue()
def joinDogs(self) -> None:
"""Join observers to receive updates on the queue
"""
"""Join observers to receive updates on the queue"""
self._logger.debug("Joining dogs")
for dog in self._dogs:
dog.join(1)
@ -103,7 +99,7 @@ class ShoreThread(Process):
outlist.append(f)
self.clearFiles()
for f in outlist:
self._state["files"].append(f)
@ -118,7 +114,9 @@ class ShoreThread(Process):
Args:
fileobj (classes.file.File): File object to add to the queue
"""
self._logger.debug(f"Adding file {fileobj.name} to directory {fileobj.directory.name}")
self._logger.debug(
f"Adding file {fileobj.name} to directory {fileobj.directory.name}"
)
outlist = []
@ -133,9 +131,13 @@ class ShoreThread(Process):
outlist.append(f)
if self.checkFileCompletion(fileobj):
self._logger.debug(f"File {fileobj.name} already transferred to all Vessels - not re-adding to queue")
self._logger.debug(
f"File {fileobj.name} already transferred to all Vessels - not re-adding to queue"
)
else:
self._logger.debug(f"File {fileobj.name} not in processing queue (anymore) - adding")
self._logger.debug(
f"File {fileobj.name} not in processing queue (anymore) - adding"
)
outlist.append(fileobj)
self.clearFiles()
@ -146,15 +148,18 @@ class ShoreThread(Process):
def checkFileCompletion(self, fileobj: File) -> bool:
db = self._dbclass()
complete = db.getCompletionByFileUUID(fileobj.uuid)
del(db)
del db
for vessel in self._state["config"].vessels:
if vessel.name not in complete and fileobj.directory.name not in vessel._ignoredirs:
if (
vessel.name not in complete
and fileobj.directory.name not in vessel._ignoredirs
):
return False
if fileobj.exists():
fileobj.moveCompleted()
return True
def processFile(self, directory: Directory, name: str) -> None:
@ -163,11 +168,13 @@ class ShoreThread(Process):
Args:
directory (classes.directory.Directory): Directory containing the
created, deleted, modified or moved File
name (str): Filename of the created, deleted, modified or moved
name (str): Filename of the created, deleted, modified or moved
File
"""
self._logger.debug(f"Processing change to file {name} in directory {directory.name}")
if (fileobj := directory.getFile(name)):
self._logger.debug(
f"Processing change to file {name} in directory {directory.name}"
)
if fileobj := directory.getFile(name):
self.addFile(fileobj)
else:
self.purgeFile(directory, name)
@ -180,12 +187,11 @@ class ShoreThread(Process):
File that has been created, moved, modified or deleted.
"""
self._logger.debug("Waiting for new changes...")
directory, basename = self.queue.get() # Will block until an event is found
directory, basename = self.queue.get() # Will block until an event is found
self.processFile(directory, basename)
def terminate(self, *args, **kwargs) -> NoReturn:
"""Terminate observer threads, then terminate self
"""
"""Terminate observer threads, then terminate self"""
self._logger.info("Terminating dogs and shore thread")
for dog in self._dogs:
dog.terminate()

View file

@ -11,14 +11,14 @@ import pathlib
class Vessel:
"""Class describing a Vessel (= a replication destination)
"""
"""Class describing a Vessel (= a replication destination)"""
@classmethod
def fromConfig(cls, config: SectionProxy, dbclass: type = Database):
"""Create Vessel object from a Vessel section in the Config file
Args:
config (configparser.SectionProxy): Vessel section defining a
config (configparser.SectionProxy): Vessel section defining a
Vessel
dbclass (type): Class to use for database connections. Defaults to
built-in Database using sqlite3.
@ -61,17 +61,38 @@ class Vessel:
ignoredirs = [d.strip() for d in config["IgnoreDirs"].split(",")]
if "Address" in config.keys():
return cls(config.name.split()[1], config["Address"], username,
password, passphrase, port, timeout, tempdir, ignoredirs, dbclass)
return cls(
config.name.split()[1],
config["Address"],
username,
password,
passphrase,
port,
timeout,
tempdir,
ignoredirs,
dbclass,
)
else:
raise ValueError("Definition for Vessel " +
config.name.split()[1] + " does not contain Address!")
raise ValueError(
"Definition for Vessel "
+ config.name.split()[1]
+ " does not contain Address!"
)
def __init__(self, name: str, address: str, username: Optional[str] = None,
password: Optional[str] = None, passphrase: Optional[str] = None,
port: Optional[int] = None, timeout: Optional[int] = None,
tempdir: Optional[Union[str, pathlib.Path]] = None,
ignoredirs: list[Optional[str]] = [], dbclass: type = Database) -> None:
def __init__(
self,
name: str,
address: str,
username: Optional[str] = None,
password: Optional[str] = None,
passphrase: Optional[str] = None,
port: Optional[int] = None,
timeout: Optional[int] = None,
tempdir: Optional[Union[str, pathlib.Path]] = None,
ignoredirs: list[Optional[str]] = [],
dbclass: type = Database,
) -> None:
"""Initialize new Vessel object
Args:
@ -141,8 +162,7 @@ class Vessel:
return output
def clearTempDir(self) -> None:
"""Clean up the temporary directory on the Vessel
"""
"""Clean up the temporary directory on the Vessel"""
self.connection.clearTempDir()
def pushChunk(self, chunk, path: Optional[Union[str, pathlib.Path]] = None) -> None:
@ -165,7 +185,7 @@ class Vessel:
remotefile (classes.remotefile.RemoteFile): RemoteFile object
describing the uploaded File
"""
self.connection.compileComplete(remotefile)
return self.connection.compileComplete(remotefile)
def assertDirectories(self, directory) -> None:
"""Make sure that destination and temp directories exist on the Vessel
@ -177,7 +197,7 @@ class Vessel:
Raises:
ValueError: Raised if a path is already in use on the vessel but
not a directory.
IOError: Raised if a directory that does not exist cannot be
IOError: Raised if a directory that does not exist cannot be
created.
"""
self.connection.assertDirectories(directory)
@ -188,7 +208,7 @@ class Vessel:
Raises:
ValueError: Raised if the path is already in use on the vessel but
is not a directory.
IOError: Raised if the directory does not exist but cannot be
IOError: Raised if the directory does not exist but cannot be
created.
"""
self.connection.assertTempDirectory()

View file

@ -1,5 +1,6 @@
from multiprocessing import Process
from typing import NoReturn, Optional
from traceback import format_exception
from .vessel import Vessel
from .remotefile import RemoteFile
@ -10,11 +11,11 @@ from .file import File
from ..const import STATUS_COMPLETE, STATUS_START
import time
import sys
class VesselThread(Process):
"""Thread processing uploads to a single vessel
"""
"""Thread processing uploads to a single vessel"""
def __init__(self, vessel: Vessel, state: dict, dbclass: type = Database) -> None:
"""Initialize a new VesselThread
@ -30,8 +31,7 @@ class VesselThread(Process):
self._dbclass = dbclass
def run(self) -> NoReturn:
"""Run thread and process uploads to the vessel
"""
"""Run thread and process uploads to the vessel"""
self._logger.debug("Launched Vessel Thread for " + self.vessel.name)
self.assertDirectories()
while True:
@ -39,31 +39,39 @@ class VesselThread(Process):
self.upload()
time.sleep(5)
except Exception as e:
self._logger.error("An exception occurred in the Vessel Thread for " +
self.vessel.name)
self._logger.error(repr(e))
exc_type, exc_value, exc_traceback = sys.exc_info()
self._logger.error(
"An exception occurred in the Vessel Thread for " + self.vessel.name
)
self._logger.error(
"\n".join(format_exception(exc_type, exc_value, exc_traceback))
)
@retry()
def assertDirectories(self) -> None:
for directory in self._state["config"].directories:
if not directory.name in self.vessel._ignoredirs:
self._logger.debug(
f"Making sure directory {directory.name} exists on Vessel {self.vessel.name}")
f"Making sure directory {directory.name} exists on Vessel {self.vessel.name}"
)
self.vessel.connection.assertDirectories(directory)
@retry()
def upload(self) -> None:
"""Continue uploading process
"""
"""Continue uploading process"""
if not (current := (self.vessel.currentUpload() or self.processQueue())):
self._logger.debug(
f"No file needs to be uploaded to Vessel {self.vessel.name} at the moment")
f"No file needs to be uploaded to Vessel {self.vessel.name} at the moment"
)
return
if isinstance(current, tuple):
dirname, name, _ = current
self._logger.debug(
f"Found file {name} in directory {dirname} for vessel {self.vessel.name}")
f"Found file {name} in directory {dirname} for vessel {self.vessel.name}"
)
directory = None
@ -74,7 +82,8 @@ class VesselThread(Process):
if not directory:
self._logger.debug(
f"Directory {dirname} not specified in config - deleting File from Vessel {self.vessel.name}")
f"Directory {dirname} not specified in config - deleting File from Vessel {self.vessel.name}"
)
self.vessel.clearTempDir()
return
@ -82,26 +91,28 @@ class VesselThread(Process):
fileobj = File(name, directory)
except FileNotFoundError:
self._logger.debug(
f"File {name} does not exist in Directory {dirname} on shore - deleting from Vessel {self.name}")
f"File {name} does not exist in Directory {dirname} on shore - deleting from Vessel {self.name}"
)
self.vessel.clearTempDir()
return
else:
fileobj = current
remotefile = RemoteFile(fileobj, self.vessel,
self._state["config"].chunksize)
remotefile = RemoteFile(fileobj, self.vessel, self._state["config"].chunksize)
self._logger.debug(
f"Start processing file {fileobj.name} in directory {fileobj.directory.name} on vessel {self.vessel.name}")
f"Start processing file {fileobj.name} in directory {fileobj.directory.name} on vessel {self.vessel.name}"
)
while True:
db = self._dbclass()
if not db.getFileByUUID(fileobj.uuid):
self._logger.debug(
f"File {fileobj.name} in directory {fileobj.directory.name} does not exist anymore - deleting from {self.vessel.name}")
f"File {fileobj.name} in directory {fileobj.directory.name} does not exist anymore - deleting from {self.vessel.name}"
)
self.vessel.clearTempDir()
del(db)
del db
self.vessel.assertDirectories(fileobj.directory)
@ -109,16 +120,18 @@ class VesselThread(Process):
if status == STATUS_COMPLETE:
self._logger.debug(
f"File {fileobj.name} uploaded to vessel {self.vessel.name} completely - finalizing")
f"File {fileobj.name} uploaded to vessel {self.vessel.name} completely - finalizing"
)
remotefile.finalizeUpload()
db = self._dbclass()
db.logCompletion(fileobj, self.vessel)
del(db)
del db
self.vessel._uploaded.append(fileobj.uuid)
self._logger.debug(
f"Moved {fileobj.name} to its final destination on {self.vessel.name} - done!")
f"Moved {fileobj.name} to its final destination on {self.vessel.name} - done!"
)
self.checkFileCompletion(fileobj)
return
@ -126,7 +139,8 @@ class VesselThread(Process):
nextchunk = 0 if status == STATUS_START else status + 1
self._logger.debug(
f"Getting chunk #{nextchunk} for file {fileobj.name} for vessel {self.vessel.name}")
f"Getting chunk #{nextchunk} for file {fileobj.name} for vessel {self.vessel.name}"
)
chunk = remotefile.getChunk(nextchunk)
self._logger.debug("Got chunk")
@ -135,45 +149,50 @@ class VesselThread(Process):
# of the file, i.e. the complete file has already been uploaded
if chunk.data:
self._logger.debug(
f"Uploading chunk to vessel {self.vessel.name}")
self._logger.debug(f"Uploading chunk to vessel {self.vessel.name}")
self.vessel.pushChunk(chunk)
else:
self._logger.debug(
f"No more data to upload to vessel {self.vessel.name} for file {fileobj.name} - compiling")
f"No more data to upload to vessel {self.vessel.name} for file {fileobj.name} - compiling"
)
self.vessel.compileComplete(remotefile)
def checkFileCompletion(self, fileobj: File) -> None:
db = self._dbclass()
complete = db.getCompletionByFileUUID(fileobj.uuid)
del(db)
del db
for vessel in self._state["config"].vessels:
if vessel.name not in complete and fileobj.directory.name not in vessel._ignoredirs:
if (
vessel.name not in complete
and fileobj.directory.name not in vessel._ignoredirs
):
return
self._logger.debug(
f"File {fileobj.name} from Directory {fileobj.directory.name} transferred to all Vessels. Moving out of replication directory.")
f"File {fileobj.name} from Directory {fileobj.directory.name} transferred to all Vessels. Moving out of replication directory."
)
if fileobj.exists():
fileobj.moveCompleted()
def processQueue(self) -> Optional[str]:
"""Return a file from the processing queue
"""
"""Return a file from the processing queue"""
self._logger.debug(
f"Trying to fetch new file for vessel {self.vessel.name} from queue")
f"Trying to fetch new file for vessel {self.vessel.name} from queue"
)
for f in self._state["files"]:
if (not (f.uuid in self.vessel._uploaded)) and (not (f.directory.name in self.vessel._ignoredirs)):
self._logger.debug(
f"Using file {f.name} for vessel {self.vessel.name}")
if (not (f.uuid in self.vessel._uploaded)) and (
not (f.directory.name in self.vessel._ignoredirs)
):
self._logger.debug(f"Using file {f.name} for vessel {self.vessel.name}")
return f
if f.uuid in self.vessel._uploaded:
reason = "already uploaded"
else:
reason = "Directory ignored"