Initial commit
This commit is contained in:
commit
9e930bddbb
9 changed files with 306 additions and 0 deletions
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
venv/
|
||||||
|
*.pyc
|
||||||
|
__pycache__/
|
||||||
|
dist/
|
||||||
|
settings.ini
|
19
LICENSE
Normal file
19
LICENSE
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
Copyright (c) 2023 Kumi Systems e.U. <office@kumi.systems>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
19
README.md
Normal file
19
README.md
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
# S3 Downloader
|
||||||
|
|
||||||
|
This is a simple tool to download files from S3. It is intended to be used as a CLI tool, but can also be used as a library.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
* Python 3.10+
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m venv venv
|
||||||
|
source venv/bin/activate
|
||||||
|
pip install git+https://kumig.it/kumisystems/s3downloader.git
|
||||||
|
```
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
29
pyproject.toml
Normal file
29
pyproject.toml
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
[build-system]
|
||||||
|
requires = ["hatchling"]
|
||||||
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "s3downloader"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = [
|
||||||
|
{ name="Kumi Mitterer", email="s3downloader@kumi.email" },
|
||||||
|
]
|
||||||
|
description = "Simple Python CLI tool to download files from S3"
|
||||||
|
readme = "README.md"
|
||||||
|
license = { file="LICENSE" }
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
classifiers = [
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"License :: OSI Approved :: MIT License",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
|
]
|
||||||
|
dependencies = [
|
||||||
|
"boto3",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.urls]
|
||||||
|
"Homepage" = "https://kumig.it/kumitterer/s3downloader"
|
||||||
|
"Bug Tracker" = "https://kumig.it/kumitterer/s3downloader/issues"
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
s3downloader = "s3downloader.__main__:main"
|
24
settings.dist.ini
Normal file
24
settings.dist.ini
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
[S3]
|
||||||
|
|
||||||
|
# The credentials to your S3 bucket
|
||||||
|
access_key = your-access-key
|
||||||
|
secret_key = your-secret-key
|
||||||
|
|
||||||
|
# The name of your S3 bucket
|
||||||
|
bucket_name = your-bucket
|
||||||
|
|
||||||
|
# The path where files are stored within that bucket
|
||||||
|
# Can be omitted if files are in the base directory
|
||||||
|
path = in/here/
|
||||||
|
|
||||||
|
# Where downloaded files will be stored
|
||||||
|
final_dir = /your/output/directory
|
||||||
|
|
||||||
|
# Delete files from bucket after downloading? (1 = yes, 0 = no)
|
||||||
|
delete = 1
|
||||||
|
|
||||||
|
# To pass further keyword arguments to boto3.client(), list them below (values are passed on as strings):
|
||||||
|
#
|
||||||
|
# endpoint_url = https://my.minio.instance/
|
||||||
|
#
|
||||||
|
# See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session.client
|
0
src/s3downloader/__init__.py
Normal file
0
src/s3downloader/__init__.py
Normal file
41
src/s3downloader/__main__.py
Normal file
41
src/s3downloader/__main__.py
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
from .classes.client import S3Client
|
||||||
|
from .classes.config import Config
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
|
def main():
    """Run the command-line interface.

    Parses ``--config``, ``--section`` and ``--log``, configures logging,
    builds an ``S3Client`` from the chosen configuration section and lets it
    process the bucket once.
    """
    parser = ArgumentParser()

    parser.add_argument(
        "--config",
        type=str,
        default="settings.ini",
        help="Path to configuration file",
    )

    parser.add_argument(
        "--section",
        type=str,
        default="S3",
        help="Section in configuration file",
    )

    parser.add_argument(
        "--log",
        type=str,
        default="INFO",
        help="Logging level",
    )

    args = parser.parse_args()

    # logging only recognises upper-case level names, so normalise the
    # argument ("debug" -> "DEBUG") and turn an unknown name into a regular
    # usage error instead of a ValueError traceback.
    try:
        logging.basicConfig(level=args.log.upper())
    except ValueError:
        parser.error(f"Unknown logging level: {args.log}")

    client = S3Client.from_config(args.config, args.section)

    client.process_files()


if __name__ == '__main__':
    main()
|
118
src/s3downloader/classes/client.py
Normal file
118
src/s3downloader/classes/client.py
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
import boto3
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from botocore.exceptions import NoCredentialsError
|
||||||
|
from botocore.client import BaseClient
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from os import PathLike
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from .config import Config
|
||||||
|
|
||||||
|
|
||||||
|
class S3Client:
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
access_key: str,
|
||||||
|
secret_key: str,
|
||||||
|
bucket_name: str,
|
||||||
|
path: str,
|
||||||
|
final_dir: PathLike,
|
||||||
|
delete: bool = False,
|
||||||
|
**kwargs,
|
||||||
|
):
|
||||||
|
self.s3 = self.connect(access_key, secret_key, **kwargs)
|
||||||
|
self.bucket_name = bucket_name
|
||||||
|
self.path = path
|
||||||
|
self.final_dir = Path(final_dir)
|
||||||
|
self.delete = delete
|
||||||
|
|
||||||
|
self.final_dir.mkdir(exist_ok=True)
|
||||||
|
assert (
|
||||||
|
self.final_dir.is_dir()
|
||||||
|
), "Final directory does not exist or is not a directory"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_config(cls, config_file: PathLike, section: str = "S3") -> "S3Client":
|
||||||
|
config = Config(config_file, section)
|
||||||
|
return cls(
|
||||||
|
config.access_key,
|
||||||
|
config.secret_key,
|
||||||
|
config.bucket_name,
|
||||||
|
config.path,
|
||||||
|
config.final_dir,
|
||||||
|
config.delete,
|
||||||
|
**config.kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
|
def connect(self, access_key: str, secret_key: str, **kwargs) -> BaseClient:
|
||||||
|
logging.debug("Connecting to S3")
|
||||||
|
|
||||||
|
s3 = boto3.client(
|
||||||
|
"s3",
|
||||||
|
aws_access_key_id=access_key,
|
||||||
|
aws_secret_access_key=secret_key,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
return s3
|
||||||
|
|
||||||
|
def list_files(self) -> List[str]:
|
||||||
|
logging.debug("Listing files in S3")
|
||||||
|
|
||||||
|
return [
|
||||||
|
obj["Key"]
|
||||||
|
for obj in self.s3.list_objects(
|
||||||
|
Bucket=self.bucket_name, Prefix=self.path
|
||||||
|
).get("Contents", [])
|
||||||
|
]
|
||||||
|
|
||||||
|
def download_files(self) -> bool:
|
||||||
|
try:
|
||||||
|
logging.debug("Downloading files")
|
||||||
|
for obj in self.list_files():
|
||||||
|
if not self._exists_local(obj):
|
||||||
|
self.download_file(obj)
|
||||||
|
else:
|
||||||
|
logging.warn(f"File already exists locally, skipping: {obj}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def download_file(self, filename: str) -> None:
|
||||||
|
logging.info(f"Downloading file from S3: {filename}")
|
||||||
|
with tempfile.TemporaryFile() as temp_file:
|
||||||
|
self.s3.download_fileobj(self.bucket_name, filename, temp_file)
|
||||||
|
temp_file.seek(0)
|
||||||
|
self.move_file(temp_file, filename)
|
||||||
|
|
||||||
|
def move_file(self, temp_file: tempfile.TemporaryFile, filename: str) -> None:
|
||||||
|
logging.debug(f"Moving file to final directory: {filename}")
|
||||||
|
|
||||||
|
with open(self.final_dir / Path(filename).name, "wb") as final_file:
|
||||||
|
final_file.write(temp_file.read())
|
||||||
|
|
||||||
|
def delete_files(self) -> None:
|
||||||
|
logging.debug("Deleting files from S3")
|
||||||
|
for obj in self.list_files():
|
||||||
|
self.delete_file(obj)
|
||||||
|
|
||||||
|
def delete_file(self, filename) -> None:
|
||||||
|
logging.info(f"Deleting file from S3: {filename}")
|
||||||
|
self.s3.delete_object(Bucket=self.bucket_name, Key=filename)
|
||||||
|
|
||||||
|
def process_files(self) -> None:
|
||||||
|
logging.debug("Processing files")
|
||||||
|
|
||||||
|
if self.download_files() and self.delete:
|
||||||
|
self.delete_files()
|
||||||
|
|
||||||
|
def _exists_local(self, filename: str) -> bool:
|
||||||
|
logging.debug(f"Checking if file exists locally: {filename}")
|
||||||
|
|
||||||
|
return Path(self.final_dir / Path(filename).name).exists()
|
51
src/s3downloader/classes/config.py
Normal file
51
src/s3downloader/classes/config.py
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
from configparser import ConfigParser
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
class Config:
    """Access to the settings stored in one section of an INI file.

    The required options are exposed as properties; looking one up raises
    ``KeyError`` if the section or the option is missing.
    """

    # Options with a dedicated property; every other option in the section
    # is reported by ``kwargs``.
    _RESERVED_KEYS = frozenset(
        {
            "access_key",
            "secret_key",
            "bucket_name",
            "path",
            "final_dir",
            "delete",
        }
    )

    def __init__(self, config_file=(), section="S3"):
        """Read the configuration.

        Args:
            config_file: Path, or iterable of paths, of the INI file(s) to
                read. Files that cannot be read are skipped by the parser.
            section: Name of the section the properties read from.
        """
        logging.debug(f"Reading configuration file(s): {config_file}")

        self.config = ConfigParser()
        loaded = self.config.read(config_file)
        # ConfigParser.read() ignores missing files silently; say so here,
        # otherwise the first symptom is a KeyError on the section name.
        if config_file and not loaded:
            logging.warning(f"Could not read configuration file(s): {config_file}")

        self.section = section

    @property
    def access_key(self):
        """Value of the ``access_key`` option."""
        return self.config[self.section]["access_key"]

    @property
    def secret_key(self):
        """Value of the ``secret_key`` option."""
        return self.config[self.section]["secret_key"]

    @property
    def bucket_name(self):
        """Value of the ``bucket_name`` option."""
        return self.config[self.section]["bucket_name"]

    @property
    def path(self):
        """Value of the ``path`` option, or an empty string if it is not set."""
        return self.config[self.section].get("path", "")

    @property
    def final_dir(self):
        """Value of the ``final_dir`` option."""
        return self.config[self.section]["final_dir"]

    @property
    def delete(self):
        """Value of the ``delete`` option as a boolean, ``None`` if not set."""
        return self.config[self.section].getboolean("delete")

    @property
    def kwargs(self):
        """All options of the section that have no dedicated property.

        Returns:
            A dict mapping option names to their raw string values.
        """
        return {
            key: value
            for key, value in self.config[self.section].items()
            if key not in self._RESERVED_KEYS
        }
|
Loading…
Reference in a new issue