Initial commit
This commit is contained in:
commit
9e930bddbb
9 changed files with 306 additions and 0 deletions
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
venv/
|
||||
*.pyc
|
||||
__pycache__/
|
||||
dist/
|
||||
settings.ini
|
19
LICENSE
Normal file
19
LICENSE
Normal file
|
@ -0,0 +1,19 @@
|
|||
Copyright (c) 2023 Kumi Systems e.U. <office@kumi.systems>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
19
README.md
Normal file
19
README.md
Normal file
|
@ -0,0 +1,19 @@
|
|||
# S3 Downloader
|
||||
|
||||
This is a simple tool to download files from S3. It is intended to be used as a CLI tool, but can also be used as a library.
|
||||
|
||||
## Requirements
|
||||
|
||||
* Python 3.10+
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install git+https://kumig.it/kumisystems/s3downloader.git
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
29
pyproject.toml
Normal file
29
pyproject.toml
Normal file
|
@ -0,0 +1,29 @@
|
|||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "s3downloader"
|
||||
version = "0.1.0"
|
||||
authors = [
|
||||
{ name="Kumi Mitterer", email="s3downloader@kumi.email" },
|
||||
]
|
||||
description = "Simple Python CLI tool to download files from S3"
|
||||
readme = "README.md"
|
||||
license = { file="LICENSE" }
|
||||
requires-python = ">=3.10"
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
]
|
||||
dependencies = [
|
||||
"boto3",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
"Homepage" = "https://kumig.it/kumitterer/s3downloader"
|
||||
"Bug Tracker" = "https://kumig.it/kumitterer/s3downloader/issues"
|
||||
|
||||
[project.scripts]
|
||||
s3downloader = "s3downloader.__main__:main"
|
24
settings.dist.ini
Normal file
24
settings.dist.ini
Normal file
|
@ -0,0 +1,24 @@
|
|||
[S3]
|
||||
|
||||
# The credentials to your S3 bucket
|
||||
access_key = your-access-key
|
||||
secret_key = your-secret-key
|
||||
|
||||
# The name of your S3 bucket
|
||||
bucket_name = your-bucket
|
||||
|
||||
# The path where files are stored within that bucket
|
||||
# Can be omitted if files are in the base directory
|
||||
path = in/here/
|
||||
|
||||
# Where downloaded files will be stored
|
||||
final_dir = /your/output/directory
|
||||
|
||||
# Delete files from bucket after downloading? (1 = yes, 0 = no)
|
||||
delete = 1
|
||||
|
||||
# To pass further keyword arguments to boto3.client(), list them below:
|
||||
#
|
||||
# endpoint_url = https://my.minio.instance/
|
||||
#
|
||||
# See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session.client
|
0
src/s3downloader/__init__.py
Normal file
0
src/s3downloader/__init__.py
Normal file
41
src/s3downloader/__main__.py
Normal file
41
src/s3downloader/__main__.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
from .classes.client import S3Client
|
||||
from .classes.config import Config
|
||||
|
||||
import logging
|
||||
|
||||
from argparse import ArgumentParser
|
||||
|
||||
def main(argv=None):
    """Run the S3 downloader command-line interface.

    Parses the command-line options, configures logging, builds an
    ``S3Client`` from the selected configuration file and section, and lets
    it process the files.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process' command line. ``None`` (the default) makes ``argparse``
            read ``sys.argv[1:]``, which is what the console script uses.
    """
    parser = ArgumentParser()

    parser.add_argument(
        "--config",
        type=str,
        default="settings.ini",
        help="Path to configuration file",
    )

    parser.add_argument(
        "--section",
        type=str,
        default="S3",
        help="Section in configuration file",
    )

    parser.add_argument(
        "--log",
        type=str,
        default="INFO",
        help="Logging level",
    )

    args = parser.parse_args(argv)

    # logging only knows the upper-case level names, so accept e.g. "debug".
    logging.basicConfig(level=args.log.upper())

    client = S3Client.from_config(args.config, args.section)

    client.process_files()
|
||||
|
||||
if __name__ == "__main__":
    # Start the CLI only when this module is executed, not when imported.
    main()
|
118
src/s3downloader/classes/client.py
Normal file
118
src/s3downloader/classes/client.py
Normal file
|
@ -0,0 +1,118 @@
|
|||
import boto3
|
||||
|
||||
import tempfile
|
||||
import logging
|
||||
|
||||
from botocore.exceptions import NoCredentialsError
|
||||
from botocore.client import BaseClient
|
||||
|
||||
from pathlib import Path
|
||||
from os import PathLike
|
||||
from typing import List
|
||||
|
||||
from .config import Config
|
||||
|
||||
|
||||
class S3Client:
    """Download the objects below one S3 prefix into a local directory.

    Every object is stored flat in ``final_dir`` under the last component of
    its key. When ``delete`` is set, ``process_files`` removes the objects
    from the bucket after a fully successful download run.
    """

    # Bytes copied per read while writing a download to its final location.
    _CHUNK_SIZE = 1024 * 1024

    def __init__(
        self,
        access_key: str,
        secret_key: str,
        bucket_name: str,
        path: str,
        final_dir: PathLike,
        delete: bool = False,
        **kwargs,
    ):
        """Connect to S3 and make sure the output directory exists.

        Args:
            access_key: Access key ID handed to ``boto3.client``.
            secret_key: Secret access key handed to ``boto3.client``.
            bucket_name: Bucket to download from.
            path: Key prefix selecting the objects to download.
            final_dir: Local directory the downloads are written to. It is
                created, including missing parents, if it does not exist.
            delete: Whether ``process_files`` deletes the objects from the
                bucket after downloading them.
            **kwargs: Further keyword arguments for ``boto3.client``.

        Raises:
            FileExistsError: If ``final_dir`` exists but is not a directory.
        """
        self.s3 = self.connect(access_key, secret_key, **kwargs)
        self.bucket_name = bucket_name
        self.path = path
        self.final_dir = Path(final_dir)
        self.delete = delete

        # mkdir() itself rejects an existing non-directory, so no separate
        # (and under ``python -O`` ineffective) assert is needed afterwards.
        self.final_dir.mkdir(parents=True, exist_ok=True)

    @classmethod
    def from_config(cls, config_file: PathLike, section: str = "S3") -> "S3Client":
        """Build a client from one section of an INI configuration file.

        Args:
            config_file: Path of the configuration file to read.
            section: Name of the section holding the settings.
        """
        config = Config(config_file, section)
        return cls(
            config.access_key,
            config.secret_key,
            config.bucket_name,
            config.path,
            config.final_dir,
            config.delete,
            **config.kwargs,
        )

    def connect(self, access_key: str, secret_key: str, **kwargs) -> "BaseClient":
        """Create the boto3 S3 client used for all bucket operations.

        Args:
            access_key: Passed as ``aws_access_key_id``.
            secret_key: Passed as ``aws_secret_access_key``.
            **kwargs: Forwarded unchanged to ``boto3.client``.
        """
        logging.debug("Connecting to S3")

        return boto3.client(
            "s3",
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            **kwargs,
        )

    def list_files(self) -> List[str]:
        """Return the keys of all objects stored below ``self.path``.

        The listing is paginated because a single ``ListObjects`` response
        carries at most 1000 keys. Keys ending in ``/`` are skipped: they are
        conventionally folder placeholders rather than files.
        """
        logging.debug("Listing files in S3")

        pages = self.s3.get_paginator("list_objects").paginate(
            Bucket=self.bucket_name, Prefix=self.path
        )
        return [
            obj["Key"]
            for page in pages
            for obj in page.get("Contents", [])
            if not obj["Key"].endswith("/")
        ]

    def download_files(self) -> bool:
        """Download every listed object that is not yet present locally.

        Returns:
            ``True`` if the listing and all downloads succeeded, ``False`` if
            any step raised; the error is logged together with its traceback.
        """
        try:
            self._download_keys(self.list_files())
        except Exception:
            logging.exception("Downloading files from S3 failed")
            return False

        return True

    def download_file(self, filename: str) -> None:
        """Download one object and store it in the final directory.

        Args:
            filename: Key of the object to download.
        """
        logging.info(f"Downloading file from S3: {filename}")
        with tempfile.TemporaryFile() as temp_file:
            self.s3.download_fileobj(self.bucket_name, filename, temp_file)
            temp_file.seek(0)
            self.move_file(temp_file, filename)

    def move_file(self, temp_file: tempfile.TemporaryFile, filename: str) -> None:
        """Copy a downloaded file object to its place in the final directory.

        The data is copied in chunks so large objects are not held in memory
        as a whole. If copying fails, the incomplete target is removed so a
        later run does not mistake it for a finished download.

        Args:
            temp_file: Readable binary file object positioned at the start
                of the downloaded data.
            filename: Key of the object; only its last path component is
                used as the local file name.
        """
        logging.debug(f"Moving file to final directory: {filename}")

        target = self._local_path(filename)
        try:
            with open(target, "wb") as final_file:
                while chunk := temp_file.read(self._CHUNK_SIZE):
                    final_file.write(chunk)
        except BaseException:
            target.unlink(missing_ok=True)
            raise

    def delete_files(self) -> None:
        """Delete every object currently listed below ``self.path``."""
        logging.debug("Deleting files from S3")
        for obj in self.list_files():
            self.delete_file(obj)

    def delete_file(self, filename) -> None:
        """Delete a single object from the bucket.

        Args:
            filename: Key of the object to delete.
        """
        logging.info(f"Deleting file from S3: {filename}")
        self.s3.delete_object(Bucket=self.bucket_name, Key=filename)

    def process_files(self) -> None:
        """Download all new files and, if enabled, delete them from S3.

        The bucket is listed once and that single listing drives both the
        download and the deletion, so an object uploaded while the run is in
        progress is never deleted without having been looked at. Nothing is
        deleted if the listing or any download fails.
        """
        logging.debug("Processing files")

        try:
            keys = self.list_files()
            self._download_keys(keys)
        except Exception:
            logging.exception("Downloading files from S3 failed")
            return

        if self.delete:
            logging.debug("Deleting files from S3")
            for key in keys:
                self.delete_file(key)

    def _download_keys(self, keys: List[str]) -> None:
        """Download each of ``keys`` unless its local file already exists.

        Stops at, and propagates, the first error.
        """
        logging.debug("Downloading files")
        for key in keys:
            if self._exists_local(key):
                logging.warning(f"File already exists locally, skipping: {key}")
            else:
                self.download_file(key)

    def _local_path(self, filename: str) -> Path:
        """Return the local path an object key is stored under.

        NOTE(review): only the last key component is kept, so keys that
        differ just in their directory part share one local file.
        """
        return self.final_dir / Path(filename).name

    def _exists_local(self, filename: str) -> bool:
        """Tell whether the local file for an object key already exists."""
        logging.debug(f"Checking if file exists locally: {filename}")

        return self._local_path(filename).exists()
|
51
src/s3downloader/classes/config.py
Normal file
51
src/s3downloader/classes/config.py
Normal file
|
@ -0,0 +1,51 @@
|
|||
from configparser import ConfigParser
|
||||
|
||||
import logging
|
||||
|
||||
class Config:
    """Read-only view on one section of an INI configuration file."""

    # Options exposed through dedicated properties; every other option of
    # the section is returned by ``kwargs``.
    _RESERVED_KEYS = frozenset(
        {
            "access_key",
            "secret_key",
            "bucket_name",
            "path",
            "final_dir",
            "delete",
        }
    )

    def __init__(self, config_file=(), section="S3"):
        """Parse the configuration file(s) and remember the section to use.

        Args:
            config_file: A path or an iterable of paths handed to
                ``ConfigParser.read``. Files that cannot be read are
                skipped; a warning is logged if none could be read.
            section: Name of the section the properties read from.
        """
        logging.debug(f"Reading configuration file(s): {config_file}")

        self.config = ConfigParser()
        # read() silently skips unreadable files and returns those it parsed.
        if not self.config.read(config_file):
            logging.warning(
                "No configuration file could be read from: %s", config_file
            )

        self.section = section

    @property
    def _options(self):
        """Return the selected section.

        Raises:
            KeyError: If the section does not exist in the parsed files.
        """
        try:
            return self.config[self.section]
        except KeyError:
            raise KeyError(
                f"Section {self.section!r} not found in configuration"
            ) from None

    @property
    def access_key(self):
        """Value of the required ``access_key`` option."""
        return self._options["access_key"]

    @property
    def secret_key(self):
        """Value of the required ``secret_key`` option."""
        return self._options["secret_key"]

    @property
    def bucket_name(self):
        """Value of the required ``bucket_name`` option."""
        return self._options["bucket_name"]

    @property
    def path(self):
        """Value of the ``path`` option, or ``""`` if it is not set."""
        return self._options.get("path", "")

    @property
    def final_dir(self):
        """Value of the required ``final_dir`` option."""
        return self._options["final_dir"]

    @property
    def delete(self):
        """Boolean value of the ``delete`` option, ``False`` if not set."""
        return self._options.getboolean("delete", fallback=False)

    @property
    def kwargs(self):
        """All remaining options of the section as a ``dict`` of strings."""
        return {
            key: value
            for key, value in self._options.items()
            if key not in self._RESERVED_KEYS
        }
|
Loading…
Reference in a new issue