From 828fb667f2582835f10ea27ede573270079c2a49 Mon Sep 17 00:00:00 2001 From: Kumi Date: Sun, 19 May 2024 18:47:22 +0200 Subject: [PATCH] feat: implement PostgreSQL to MinIO backup solution Introduced a new Python script to back up PostgreSQL databases, encrypt the dumps using GPG, and then upload them to MinIO. This solution includes: - `.gitignore` to safeguard against accidentally committing sensitive files. - A clear `LICENSE` and `README.md` for legal clarity and usage documentation. - Sample configuration in `config.dist.yaml` to ease setup. - Defined project dependencies and metadata in `pyproject.toml` for streamlined package management. - Core implementation in `src/postgres_minio_backup` to encapsulate backup logic. This commit sets the foundation for a robust, secure database backup mechanism, facilitating easier disaster recovery and data protection strategies. --- .gitignore | 7 ++ LICENSE | 19 ++++ README.md | 27 +++++ config.dist.yaml | 18 ++++ pyproject.toml | 31 ++++++ src/postgres_minio_backup/__init__.py | 1 + src/postgres_minio_backup/__main__.py | 136 ++++++++++++++++++++++++++ 7 files changed, 239 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 config.dist.yaml create mode 100644 pyproject.toml create mode 100644 src/postgres_minio_backup/__init__.py create mode 100644 src/postgres_minio_backup/__main__.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..28b74cd --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.venv +config.yaml +__pycache__ +*.pyc +.ruff_cache/ +dist/ +venv/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e624f96 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2024 Kumi Mitterer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, 
including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..1f5e0ff --- /dev/null +++ b/README.md @@ -0,0 +1,27 @@ +# PostgreSQL to MinIO Backup + +This is a simple script to backup a PostgreSQL database, encrypt the dump using GPG, and upload it to MinIO. + +## Installation + +Set up a Python virtual environment and install the requirements: + +```bash +python3 -m venv venv +source venv/bin/activate +pip install -U . +``` + +## Usage + +First, set up a `config.yaml` based on the template provided in [config.dist.yaml](config.dist.yaml). + +Then, run the script: + +```bash +postgres-minio-backup +``` + +## License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
\ No newline at end of file diff --git a/config.dist.yaml b/config.dist.yaml new file mode 100644 index 0000000..381e491 --- /dev/null +++ b/config.dist.yaml @@ -0,0 +1,18 @@ +postgres_hosts: + - host: 'postgres_host_1' + port: '5432' + user: 'postgres_user_1' + password: 'postgres_password_1' + - host: 'postgres_host_2' + port: '5432' + user: 'postgres_user_2' + password: 'postgres_password_2' + +minio: + endpoint: 'your_minio_endpoint' + access_key: 'your_minio_access_key' + secret_key: 'your_minio_secret_key' + bucket_name: 'your_minio_bucket_name' + +gpg: + recipient: 'recipient@example.com' \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..fba5180 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,31 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "postgres-minio-backup" +version = "0.1.0" +authors = [ + { name="Kumi Mitterer", email="postgres-minio-backup@kumi.email" }, +] +description = "Simple Python script to backup a PostgreSQL database to MinIO" +readme = "README.md" +license = { file="LICENSE" } +requires-python = ">=3.10" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: POSIX", +] +dependencies = [ + "psycopg2-binary", + "boto3", + "pyyaml", +] + +[project.scripts] +postgres-minio-backup = "postgres_minio_backup:main" + +[project.urls] +"Homepage" = "https://git.private.coffee/kumi/postgres-minio-backup" +"Bug Tracker" = "https://git.private.coffee/kumi/postgres-minio-backup/issues" \ No newline at end of file diff --git a/src/postgres_minio_backup/__init__.py b/src/postgres_minio_backup/__init__.py new file mode 100644 index 0000000..15f13d8 --- /dev/null +++ b/src/postgres_minio_backup/__init__.py @@ -0,0 +1 @@ +from .__main__ import main # noqa: F401 \ No newline at end of file diff --git a/src/postgres_minio_backup/__main__.py b/src/postgres_minio_backup/__main__.py 
"""Back up PostgreSQL databases, encrypt the dumps with GPG, and upload them to MinIO.

Module ``src/postgres_minio_backup/__main__.py``: run it with
``python -m postgres_minio_backup`` or through the ``postgres-minio-backup``
console script, which calls :func:`main`.
"""

import argparse
import datetime
import logging
import os
import subprocess
import sys
import tempfile

import boto3
import psycopg2
import yaml
from botocore.client import Config

log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(format=log_format)


def load_config(config_path):
    """Load configuration from a YAML file.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        The parsed document, as produced by ``yaml.safe_load``.

    Raises:
        FileNotFoundError: If ``config_path`` does not exist.
    """
    with open(config_path, "r", encoding="utf-8") as config_file:
        return yaml.safe_load(config_file)


def _list_databases(pg_host):
    """Return the names of all non-template, connectable databases on ``pg_host``."""
    conn = psycopg2.connect(
        dbname="postgres",
        user=pg_host["user"],
        password=pg_host["password"],
        host=pg_host["host"],
        port=pg_host["port"],
    )
    try:
        conn.autocommit = True
        with conn.cursor() as cursor:
            # Databases that refuse connections cannot be dumped by pg_dump,
            # so they are filtered out here instead of failing later.
            cursor.execute(
                "SELECT datname FROM pg_database "
                "WHERE datistemplate = false AND datallowconn;"
            )
            return [row[0] for row in cursor.fetchall()]
    finally:
        # The connection is only needed for the listing; close it even when
        # the query fails so it is never leaked.
        conn.close()


def _backup_database(pg_host, db_name, gpg_recipient, s3_client, bucket_name):
    """Dump one database, gzip and GPG-encrypt the dump, and upload it."""
    # pg_dump does not see the password given to psycopg2; hand it over via
    # the environment so the dump can authenticate without prompting.
    pg_env = {**os.environ, "PGPASSWORD": str(pg_host["password"])}

    with tempfile.TemporaryDirectory() as tmpdir:
        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        dump_file = os.path.join(tmpdir, f"{db_name}-{timestamp}.sql")
        gzip_file = f"{dump_file}.gz"
        gpg_file = f"{gzip_file}.gpg"

        # Dump the database (custom format, despite the ".sql" suffix).
        subprocess.run(
            [
                "pg_dump",
                "-h",
                pg_host["host"],
                "-p",
                # argv entries must be strings; YAML may yield an int port.
                str(pg_host["port"]),
                "-U",
                pg_host["user"],
                # Fail instead of blocking on a password prompt when unattended.
                "--no-password",
                "-F",
                "c",
                "-f",
                dump_file,
                db_name,
            ],
            check=True,
            env=pg_env,
        )

        # Gzip the dump file (replaces dump_file with gzip_file).
        subprocess.run(["gzip", dump_file], check=True)

        # GPG encrypt the gzip file; --batch keeps gpg from prompting.
        subprocess.run(
            [
                "gpg",
                "--batch",
                "--output",
                gpg_file,
                "--encrypt",
                "--recipient",
                gpg_recipient,
                gzip_file,
            ],
            check=True,
        )

        # Upload to MinIO.
        # NOTE(review): the object key holds only database name and timestamp,
        # so equally named databases on different hosts share a key namespace.
        with open(gpg_file, "rb") as encrypted:
            s3_client.upload_fileobj(
                encrypted, bucket_name, os.path.basename(gpg_file)
            )


def run_backup(config_path):
    """Backup PostgreSQL databases and upload to MinIO.

    For every entry in ``postgres_hosts`` each non-template database is dumped
    with ``pg_dump``, compressed with ``gzip``, encrypted with ``gpg`` for the
    configured recipient, and uploaded to the configured bucket.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        ``True`` when all backups completed, ``False`` when the configuration
        file does not exist (a fatal message is logged in that case).

    Raises:
        subprocess.CalledProcessError: If ``pg_dump``, ``gzip`` or ``gpg``
            exits with a non-zero status.
        KeyError: If a required configuration key is missing.
    """
    try:
        config = load_config(config_path)
    except FileNotFoundError:
        logging.fatal("Configuration file not found: %s", config_path)
        return False

    minio_config = config["minio"]
    gpg_recipient = config["gpg"]["recipient"]

    # Initialize MinIO client
    s3_client = boto3.client(
        "s3",
        endpoint_url=minio_config["endpoint"],
        aws_access_key_id=minio_config["access_key"],
        aws_secret_access_key=minio_config["secret_key"],
        config=Config(signature_version="s3v4"),
    )

    for pg_host in config["postgres_hosts"]:
        for db_name in _list_databases(pg_host):
            _backup_database(
                pg_host,
                db_name,
                gpg_recipient,
                s3_client,
                minio_config["bucket_name"],
            )

    return True


def main():
    """Parse command-line arguments and run the backup.

    Returns:
        Process exit status: ``0`` on success, ``1`` when the configuration
        file could not be found.
    """
    parser = argparse.ArgumentParser(
        description="Backup PostgreSQL databases and upload to MinIO."
    )
    parser.add_argument(
        "--config",
        type=str,
        default="config.yaml",
        help="Path to the configuration file",
    )
    args = parser.parse_args()

    return 0 if run_backup(args.config) else 1


if __name__ == "__main__":
    sys.exit(main())