feat: implement PostgreSQL to MinIO backup solution

Introduced a new Python script to back up PostgreSQL databases, encrypt the dumps using GPG, and then upload them to MinIO. This solution includes:
- `.gitignore` to safeguard against accidentally committing sensitive files.
- A clear `LICENSE` and `README.md` for legal clarity and usage documentation.
- Sample configuration in `config.dist.yaml` to ease setup.
- Defined project dependencies and metadata in `pyproject.toml` for streamlined package management.
- Core implementation in `src/postgres_minio_backup` to encapsulate backup logic.

This commit sets the foundation for a robust, secure database backup mechanism, facilitating easier disaster recovery and data protection strategies.
This commit is contained in:
Kumi 2024-05-19 18:47:22 +02:00
commit 828fb667f2
Signed by: kumi
GPG key ID: ECBCC9082395383F
7 changed files with 239 additions and 0 deletions

7
.gitignore vendored Normal file
View file

@ -0,0 +1,7 @@
.venv
config.yaml
__pycache__
*.pyc
.ruff_cache/
dist/
venv/

19
LICENSE Normal file
View file

@ -0,0 +1,19 @@
Copyright (c) 2024 Kumi Mitterer <postgres-minio-backup@kumi.email>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

27
README.md Normal file
View file

@ -0,0 +1,27 @@
# PostgreSQL to MinIO Backup
This is a simple script to back up all databases on one or more PostgreSQL servers, encrypt each dump using GPG, and upload it to MinIO.
## Installation
Set up a Python virtual environment and install the requirements:
```bash
python3 -m venv venv
source venv/bin/activate
pip install -U .
```
## Usage
First, set up a `config.yaml` based on the template provided in [config.dist.yaml](config.dist.yaml).
Then, run the script:
```bash
postgres-minio-backup
```
## License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

18
config.dist.yaml Normal file
View file

@ -0,0 +1,18 @@
postgres_hosts:
- host: 'postgres_host_1'
port: '5432'
user: 'postgres_user_1'
password: 'postgres_password_1'
- host: 'postgres_host_2'
port: '5432'
user: 'postgres_user_2'
password: 'postgres_password_2'
minio:
endpoint: 'your_minio_endpoint'
access_key: 'your_minio_access_key'
secret_key: 'your_minio_secret_key'
bucket_name: 'your_minio_bucket_name'
gpg:
recipient: 'recipient@example.com'

31
pyproject.toml Normal file
View file

@ -0,0 +1,31 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "postgres-minio-backup"
version = "0.1.0"
authors = [
{ name="Kumi Mitterer", email="postgres-minio-backup@kumi.email" },
]
description = "Simple Python script to backup a PostgreSQL database to MinIO"
readme = "README.md"
license = { file="LICENSE" }
requires-python = ">=3.10"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: POSIX",
]
dependencies = [
"psycopg2-binary",
"boto3",
"pyyaml",
]
[project.scripts]
postgres-minio-backup = "postgres_minio_backup:main"
[project.urls]
"Homepage" = "https://git.private.coffee/kumi/postgres-minio-backup"
"Bug Tracker" = "https://git.private.coffee/kumi/postgres-minio-backup/issues"

View file

@ -0,0 +1 @@
from .__main__ import main # noqa: F401

View file

@ -0,0 +1,136 @@
import psycopg2
import subprocess
import boto3
import os
import tempfile
import yaml
import argparse
import datetime
import logging
from botocore.client import Config
# Log line layout: timestamp, logger name, severity, message.
log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
# Configure the root logger with that layout as soon as the module is imported.
logging.basicConfig(format=log_format)
def load_config(config_path):
    """Load the backup configuration from a YAML file.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        The parsed document exactly as ``yaml.safe_load`` returns it.

    Raises:
        FileNotFoundError: If ``config_path`` does not exist.
        yaml.YAMLError: If the file does not contain valid YAML.
    """
    # Binary mode keeps decoding independent of the process locale: PyYAML
    # inspects the byte stream itself (UTF-8 unless a UTF-16 BOM is present).
    with open(config_path, "rb") as config_file:
        return yaml.safe_load(config_file)
def run_backup(config_path):
    """Back up every non-template database on each configured host to MinIO.

    For each entry in ``postgres_hosts`` the function lists the server's
    non-template databases, dumps each one with ``pg_dump`` (custom format),
    gzips the dump, encrypts it with GPG for the configured recipient and
    uploads the encrypted file to the configured MinIO bucket. All
    intermediate files live in a temporary directory that is removed after
    each database.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        None. If the configuration file does not exist, the problem is
        logged and the function returns without doing any work.

    Raises:
        KeyError: If a required configuration key is missing.
        subprocess.CalledProcessError: If ``pg_dump``, ``gzip`` or ``gpg``
            exits with a non-zero status.

    Errors raised by psycopg2 and boto3 propagate unchanged.
    """
    try:
        config = load_config(config_path)
    except FileNotFoundError:
        logging.fatal(f"Configuration file not found: {config_path}")
        return

    minio_config = config["minio"]
    gpg_recipient = config["gpg"]["recipient"]

    # Initialize MinIO client
    s3_client = boto3.client(
        "s3",
        endpoint_url=minio_config["endpoint"],
        aws_access_key_id=minio_config["access_key"],
        aws_secret_access_key=minio_config["secret_key"],
        config=Config(signature_version="s3v4"),
    )

    for pg_host in config["postgres_hosts"]:
        db_host = pg_host["host"]
        # An unquoted port in YAML is parsed as int, but subprocess arguments
        # must be strings.
        db_port = str(pg_host["port"])
        db_user = pg_host["user"]
        db_password = pg_host["password"]

        # pg_dump takes no password argument; without PGPASSWORD it would
        # prompt (or fail when unattended) even though the config has one.
        pg_dump_env = dict(os.environ)
        if db_password is not None:
            pg_dump_env["PGPASSWORD"] = str(db_password)

        # Connect to the PostgreSQL server only long enough to list the
        # databases; the connection is closed even if the query fails.
        conn = psycopg2.connect(
            dbname="postgres",
            user=db_user,
            password=db_password,
            host=db_host,
            port=db_port,
        )
        try:
            conn.autocommit = True
            with conn.cursor() as cursor:
                cursor.execute(
                    "SELECT datname FROM pg_database WHERE datistemplate = false;"
                )
                databases = cursor.fetchall()
        finally:
            conn.close()

        for (db_name,) in databases:
            with tempfile.TemporaryDirectory() as tmpdir:
                timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
                dump_file = os.path.join(tmpdir, f"{db_name}-{timestamp}.sql")
                gzip_file = f"{dump_file}.gz"
                gpg_file = f"{gzip_file}.gpg"

                # Dump the database
                subprocess.run(
                    [
                        "pg_dump",
                        "-h",
                        db_host,
                        "-p",
                        db_port,
                        "-U",
                        db_user,
                        "-F",
                        "c",
                        "-f",
                        dump_file,
                        db_name,
                    ],
                    check=True,
                    env=pg_dump_env,
                )

                # Gzip the dump file (gzip replaces it with ``<dump_file>.gz``)
                subprocess.run(["gzip", dump_file], check=True)

                # GPG encrypt the gzip file
                subprocess.run(
                    [
                        "gpg",
                        "--output",
                        gpg_file,
                        "--encrypt",
                        "--recipient",
                        gpg_recipient,
                        gzip_file,
                    ],
                    check=True,
                )

                # Upload to MinIO, using the file name as the object key
                with open(gpg_file, "rb") as f:
                    s3_client.upload_fileobj(
                        f, minio_config["bucket_name"], os.path.basename(gpg_file)
                    )
def main():
    """Console entry point: read ``--config`` from the command line and run the backup."""
    cli = argparse.ArgumentParser(
        description="Backup PostgreSQL databases and upload to MinIO."
    )
    cli.add_argument(
        "--config",
        default="config.yaml",
        type=str,
        help="Path to the configuration file",
    )
    # Hand the parsed path straight to the backup routine.
    run_backup(cli.parse_args().config)
# Run the backup when this module is executed directly rather than imported.
if __name__ == "__main__":
    main()