backupcleaner/clean-backups.py

212 lines
5.7 KiB
Python
Raw Normal View History

2022-05-31 00:54:41 +03:00
#!/usr/bin/env python3
2022-05-31 12:30:19 +03:00
"""Backup rotation script"""
2022-05-31 00:54:41 +03:00
from datetime import date, timedelta
import os
import argparse
# Default retention parameters
2022-05-31 12:30:19 +03:00
# Number of daily backups kept when -d/--daily is not given
DEFAULT_DAILY = 7
# Number of weekly backups kept when -w/--weekly is not given
DEFAULT_WEEKLY = 4
# Number of monthly backups kept when -m/--monthly is not given
DEFAULT_MONTHLY = 3
2022-05-31 16:50:01 +03:00
# strftime() pattern used for timestamps when -t/--timestamp-format is not given
DEFAULT_TIMESTAMP_FORMAT = "%Y%m%d"
2022-05-31 00:54:41 +03:00
2022-06-03 11:27:34 +03:00
class BackupFile:
    """
    Backup file paired with a retention policy.

    The policy is expanded at construction time into a list of dates to
    keep (``self.dates``). A file is kept when its base name contains any
    of those dates formatted with ``dateformat``.

    Arguments:
    * retention_daily - daily retention period (number of most recent days kept)
    * retention_weekly - weekly retention period (number of most recent Mondays kept)
    * retention_monthly - monthly retention period (number of most recent
      first-of-month days kept)
    * file_path (optional) - file path; None creates a settings-only
      template that is never removed (see new_file())
    * dateformat (optional) - format of timestamps (default is '%Y%m%d')
    * start_date (optional) - date the retention periods are counted back
      from; None means date.today() evaluated when the object is created
    """

    def __init__(
        self,
        retention_daily,
        retention_weekly,
        retention_monthly,
        file_path=None,
        dateformat="%Y%m%d",
        start_date=None
    ) -> None:
        self.file_path = file_path
        self.daily = retention_daily
        self.weekly = retention_weekly
        self.monthly = retention_monthly
        self.dateformat = dateformat
        # Stored exactly as given (possibly None) so new_file() can inherit it
        self.start_date = start_date
        # Only the base name is matched, so dates in directory names are ignored
        self.file_name = None if file_path is None else os.path.basename(file_path)
        curr_date = date.today() if start_date is None else start_date
        self.dates = self._retained_dates(curr_date)

    def _retained_dates(self, curr_date):
        """
        Build the list of dates to keep, counted back from curr_date.
        Order is daily, then weekly, then monthly; duplicates are skipped.
        """
        # Daily: curr_date and the days right before it
        dates = [curr_date - timedelta(days=i) for i in range(self.daily)]
        # Weekly: Monday of the current week and the Mondays before it
        monday = curr_date - timedelta(days=curr_date.weekday())
        for i in range(self.weekly):
            day = monday - timedelta(weeks=i)
            if day not in dates:
                dates.append(day)
        # Monthly: first day of the current month and of the months before it
        day = curr_date.replace(day=1)
        for _ in range(self.monthly):
            if day not in dates:
                dates.append(day)
            # Stepping one day back from the 1st lands in the previous month
            day = (day - timedelta(days=1)).replace(day=1)
        return dates

    def new_file(
        self,
        file_path,
        retention_daily=None,
        retention_weekly=None,
        retention_monthly=None,
        dateformat=None,
        start_date=None
    ):
        """
        Create new instance of BackupFile, can be used for retention settings inheritance.
        Every argument left as None is taken from this instance.
        """
        if retention_daily is None:
            retention_daily = self.daily
        if retention_weekly is None:
            retention_weekly = self.weekly
        if retention_monthly is None:
            retention_monthly = self.monthly
        if dateformat is None:
            dateformat = self.dateformat
        if start_date is None:
            start_date = self.start_date
        return BackupFile(
            retention_daily,
            retention_weekly,
            retention_monthly,
            file_path,
            dateformat,
            start_date
        )

    def __str__(self):
        return f"<{self.file_path}>"

    def need_remove(self):
        """
        Check if file is too old and needs to be removed.
        Returns False for an instance without a file path, and for a file
        whose name contains at least one of the dates to keep.
        """
        if self.file_name is None:
            return False
        return not any(
            single_date.strftime(self.dateformat) in self.file_name
            for single_date in self.dates
        )

    def remove(self, force_remove=False):
        """
        Remove file
        Arguments:
        * force_remove - suppress remove confirmation
        """
        print(f"Removing {self}...")
        # Check force option
        if force_remove:
            os.unlink(self.file_path)
        else:
            # Remove interactively; anything other than "y" keeps the file
            print("Are you sure? (y/n) ", end="")
            answer = input()
            if answer == "y":
                os.unlink(self.file_path)

    def remove_if_needed(self, force_remove=False):
        """
        Remove file if it's too old.
        """
        if self.need_remove():
            self.remove(force_remove=force_remove)
2022-05-31 00:54:41 +03:00
def build_parser():
    """Create the command-line argument parser of the cleanup script."""
    parser = argparse.ArgumentParser(
        description="Cleanup old backups",
        epilog="For a complete timestamp format description, see the python strftime() "
               "documentation: https://docs.python.org/3/library/datetime.html"
               "#strftime-strptime-behavior"
    )
    # path argument
    parser.add_argument(
        "path",
        metavar="PATH",
        type=str,
        help="directory path"
    )
    # daily argument
    parser.add_argument(
        "-d", "--daily",
        type=int,
        default=DEFAULT_DAILY,
        metavar="N",
        help=f"keep N daily backups, default: {DEFAULT_DAILY}"
    )
    # weekly argument
    parser.add_argument(
        "-w", "--weekly",
        type=int,
        default=DEFAULT_WEEKLY,
        metavar="N",
        help=f"keep N weekly backups, default: {DEFAULT_WEEKLY}"
    )
    # monthly argument
    parser.add_argument(
        "-m", "--monthly",
        type=int,
        default=DEFAULT_MONTHLY,
        metavar="N",
        help=f"keep N monthly backups, default: {DEFAULT_MONTHLY}"
    )
    # force removal
    parser.add_argument(
        "-f", "--force",
        action="store_true",
        help="suppress remove confirmation"
    )
    # timestamp format
    parser.add_argument(
        "-t",
        "--timestamp-format",
        type=str,
        default=DEFAULT_TIMESTAMP_FORMAT,
        metavar="FORMAT",
        # argparse %-formats help strings, so a literal '%' has to be doubled
        help=f"format of timestamp, default: {DEFAULT_TIMESTAMP_FORMAT}".replace(r"%", r"%%")
    )
    return parser


def main(argv=None):
    """
    Remove outdated backup files from the directory given on the command line.
    Arguments:
    * argv (optional) - argument list to parse instead of the process arguments
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    directory = args.path
    if not os.path.isdir(directory):
        # Exit with a usage message instead of a traceback from os.listdir()
        parser.error(f"not a directory: {directory}")
    # Template instance carrying the retention settings for every file
    files = BackupFile(
        retention_daily=args.daily,
        retention_weekly=args.weekly,
        retention_monthly=args.monthly,
        dateformat=args.timestamp_format
    )
    # Generate file list with full paths; sorted so prompts come in a stable order
    candidates = (os.path.join(directory, name) for name in sorted(os.listdir(directory)))
    paths = [path for path in candidates if os.path.isfile(path)]
    # File processing
    for path in paths:
        files.new_file(path).remove_if_needed(force_remove=args.force)


if __name__ == "__main__":
    main()