archive_exports rework

2025-07-11 16:29:20 +03:00
parent 5ff7c2aca9
commit d23eef096a
6 changed files with 443 additions and 37 deletions


@@ -192,20 +192,21 @@ def archive_exports(
deduplicate: bool = False,
logger: Optional[SupersetLogger] = None
) -> None:
"""[CONTRACT] Управление архивом экспортированных дашбордов
@pre:
- output_dir должен существовать
- Значения retention должны быть >= 0
@post:
- Сохраняет файлы согласно политике хранения
- Удаляет устаревшие архивы
- Логирует все действия
@raise:
- ValueError: Если retention параметры некорректны
- Exception: При любых других ошибках
"""
# [CONTRACT] Управление архивом экспортированных дашбордов
# @pre:
# - output_dir должен существовать
# - Значения retention должны быть >= 0
# @post:
# - Сохраняет файлы согласно политике хранения
# - Удаляет устаревшие архивы
# - Логирует все действия
# @raise:
# - ValueError: Если retention параметры некорректны
# - Exception: При любых других ошибках
logger = logger or SupersetLogger(name="fileio", console=False)
logger.info(f"[ARCHIVE] Starting archive cleanup in {output_dir}. Deduplication: {deduplicate}")
# [DEBUG_ARCHIVE] Log input parameters
logger.debug(f"[DEBUG_ARCHIVE] archive_exports called with: output_dir={output_dir}, daily={daily_retention}, weekly={weekly_retention}, monthly={monthly_retention}, deduplicate={deduplicate}")
# [VALIDATION] Validate parameters
if not all(isinstance(x, int) and x >= 0 for x in [daily_retention, weekly_retention, monthly_retention]):
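The body of this guard falls outside the hunk; presumably it raises the ValueError promised in the contract above. A sketch of the complete check, with an assumed error message:

if not all(isinstance(x, int) and x >= 0
           for x in (daily_retention, weekly_retention, monthly_retention)):
    raise ValueError("Retention values must be non-negative integers")  # wording assumed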
@@ -221,35 +222,54 @@ def archive_exports(
# [PROCESSING] Collect file information
files_with_dates = []
for file in export_dir.glob("*.zip"):
zip_files_in_dir = list(export_dir.glob("*.zip"))
# [DEBUG_ARCHIVE] Log number of zip files found
logger.debug(f"[DEBUG_ARCHIVE] Found {len(zip_files_in_dir)} zip files in {export_dir}")
for file in zip_files_in_dir:
# [DEBUG_ARCHIVE] Log file being processed
logger.debug(f"[DEBUG_ARCHIVE] Processing file: {file.name}")
try:
timestamp_str = file.stem.split('_')[-1].split('T')[0]
file_date = datetime.strptime(timestamp_str, "%Y%m%d").date()
logger.debug(f"[DATE_PARSE] Файл {file.name} добавлен к анализу очистки (массив files_with_dates)")
# [DEBUG_ARCHIVE] Log parsed date
logger.debug(f"[DEBUG_ARCHIVE] Parsed date for {file.name}: {file_date}")
except (ValueError, IndexError):
file_date = datetime.fromtimestamp(file.stat().st_mtime).date()
logger.warning(f"[DATE_PARSE] Using modification date for {file.name}")
# [DEBUG_ARCHIVE] Log parsed date (modification date)
logger.debug(f"[DEBUG_ARCHIVE] Parsed date for {file.name} (mod date): {file_date}")
files_with_dates.append((file, file_date))
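
For illustration, the parsing path on a hypothetical file name (the actual naming scheme is not visible in this diff):

from datetime import datetime
from pathlib import Path

f = Path("dashboards_20250711T162920.zip")        # hypothetical name
stamp = f.stem.split('_')[-1].split('T')[0]       # -> "20250711"
print(datetime.strptime(stamp, "%Y%m%d").date())  # -> 2025-07-11

Any name whose last underscore-separated token does not start with a YYYYMMD-style date falls through to the modification-time fallback above.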
# [DEDUPLICATION]
if deduplicate:
logger.info("[DEDUPLICATION] Starting checksum-based deduplication.")
logger.info("Начало дедупликации на основе контрольных сумм.")
for file_path, _ in files_with_dates:
# [DEBUG_ARCHIVE] Log file being checked for deduplication
logger.debug(f"[DEBUG_ARCHIVE][DEDUPLICATION] Checking file: {file_path.name}")
try:
crc32_checksum = calculate_crc32(file_path)
if crc32_checksum in checksums:
# Duplicate found, delete the older file
logger.warning(f"[DEDUPLICATION] Duplicate found: {file_path}. Deleting.")
# [DEBUG_ARCHIVE][DEDUPLICATION] Log duplicate found and deletion attempt
logger.debug(f"[DEBUG_ARCHIVE][DEDUPLICATION] Duplicate found: {file_path.name}. Checksum: {crc32_checksum}. Attempting deletion.")
file_path.unlink()
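# NOTE: the deleted file still remains in files_with_dates, so the retention pass below
# may attempt to unlink it a second time and log an OSError; filtering it out here would
# avoid the duplicate attempt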
else:
checksums[crc32_checksum] = file_path
# [DEBUG_ARCHIVE][DEDUPLICATION] Log file kept after deduplication check
logger.debug(f"[DEBUG_ARCHIVE][DEDUPLICATION] Keeping file: {file_path.name}. Checksum: {crc32_checksum}.")
except Exception as e:
logger.error(f"[DEDUPLICATION_ERROR] Error processing {file_path}: {str(e)}", exc_info=True)
# [PROCESSING] Apply retention policies
# [DEBUG_ARCHIVE] Log files before retention policy
logger.debug(f"[DEBUG_ARCHIVE] Files with dates before retention policy: {[f.name for f, d in files_with_dates]}")
keep_files = apply_retention_policy(
files_with_dates,
daily_retention,
@@ -257,17 +277,26 @@ def archive_exports(
monthly_retention,
logger
)
# [DEBUG_ARCHIVE] Log files to keep after retention policy
logger.debug(f"[DEBUG_ARCHIVE] Files to keep after retention policy: {[f.name for f in keep_files]}")
# [CLEANUP] Delete outdated files
deleted_count = 0
for file, _ in files_with_dates:
# [DEBUG_ARCHIVE] Check file for deletion
logger.debug(f"[DEBUG_ARCHIVE] Checking file for deletion: {file.name}. Should keep: {file in keep_files}")
if file not in keep_files:
try:
# [DEBUG_ARCHIVE][FILE_REMOVED_ATTEMPT] Log deletion attempt
logger.info(f"[DEBUG_ARCHIVE][FILE_REMOVED_ATTEMPT] Attempting to delete archive: {file.name}")
file.unlink()
deleted_count += 1
logger.info(f"[FILE_REMOVED] Deleted archive: {file.name}")
except OSError as e:
logger.error(f"[FILE_ERROR] Error deleting {file.name}: {str(e)}", exc_info=True)
# [DEBUG_ARCHIVE][FILE_ERROR] Log deletion error
logger.error(f"[DEBUG_ARCHIVE][FILE_ERROR] Error deleting {file.name}: {str(e)}", exc_info=True)
logger.info(f"[ARCHIVE_RESULT] Cleanup completed. Deleted {deleted_count} archives.")