archive_exports rework
This commit is contained in:
@@ -192,20 +192,21 @@ def archive_exports(
|
||||
deduplicate: bool = False,
|
||||
logger: Optional[SupersetLogger] = None
|
||||
) -> None:
|
||||
"""[CONTRACT] Управление архивом экспортированных дашбордов
|
||||
@pre:
|
||||
- output_dir должен существовать
|
||||
- Значения retention должны быть >= 0
|
||||
@post:
|
||||
- Сохраняет файлы согласно политике хранения
|
||||
- Удаляет устаревшие архивы
|
||||
- Логирует все действия
|
||||
@raise:
|
||||
- ValueError: Если retention параметры некорректны
|
||||
- Exception: При любых других ошибках
|
||||
"""
|
||||
# [CONTRACT] Управление архивом экспортированных дашбордов
|
||||
# @pre:
|
||||
# - output_dir должен существовать
|
||||
# - Значения retention должны быть >= 0
|
||||
# @post:
|
||||
# - Сохраняет файлы согласно политике хранения
|
||||
# - Удаляет устаревшие архивы
|
||||
# - Логирует все действия
|
||||
# @raise:
|
||||
# - ValueError: Если retention параметры некорректны
|
||||
# - Exception: При любых других ошибках
|
||||
logger = logger or SupersetLogger(name="fileio", console=False)
|
||||
logger.info(f"[ARCHIVE] Starting archive cleanup in {output_dir}. Deduplication: {deduplicate}")
|
||||
# [DEBUG_ARCHIVE] Log input parameters
|
||||
logger.debug(f"[DEBUG_ARCHIVE] archive_exports called with: output_dir={output_dir}, daily={daily_retention}, weekly={weekly_retention}, monthly={monthly_retention}, deduplicate={deduplicate}")
|
||||
|
||||
# [VALIDATION] Проверка параметров
|
||||
if not all(isinstance(x, int) and x >= 0 for x in [daily_retention, weekly_retention, monthly_retention]):
|
||||
@@ -221,35 +222,54 @@ def archive_exports(
|
||||
|
||||
# [PROCESSING] Сбор информации о файлах
|
||||
files_with_dates = []
|
||||
for file in export_dir.glob("*.zip"):
|
||||
zip_files_in_dir = list(export_dir.glob("*.zip"))
|
||||
# [DEBUG_ARCHIVE] Log number of zip files found
|
||||
logger.debug(f"[DEBUG_ARCHIVE] Found {len(zip_files_in_dir)} zip files in {export_dir}")
|
||||
|
||||
for file in zip_files_in_dir:
|
||||
# [DEBUG_ARCHIVE] Log file being processed
|
||||
logger.debug(f"[DEBUG_ARCHIVE] Processing file: {file.name}")
|
||||
try:
|
||||
timestamp_str = file.stem.split('_')[-1].split('T')[0]
|
||||
file_date = datetime.strptime(timestamp_str, "%Y%m%d").date()
|
||||
logger.debug(f"[DATE_PARSE] Файл {file.name} добавлен к анализу очистки (массив files_with_dates)")
|
||||
# [DEBUG_ARCHIVE] Log parsed date
|
||||
logger.debug(f"[DEBUG_ARCHIVE] Parsed date for {file.name}: {file_date}")
|
||||
except (ValueError, IndexError):
|
||||
file_date = datetime.fromtimestamp(file.stat().st_mtime).date()
|
||||
logger.warning(f"[DATE_PARSE] Using modification date for {file.name}")
|
||||
# [DEBUG_ARCHIVE] Log parsed date (modification date)
|
||||
logger.debug(f"[DEBUG_ARCHIVE] Parsed date for {file.name} (mod date): {file_date}")
|
||||
|
||||
|
||||
files_with_dates.append((file, file_date))
|
||||
|
||||
|
||||
|
||||
# [DEDUPLICATION]
|
||||
if deduplicate:
|
||||
logger.info("[DEDUPLICATION] Starting checksum-based deduplication.")
|
||||
logger.info("Начало дедупликации на основе контрольных сумм.")
|
||||
for file in files_with_dates:
|
||||
file_path = file[0]
|
||||
# [DEBUG_ARCHIVE] Log file being checked for deduplication
|
||||
logger.debug(f"[DEBUG_ARCHIVE][DEDUPLICATION] Checking file: {file_path.name}")
|
||||
try:
|
||||
crc32_checksum = calculate_crc32(file_path)
|
||||
if crc32_checksum in checksums:
|
||||
# Duplicate found: delete this file and keep the first one seen with the same checksum (iteration order follows glob, not file age)
|
||||
logger.warning(f"[DEDUPLICATION] Duplicate found: {file_path}. Deleting.")
|
||||
# [DEBUG_ARCHIVE][DEDUPLICATION] Log duplicate found and deletion attempt
|
||||
logger.debug(f"[DEBUG_ARCHIVE][DEDUPLICATION] Duplicate found: {file_path.name}. Checksum: {crc32_checksum}. Attempting deletion.")
|
||||
file_path.unlink()
|
||||
else:
|
||||
checksums[crc32_checksum] = file_path
|
||||
# [DEBUG_ARCHIVE][DEDUPLICATION] Log file kept after deduplication check
|
||||
logger.debug(f"[DEBUG_ARCHIVE][DEDUPLICATION] Keeping file: {file_path.name}. Checksum: {crc32_checksum}.")
|
||||
except Exception as e:
|
||||
logger.error(f"[DEDUPLICATION_ERROR] Error processing {file_path}: {str(e)}", exc_info=True)
|
||||
|
||||
# [PROCESSING] Применение политик хранения
|
||||
# [DEBUG_ARCHIVE] Log files before retention policy
|
||||
logger.debug(f"[DEBUG_ARCHIVE] Files with dates before retention policy: {[f.name for f, d in files_with_dates]}")
|
||||
keep_files = apply_retention_policy(
|
||||
files_with_dates,
|
||||
daily_retention,
|
||||
@@ -257,17 +277,26 @@ def archive_exports(
|
||||
monthly_retention,
|
||||
logger
|
||||
)
|
||||
# [DEBUG_ARCHIVE] Log files to keep after retention policy
|
||||
logger.debug(f"[DEBUG_ARCHIVE] Files to keep after retention policy: {[f.name for f in keep_files]}")
|
||||
|
||||
|
||||
# [CLEANUP] Удаление устаревших файлов
|
||||
deleted_count = 0
|
||||
for file, _ in files_with_dates:
|
||||
# [DEBUG_ARCHIVE] Check file for deletion
|
||||
logger.debug(f"[DEBUG_ARCHIVE] Checking file for deletion: {file.name}. Should keep: {file in keep_files}")
|
||||
if file not in keep_files:
|
||||
try:
|
||||
# [DEBUG_ARCHIVE][FILE_REMOVED_ATTEMPT] Log deletion attempt
|
||||
logger.info(f"[DEBUG_ARCHIVE][FILE_REMOVED_ATTEMPT] Attempting to delete archive: {file.name}")
|
||||
file.unlink()
|
||||
deleted_count += 1
|
||||
logger.info(f"[FILE_REMOVED] Deleted archive: {file.name}")
|
||||
except OSError as e:
|
||||
logger.error(f"[FILE_ERROR] Error deleting {file.name}: {str(e)}", exc_info=True)
|
||||
# [DEBUG_ARCHIVE][FILE_ERROR] Log deletion error
|
||||
logger.error(f"[DEBUG_ARCHIVE][FILE_ERROR] Error deleting {file.name}: {str(e)}", exc_info=True)
|
||||
|
||||
|
||||
logger.info(f"[ARCHIVE_RESULT] Cleanup completed. Deleted {deleted_count} archives.")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user