add debug for retention

Author: Volobuev Andrey
Date: 2025-07-24 10:21:24 +03:00
parent d23eef096a
commit 672be4fd19
5 changed files with 58 additions and 7 deletions

.gitignore

@@ -1,3 +1,4 @@
 *__pycache__*
 *.ps1
 keyring passwords.py
+logs

@@ -81,10 +81,24 @@ def setup_clients(logger: SupersetLogger):
         )
+        # [DEBUG] Sandbox config created: {sandbox_config.base_url}
+        # [INFO] Initialize the Preprod configuration
+        preprod_config = SupersetConfig(
+            base_url="https://preprodta.bi.dwh.rusal.com/api/v1",
+            auth={
+                "provider": "db",
+                "username": "migrate_user",
+                "password": keyring.get_password("system", "preprod migrate"),
+                "refresh": True
+            },
+            verify_ssl=False
+        )
+        # [DEBUG] Preprod config created: {preprod_config.base_url}
         # [INFO] Create the SupersetClient instances
         clients['dev'] = SupersetClient(dev_config, logger)
         clients['sbx'] = SupersetClient(sandbox_config, logger)
         clients['prod'] = SupersetClient(prod_config, logger)
+        clients['preprod'] = SupersetClient(preprod_config, logger)
         logger.info("[COHERENCE_CHECK_PASSED] Environment clients initialized successfully", extra={"envs": list(clients.keys())})
         return clients
     except Exception as e:
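The new Preprod client reads its password from the system keyring rather than from the repository, so the secret has to be stored once on the machine that runs the backup. A minimal sketch of that one-time setup, using the same service/username pair that the keyring.get_password call above expects:

    import getpass
    import keyring

    # Store the Preprod password under the same service/username pair
    # that setup_clients reads with keyring.get_password("system", "preprod migrate").
    keyring.set_password(
        "system",
        "preprod migrate",
        getpass.getpass("Password for migrate_user: "),
    )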
@@ -175,7 +189,14 @@ def backup_dashboards(client: SupersetClient,
     if rotate_archive:
         # [ANCHOR] ARCHIVE_OLD_BACKUPS
         try:
-            archive_exports(dashboard_dir, logger=logger, deduplicate=True)
+            archive_exports(
+                str(dashboard_dir),
+                daily_retention=7,     # keep the last 7 days
+                weekly_retention=2,    # keep the last 2 weeks
+                monthly_retention=3,   # keep the last 3 months
+                logger=logger,
+                deduplicate=True
+            )
             logger.debug(f"[DEBUG] Old exports for '{dashboard_title}' archived.")
         except Exception as cleanup_error:
             logger.warning(
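The three retention arguments are independent keep-windows, not a single cutoff: an archive survives if it falls into any of the seven most recent days, the two most recent ISO weeks, or the three most recent months (see apply_retention_policy later in this diff). A standalone sketch with hypothetical archive dates:

    from datetime import date, timedelta

    # Hypothetical archive dates: one every 5 days over roughly 4 months.
    today = date(2025, 7, 24)
    archives = [today - timedelta(days=n) for n in range(0, 120, 5)]

    # The same keep-sets that apply_retention_policy builds.
    last_days = sorted(set(archives), reverse=True)[:7]
    last_weeks = sorted({(d.isocalendar().year, d.isocalendar().week) for d in archives}, reverse=True)[:2]
    last_months = sorted({(d.year, d.month) for d in archives}, reverse=True)[:3]

    kept = [d for d in archives
            if d in last_days
            or (d.isocalendar().year, d.isocalendar().week) in last_weeks
            or (d.year, d.month) in last_months]
    print(f"kept {len(kept)} of {len(archives)} archives")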
@@ -214,7 +235,7 @@ def backup_dashboards(client: SupersetClient,
     if clean_folders:
         # [ANCHOR] Remove empty folders
         try:
-            dirs_count = remove_empty_directories(backup_root / env_name , logger=logger)
+            dirs_count = remove_empty_directories(str(backup_root / env_name), logger=logger)
             logger.debug(f"[DEBUG] {dirs_count} empty folders removed in '{backup_root / env_name}'.")
         except Exception as clean_error:
             logger.warning(
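remove_empty_directories itself is not part of this diff; the change only wraps the Path in str(), which suggests the helper expects a plain string path. A hypothetical sketch of such a helper, for orientation only:

    import os

    def remove_empty_directories(root: str, logger=None) -> int:
        """Bottom-up removal of empty subdirectories; returns the count removed."""
        removed = 0
        for dirpath, _, _ in os.walk(root, topdown=False):
            # os.listdir reflects the current state, so directories emptied
            # earlier in this same pass are picked up on the way back up.
            if dirpath != root and not os.listdir(dirpath):
                os.rmdir(dirpath)
                removed += 1
                if logger:
                    logger.debug(f"Removed empty directory: {dirpath}")
        return removed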
@@ -304,6 +325,15 @@ def main() -> int:
         logger=logger
     )
     # [ANCHOR] BACKUP_PROD_ENVIRONMENT
+    preprod_success = backup_dashboards(
+        clients['preprod'],
+        "PREPROD",
+        superset_backup_repo,
+        rotate_archive=True,
+        logger=logger
+    )
     # [ANCHOR] FINAL_REPORT
     # [INFO] Final backup report
     logger.info("="*50)
@@ -311,6 +341,7 @@ def main() -> int:
     logger.info(f"[INFO] DEV: {'Success' if dev_success else 'With errors'}")
     logger.info(f"[INFO] SBX: {'Success' if sbx_success else 'With errors'}")
     logger.info(f"[INFO] PROD: {'Success' if prod_success else 'With errors'}")
+    logger.info(f"[INFO] PREPROD: {'Success' if preprod_success else 'With errors'}")
     logger.info(f"[INFO] Full log available at: {log_dir}")
     if not (dev_success and sbx_success and prod_success):
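Note that the exit-status check above still covers only DEV, SBX, and PROD: preprod_success is logged but does not affect the return code. If PREPROD failures should also flip the exit status, the condition could be extended, a possible follow-up that is not part of this commit:

    if not all((dev_success, sbx_success, prod_success, preprod_success)):
        ...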

@@ -278,7 +278,7 @@ clients = setup_clients(logger)
 # Search for all tables in the dataset
 results = search_datasets(
     client=clients['dev'],
-    search_pattern=r'dm_view\.counterparty',
+    search_pattern=r'dm_view\.account_debt',
     search_fields=["sql"],
     logger=logger
 )
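The raw string with an escaped dot indicates that search_pattern is matched as a regular expression against the fields listed in search_fields. A standalone illustration, assuming the "sql" field holds raw query text:

    import re

    pattern = re.compile(r'dm_view\.account_debt')
    sample_sql = "SELECT * FROM dm_view.account_debt WHERE load_dt >= '2025-07-01'"
    assert pattern.search(sample_sql) is not None
    # The escaped dot matches only a literal ".", so a name like
    # "dm_viewXaccount_debt" would not match.
    assert pattern.search("dm_viewXaccount_debt") is None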

@@ -24,7 +24,7 @@ class SupersetConfig(BaseModel):
     - `auth` must contain the required fields for login/password authentication.
     - `timeout` must be a positive number.
     """
-    base_url: str = Field(..., description="Base Superset API URL, including the /api/v1 version.", regex=r'.*/api/v1.*')
+    base_url: str = Field(..., description="Base Superset API URL, including the /api/v1 version.", pattern=r'.*/api/v1.*')
     auth: Dict[str, str] = Field(..., description="Dictionary with authentication data (provider, username, password, refresh).")
     verify_ssl: bool = Field(True, description="Flag for SSL certificate verification.")
     timeout: int = Field(30, description="Timeout in seconds for HTTP requests.")
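The regex= to pattern= rename matches the Pydantic v2 API, where Field(regex=...) was removed in favor of Field(pattern=...). A minimal check of the base_url constraint, assuming Pydantic v2 is installed (UrlCheck is just a scratch model for illustration):

    from pydantic import BaseModel, Field, ValidationError

    class UrlCheck(BaseModel):
        base_url: str = Field(..., pattern=r'.*/api/v1.*')

    UrlCheck(base_url="https://superset.example.com/api/v1")   # accepted
    try:
        UrlCheck(base_url="https://superset.example.com")      # missing /api/v1
    except ValidationError as err:
        print(err)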

@@ -283,10 +283,18 @@ def archive_exports(
     # [CLEANUP] Delete outdated files
     deleted_count = 0
-    for file, _ in files_with_dates:
+    files_to_delete = []
+    files_to_keep = []
+    for file, file_date in files_with_dates:
         # [DEBUG_ARCHIVE] Check file for deletion
-        logger.debug(f"[DEBUG_ARCHIVE] Checking file for deletion: {file.name}. Should keep: {file in keep_files}")
-        if file not in keep_files:
+        should_keep = file in keep_files
+        logger.debug(f"[DEBUG_ARCHIVE] Checking file for deletion: {file.name} (date: {file_date}). Should keep: {should_keep}")
+        if should_keep:
+            files_to_keep.append(file.name)
+        else:
+            files_to_delete.append(file.name)
             try:
                 # [DEBUG_ARCHIVE][FILE_REMOVED_ATTEMPT] Log deletion attempt
                 logger.info(f"[DEBUG_ARCHIVE][FILE_REMOVED_ATTEMPT] Attempting to delete archive: {file.name}")
@@ -297,6 +305,9 @@ def archive_exports(
                 # [DEBUG_ARCHIVE][FILE_ERROR] Log deletion error
                 logger.error(f"[DEBUG_ARCHIVE][FILE_ERROR] Error deleting {file.name}: {str(e)}", exc_info=True)
+    logger.debug(f"[DEBUG_ARCHIVE] Summary - Files to keep: {files_to_keep}")
+    logger.debug(f"[DEBUG_ARCHIVE] Summary - Files to delete: {files_to_delete}")
     logger.info(f"[ARCHIVE_RESULT] Cleanup completed. Deleted {deleted_count} archives.")
@@ -323,30 +334,38 @@ def apply_retention_policy(
     weekly_groups = defaultdict(list)
     monthly_groups = defaultdict(list)
+    logger.debug(f"[RETENTION_DEBUG] Processing {len(files_with_dates)} files for retention policy")
     for file, file_date in files_with_dates:
         daily_groups[file_date].append(file)
         weekly_groups[(file_date.isocalendar().year, file_date.isocalendar().week)].append(file)
         monthly_groups[(file_date.year, file_date.month)].append(file)
+    logger.debug(f"[RETENTION_DEBUG] Grouped into {len(daily_groups)} daily groups, {len(weekly_groups)} weekly groups, {len(monthly_groups)} monthly groups")
     # [SELECTION] Select files to keep
     keep_files = set()
     # Daily - last N days
     sorted_daily = sorted(daily_groups.keys(), reverse=True)[:daily]
+    logger.debug(f"[RETENTION_DEBUG] Daily groups to keep: {sorted_daily}")
     for day in sorted_daily:
         keep_files.update(daily_groups[day])
     # Weekly - last N weeks
     sorted_weekly = sorted(weekly_groups.keys(), reverse=True)[:weekly]
+    logger.debug(f"[RETENTION_DEBUG] Weekly groups to keep: {sorted_weekly}")
     for week in sorted_weekly:
         keep_files.update(weekly_groups[week])
     # Monthly - last N months
     sorted_monthly = sorted(monthly_groups.keys(), reverse=True)[:monthly]
+    logger.debug(f"[RETENTION_DEBUG] Monthly groups to keep: {sorted_monthly}")
     for month in sorted_monthly:
         keep_files.update(monthly_groups[month])
     logger.debug(f"[RETENTION] Files kept: {len(keep_files)}")
+    logger.debug(f"[RETENTION_DEBUG] Files to keep: {[f.name for f in keep_files]}")
     return keep_files
 # [CONTRACT] Save and unpack the dashboard
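The weekly grouping above accesses date.isocalendar() by attribute name (.year, .week), which is available from Python 3.9 on; older versions return a plain tuple that must be indexed as [0] and [1]. The three grouping keys for one sample date:

    from datetime import date

    d = date(2025, 7, 24)
    daily_key = d                                               # date(2025, 7, 24)
    weekly_key = (d.isocalendar().year, d.isocalendar().week)   # (2025, 30)
    monthly_key = (d.year, d.month)                             # (2025, 7)
    print(daily_key, weekly_key, monthly_key)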