From 672be4fd1935a19e750113f95674ad2928921492 Mon Sep 17 00:00:00 2001
From: Volobuev Andrey
Date: Thu, 24 Jul 2025 10:21:24 +0300
Subject: [PATCH] add debug for retention

---
 .gitignore                    |  1 +
 backup_script.py              | 37 ++++++++++++++++++++++++++++++++++---
 search_script.py              |  2 +-
 superset_tool/models.py       |  2 +-
 superset_tool/utils/fileio.py | 25 ++++++++++++++++++++++---
 5 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/.gitignore b/.gitignore
index ae83fe8..e8eb2da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 *__pycache__*
 *.ps1
 keyring passwords.py
+logs
diff --git a/backup_script.py b/backup_script.py
index edd022b..d5d0079 100644
--- a/backup_script.py
+++ b/backup_script.py
@@ -81,10 +81,24 @@ def setup_clients(logger: SupersetLogger):
         )
         # [DEBUG] Sandbox config created: {sandbox_config.base_url}
+
+        # [INFO] Initialize the Preprod configuration
+        preprod_config = SupersetConfig(
+            base_url="https://preprodta.bi.dwh.rusal.com/api/v1",
+            auth={
+                "provider": "db",
+                "username": "migrate_user",
+                "password": keyring.get_password("system", "preprod migrate"),
+                "refresh": True
+            },
+            verify_ssl=False
+        )
+        # [DEBUG] Preprod config created: {preprod_config.base_url}
 
         # [INFO] Create SupersetClient instances
         clients['dev'] = SupersetClient(dev_config, logger)
         clients['sbx'] = SupersetClient(sandbox_config,logger)
         clients['prod'] = SupersetClient(prod_config,logger)
+        clients['preprod'] = SupersetClient(preprod_config, logger)
         logger.info("[COHERENCE_CHECK_PASSED] Clients for all environments initialized successfully", extra={"envs": list(clients.keys())})
         return clients
     except Exception as e:
@@ -175,7 +189,14 @@ def backup_dashboards(client: SupersetClient,
             if rotate_archive:
                 # [ANCHOR] ARCHIVE_OLD_BACKUPS
                 try:
-                    archive_exports(dashboard_dir, logger=logger, deduplicate=True)
+                    archive_exports(
+                        str(dashboard_dir),
+                        daily_retention=7,     # Keep the last 7 days
+                        weekly_retention=2,    # Keep the last 2 weeks
+                        monthly_retention=3,   # Keep the last 3 months
+                        logger=logger,
+                        deduplicate=True
+                    )
                     logger.debug(f"[DEBUG] Old exports for '{dashboard_title}' have been archived.")
                 except Exception as cleanup_error:
                     logger.warning(
@@ -214,7 +235,7 @@ def backup_dashboards(client: SupersetClient,
         if clean_folders:
             # [ANCHOR] Remove empty directories
             try:
-                dirs_count = remove_empty_directories(backup_root / env_name , logger=logger)
+                dirs_count = remove_empty_directories(str(backup_root / env_name), logger=logger)
                 logger.debug(f"[DEBUG] Removed {dirs_count} empty directories in '{backup_root / env_name}'.")
             except Exception as clean_error:
                 logger.warning(
@@ -303,6 +324,15 @@ def main() -> int:
         rotate_archive=True,
         logger=logger
     )
+
+    # [ANCHOR] BACKUP_PREPROD_ENVIRONMENT
+    preprod_success = backup_dashboards(
+        clients['preprod'],
+        "PREPROD",
+        superset_backup_repo,
+        rotate_archive=True,
+        logger=logger
+    )
 
     # [ANCHOR] FINAL_REPORT
     # [INFO] Final backup report
@@ -311,6 +341,7 @@
     logger.info(f"[INFO] DEV: {'Success' if dev_success else 'Completed with errors'}")
     logger.info(f"[INFO] SBX: {'Success' if sbx_success else 'Completed with errors'}")
     logger.info(f"[INFO] PROD: {'Success' if prod_success else 'Completed with errors'}")
+    logger.info(f"[INFO] PREPROD: {'Success' if preprod_success else 'Completed with errors'}")
     logger.info(f"[INFO] Full log available at: {log_dir}")
 
-    if not (dev_success and sbx_success and prod_success):
+    if not (dev_success and sbx_success and prod_success and preprod_success):
diff --git a/search_script.py b/search_script.py
index 595a5ea..20937d4 100644
--- a/search_script.py
+++ b/search_script.py
@@ -278,7 +278,7 @@ clients = setup_clients(logger)
 # Search for all tables in the dataset
 results = search_datasets(
     client=clients['dev'],
-    search_pattern=r'dm_view\.counterparty',
+    search_pattern=r'dm_view\.account_debt',
     search_fields=["sql"],
     logger=logger
 )
diff --git a/superset_tool/models.py b/superset_tool/models.py
index 0f64271..354e664 100644
--- a/superset_tool/models.py
+++ b/superset_tool/models.py
@@ -24,7 +24,7 @@ class SupersetConfig(BaseModel):
     - `auth` must contain the required fields for login/password authentication.
     - `timeout` must be a positive number.
     """
-    base_url: str = Field(..., description="Base URL of the Superset API, including the /api/v1 version.", regex=r'.*/api/v1.*')
+    base_url: str = Field(..., description="Base URL of the Superset API, including the /api/v1 version.", pattern=r'.*/api/v1.*')
     auth: Dict[str, str] = Field(..., description="Dictionary with authentication data (provider, username, password, refresh).")
     verify_ssl: bool = Field(True, description="Flag for SSL certificate verification.")
     timeout: int = Field(30, description="Timeout in seconds for HTTP requests.")
diff --git a/superset_tool/utils/fileio.py b/superset_tool/utils/fileio.py
index 84e098b..0e6d47a 100644
--- a/superset_tool/utils/fileio.py
+++ b/superset_tool/utils/fileio.py
@@ -283,10 +283,18 @@ def archive_exports(
 
     # [CLEANUP] Remove outdated files
     deleted_count = 0
-    for file, _ in files_with_dates:
+    files_to_delete = []
+    files_to_keep = []
+
+    for file, file_date in files_with_dates:
         # [DEBUG_ARCHIVE] Check file for deletion
-        logger.debug(f"[DEBUG_ARCHIVE] Checking file for deletion: {file.name}. Should keep: {file in keep_files}")
-        if file not in keep_files:
+        should_keep = file in keep_files
+        logger.debug(f"[DEBUG_ARCHIVE] Checking file for deletion: {file.name} (date: {file_date}). Should keep: {should_keep}")
+
+        if should_keep:
+            files_to_keep.append(file.name)
+        else:
+            files_to_delete.append(file.name)
             try:
                 # [DEBUG_ARCHIVE][FILE_REMOVED_ATTEMPT] Log deletion attempt
                 logger.info(f"[DEBUG_ARCHIVE][FILE_REMOVED_ATTEMPT] Attempting to delete archive: {file.name}")
@@ -296,6 +304,9 @@ def archive_exports(
             except OSError as e:
                 # [DEBUG_ARCHIVE][FILE_ERROR] Log deletion error
                 logger.error(f"[DEBUG_ARCHIVE][FILE_ERROR] Error deleting {file.name}: {str(e)}", exc_info=True)
+
+    logger.debug(f"[DEBUG_ARCHIVE] Summary - Files to keep: {files_to_keep}")
+    logger.debug(f"[DEBUG_ARCHIVE] Summary - Files to delete: {files_to_delete}")
 
     logger.info(f"[ARCHIVE_RESULT] Cleanup completed. Deleted {deleted_count} archives.")
@@ -323,30 +334,38 @@ def apply_retention_policy(
     weekly_groups = defaultdict(list)
     monthly_groups = defaultdict(list)
 
+    logger.debug(f"[RETENTION_DEBUG] Processing {len(files_with_dates)} files for retention policy")
+
     for file, file_date in files_with_dates:
         daily_groups[file_date].append(file)
         weekly_groups[(file_date.isocalendar().year, file_date.isocalendar().week)].append(file)
         monthly_groups[(file_date.year, file_date.month)].append(file)
+
+    logger.debug(f"[RETENTION_DEBUG] Grouped into {len(daily_groups)} daily groups, {len(weekly_groups)} weekly groups, {len(monthly_groups)} monthly groups")
 
     # [SELECTION] Select files to keep
     keep_files = set()
 
     # Daily - the last N days
     sorted_daily = sorted(daily_groups.keys(), reverse=True)[:daily]
+    logger.debug(f"[RETENTION_DEBUG] Daily groups to keep: {sorted_daily}")
     for day in sorted_daily:
         keep_files.update(daily_groups[day])
 
     # Weekly - the last N weeks
     sorted_weekly = sorted(weekly_groups.keys(), reverse=True)[:weekly]
+    logger.debug(f"[RETENTION_DEBUG] Weekly groups to keep: {sorted_weekly}")
     for week in sorted_weekly:
         keep_files.update(weekly_groups[week])
 
     # Monthly - the last N months
     sorted_monthly = sorted(monthly_groups.keys(), reverse=True)[:monthly]
+    logger.debug(f"[RETENTION_DEBUG] Monthly groups to keep: {sorted_monthly}")
     for month in sorted_monthly:
         keep_files.update(monthly_groups[month])
 
     logger.debug(f"[RETENTION] Files kept: {len(keep_files)}")
+    logger.debug(f"[RETENTION_DEBUG] Files to keep: {[f.name for f in keep_files]}")
     return keep_files
 
 # [CONTRACT] Save and unpack a dashboard
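
Reviewer note (not part of the patch): the new [RETENTION_DEBUG] lines instrument the grouping logic of apply_retention_policy. Below is a minimal standalone sketch of that selection, assuming files_with_dates is a list of (Path, datetime.date) pairs and Python 3.9+ for isocalendar() attribute access; select_keep_files and the sample file names are illustrative stand-ins, not code from the repository:

    from collections import defaultdict
    from datetime import date
    from pathlib import Path

    def select_keep_files(files_with_dates, daily=7, weekly=2, monthly=3):
        # Bucket every file by calendar day, ISO week, and calendar month.
        daily_groups = defaultdict(list)
        weekly_groups = defaultdict(list)
        monthly_groups = defaultdict(list)
        for file, d in files_with_dates:
            daily_groups[d].append(file)
            weekly_groups[(d.isocalendar().year, d.isocalendar().week)].append(file)
            monthly_groups[(d.year, d.month)].append(file)

        # A file survives if ANY rule keeps its group; groups are kept newest-first.
        keep_files = set()
        for day in sorted(daily_groups, reverse=True)[:daily]:
            keep_files.update(daily_groups[day])
        for week in sorted(weekly_groups, reverse=True)[:weekly]:
            keep_files.update(weekly_groups[week])
        for month in sorted(monthly_groups, reverse=True)[:monthly]:
            keep_files.update(monthly_groups[month])
        return keep_files

    # One archive per month for a year: the daily rule keeps the 7 newest
    # day-groups (here, the 7 newest files), so months 01-05 fall out.
    files = [(Path(f"export_2025-{m:02d}-15.zip"), date(2025, m, 15)) for m in range(1, 13)]
    print(sorted(p.name for p in select_keep_files(files)))

Note that each rule keeps every file inside a retained group rather than one representative per day/week/month, so with fewer distinct days/weeks/months than the retention limits nothing is deleted at all; the new summary debug lines make exactly this visible.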
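
On the models.py hunk: renaming regex= to pattern= in Field() tracks Pydantic v2, where the v1 regex keyword was removed. A quick self-contained check, assuming pydantic>=2 is installed (the class here is a stripped-down stand-in, not the repository's full SupersetConfig):

    from pydantic import BaseModel, Field, ValidationError

    class SupersetConfig(BaseModel):
        # Pydantic v2 spells the string constraint `pattern`; v1 used `regex`.
        base_url: str = Field(..., pattern=r'.*/api/v1.*')

    SupersetConfig(base_url="https://host.example/api/v1")   # accepted
    try:
        SupersetConfig(base_url="https://host.example")      # no /api/v1 -> rejected
    except ValidationError as exc:
        print(exc)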