# [DEF:backend.src.core.migration_engine:Module]
#
# @SEMANTICS: migration, engine, zip, yaml, transformation
# @PURPOSE: Handles the interception and transformation of Superset asset ZIP archives.
# @LAYER: Core
# @RELATION: DEPENDS_ON -> PyYAML
#
# @INVARIANT: ZIP structure must be preserved after transformation.

# [SECTION: IMPORTS]
import os
import shutil
import tempfile
import zipfile
from pathlib import Path
from typing import Dict

import yaml

from .logger import logger, belief_scope
# [/SECTION]


# [DEF:MigrationEngine:Class]
# @PURPOSE: Engine for transforming Superset export ZIPs.
class MigrationEngine:

    # [DEF:MigrationEngine.transform_zip:Function]
    # @PURPOSE: Extracts ZIP, replaces database UUIDs in YAMLs, and re-packages.
    # @PARAM: zip_path (str) - Path to the source ZIP file.
    # @PARAM: output_path (str) - Path where the transformed ZIP will be saved.
    # @PARAM: db_mapping (Dict[str, str]) - Mapping of source UUID to target UUID.
    # @PARAM: strip_databases (bool) - Whether to remove the databases directory from the archive.
    # @RETURN: bool - True if successful.
    def transform_zip(self, zip_path: str, output_path: str, db_mapping: Dict[str, str], strip_databases: bool = True) -> bool:
        """
        Transform a Superset export ZIP by replacing database UUIDs.

        The archive is extracted into a temporary directory, every YAML file
        found under a ``datasets`` directory is passed to ``_transform_yaml``,
        and the directory tree is then written to ``output_path`` as a
        deflate-compressed ZIP. When ``strip_databases`` is true, files located
        under any directory named ``databases`` are left out of the output.

        Any exception raised along the way is logged and reported to the caller
        as a ``False`` return value; nothing is re-raised.
        """
        with belief_scope("MigrationEngine.transform_zip"):
            with tempfile.TemporaryDirectory() as temp_dir_str:
                temp_dir = Path(temp_dir_str)

                try:
                    # 1. Extract
                    logger.info(f"[MigrationEngine.transform_zip][Action] Extracting ZIP: {zip_path}")
                    with zipfile.ZipFile(zip_path, 'r') as zf:
                        zf.extractall(temp_dir)

                    # 2. Transform YAMLs
                    # Datasets are usually in datasets/*.yaml
                    # Both patterns are kept and de-duplicated through a set;
                    # sorting makes the processing (and log) order deterministic.
                    dataset_files = sorted(
                        set(temp_dir.glob("**/datasets/**/*.yaml"))
                        | set(temp_dir.glob("**/datasets/*.yaml"))
                    )

                    logger.info(f"[MigrationEngine.transform_zip][State] Found {len(dataset_files)} dataset files.")
                    for ds_file in dataset_files:
                        logger.info(f"[MigrationEngine.transform_zip][Action] Transforming dataset: {ds_file}")
                        self._transform_yaml(ds_file, db_mapping)

                    # 3. Re-package
                    logger.info(f"[MigrationEngine.transform_zip][Action] Re-packaging ZIP to: {output_path} (strip_databases={strip_databases})")
                    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
                        for root, dirs, files in os.walk(temp_dir):
                            rel_root = Path(root).relative_to(temp_dir)

                            # Skip every directory that is, or lives below, a
                            # "databases" directory when stripping is requested.
                            if strip_databases and "databases" in rel_root.parts:
                                logger.info(f"[MigrationEngine.transform_zip][Action] Skipping file in databases directory: {rel_root}")
                                continue

                            for file in files:
                                file_path = Path(root) / file
                                # Archive names are relative to the extraction
                                # root so the original layout is reproduced.
                                arcname = file_path.relative_to(temp_dir)
                                zf.write(file_path, arcname)

                    return True
                except Exception as e:
                    logger.error(f"[MigrationEngine.transform_zip][Coherence:Failed] Error transforming ZIP: {e}")
                    return False
    # [/DEF:MigrationEngine.transform_zip]

    # [DEF:MigrationEngine._transform_yaml:Function]
    # @PURPOSE: Replaces database_uuid in a single YAML file.
    def _transform_yaml(self, file_path: Path, db_mapping: Dict[str, str]) -> None:
        """
        Replace the top-level ``database_uuid`` of one YAML file in place.

        The file is rewritten only when its ``database_uuid`` value is a key of
        ``db_mapping``; otherwise it is left byte-for-byte untouched. Files that
        are empty or whose top-level document is not a mapping are ignored.

        :param file_path: Path of the YAML file to update.
        :param db_mapping: Mapping of source database UUID to target UUID.
        """
        # Read explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        # Empty documents and non-mapping documents (lists, scalars) carry no
        # database_uuid key to replace.
        if not data or not isinstance(data, dict):
            return

        # Superset dataset YAML structure:
        # database_uuid: ...
        source_uuid = data.get('database_uuid')
        if source_uuid not in db_mapping:
            # Nothing to remap: avoid re-serialising the file, which would
            # needlessly discard its comments and original formatting.
            return

        data['database_uuid'] = db_mapping[source_uuid]

        with open(file_path, 'w', encoding='utf-8') as f:
            # sort_keys=False keeps the keys in their original order and
            # allow_unicode=True writes non-ASCII text as-is instead of
            # escaping it.
            yaml.dump(data, f, sort_keys=False, allow_unicode=True)
    # [/DEF:MigrationEngine._transform_yaml]

# [/DEF:MigrationEngine]

# [/DEF:backend.src.core.migration_engine]