# [DEF:backend.src.core.migration_engine:Module]
#
# @SEMANTICS: migration, engine, zip, yaml, transformation
# @PURPOSE: Handles the interception and transformation of Superset asset ZIP archives.
# @LAYER: Core
# @RELATION: DEPENDS_ON -> PyYAML
#
# @INVARIANT: ZIP structure must be preserved after transformation.

# [SECTION: IMPORTS]
import zipfile
import yaml
import os
import shutil
import tempfile
from pathlib import Path
from typing import Dict
# [/SECTION]


# [DEF:MigrationEngine:Class]
# @PURPOSE: Engine for transforming Superset export ZIPs.
class MigrationEngine:

    # [DEF:MigrationEngine.transform_zip:Function]
    # @PURPOSE: Extracts ZIP, replaces database UUIDs in YAMLs, and re-packages.
    # @PARAM: zip_path (str) - Path to the source ZIP file.
    # @PARAM: output_path (str) - Path where the transformed ZIP will be saved.
    # @PARAM: db_mapping (Dict[str, str]) - Mapping of source UUID to target UUID.
    # @RETURN: bool - True if successful.
    def transform_zip(self, zip_path: str, output_path: str, db_mapping: Dict[str, str]) -> bool:
        """
        Transform a Superset export ZIP by replacing database UUIDs.

        The archive is extracted into a temporary directory, every dataset
        YAML found under a ``datasets`` directory is passed to
        ``_transform_yaml``, and the directory tree is then written to
        ``output_path`` as a deflated ZIP with the same relative paths.

        Any exception raised along the way is reported on stdout and turned
        into a ``False`` return value; nothing is re-raised to the caller.
        """
        with tempfile.TemporaryDirectory() as temp_dir_str:
            temp_dir = Path(temp_dir_str)

            try:
                # 1. Extract
                with zipfile.ZipFile(zip_path, 'r') as zf:
                    zf.extractall(temp_dir)

                # 2. Transform YAMLs
                # Superset exports place datasets one level below the
                # "datasets" folder (datasets/<database_name>/<dataset>.yaml),
                # so search recursively; "**" also matches zero directories,
                # which keeps the flat datasets/*.yaml layout working.
                # set() removes duplicates that overlapping "**" expansions can
                # yield, so a file is never remapped twice (that would matter
                # for chained mappings such as A -> B, B -> C).
                dataset_files = sorted(set(temp_dir.glob("**/datasets/**/*.yaml")))
                for ds_file in dataset_files:
                    if ds_file.is_file():
                        self._transform_yaml(ds_file, db_mapping)

                # 3. Re-package
                with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
                    for root, _dirs, files in os.walk(temp_dir):
                        for file in files:
                            file_path = Path(root) / file
                            # Store paths relative to the extraction root so the
                            # archive layout matches the source ZIP.
                            arcname = file_path.relative_to(temp_dir)
                            zf.write(file_path, arcname)

                return True
            except Exception as e:
                print(f"Error transforming ZIP: {e}")
                return False
    # [/DEF:MigrationEngine.transform_zip]

    # [DEF:MigrationEngine._transform_yaml:Function]
    # @PURPOSE: Replaces database_uuid in a single YAML file.
    # @PARAM: file_path (Path) - Path to the dataset YAML file, edited in place.
    # @PARAM: db_mapping (Dict[str, str]) - Mapping of source UUID to target UUID.
    def _transform_yaml(self, file_path: Path, db_mapping: Dict[str, str]):
        """
        Rewrite ``database_uuid`` in one YAML file according to ``db_mapping``.

        The file is only rewritten when its top-level ``database_uuid`` is a
        key of ``db_mapping``; empty documents, non-mapping documents and
        files with an unmapped UUID are left untouched.
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        # Empty files load as None and list/scalar documents have no keys;
        # neither can carry a database_uuid.
        if not isinstance(data, dict):
            return

        # Superset dataset YAML structure:
        # database_uuid: ...
        source_uuid = data.get('database_uuid')

        # The string check also protects the dict lookup from unhashable YAML
        # values (lists, mappings).
        if not isinstance(source_uuid, str) or source_uuid not in db_mapping:
            return

        data['database_uuid'] = db_mapping[source_uuid]

        with open(file_path, 'w', encoding='utf-8') as f:
            # Keep the original key order and write non-ASCII text verbatim so
            # the rest of the dataset definition is not altered.
            yaml.safe_dump(data, f, sort_keys=False, allow_unicode=True)
    # [/DEF:MigrationEngine._transform_yaml]

# [/DEF:MigrationEngine]

# [/DEF:backend.src.core.migration_engine]