mappings+migrate

This commit is contained in:
2025-12-27 10:16:41 +03:00
parent 3d75a21127
commit 6962a78112
19 changed files with 925 additions and 143 deletions

View File

@@ -0,0 +1,71 @@
# [DEF:backend.src.api.routes.migration:Module]
# @SEMANTICS: api, migration, dashboards
# @PURPOSE: API endpoints for migration operations.
# @LAYER: API
# @RELATION: DEPENDS_ON -> backend.src.dependencies
# @RELATION: DEPENDS_ON -> backend.src.models.dashboard
from fastapi import APIRouter, Depends, HTTPException
from typing import List, Dict
from backend.src.dependencies import get_config_manager, get_task_manager
from backend.src.models.dashboard import DashboardMetadata, DashboardSelection
from backend.src.core.superset_client import SupersetClient
from superset_tool.models import SupersetConfig
router = APIRouter(prefix="/api", tags=["migration"])
# [DEF:get_dashboards:Function]
# @PURPOSE: Fetch all dashboards from the specified environment for the grid.
# @PRE: Environment ID must be valid.
# @POST: Returns a list of dashboard metadata.
# @PARAM: env_id (str) - The ID of the environment to fetch from.
# @RETURN: List[DashboardMetadata]
@router.get("/environments/{env_id}/dashboards", response_model=List[DashboardMetadata])
async def get_dashboards(env_id: str, config_manager=Depends(get_config_manager)):
environments = config_manager.get_environments()
env = next((e for e in environments if e.id == env_id), None)
if not env:
raise HTTPException(status_code=404, detail="Environment not found")
config = SupersetConfig(
env=env.name,
base_url=env.url,
auth={'provider': 'db', 'username': env.username, 'password': env.password, 'refresh': False},
verify_ssl=True,
timeout=30
)
client = SupersetClient(config)
dashboards = client.get_dashboards_summary()
return dashboards
# [/DEF:get_dashboards]
# [DEF:execute_migration:Function]
# @PURPOSE: Execute the migration of selected dashboards.
# @PRE: Selection must be valid and environments must exist.
# @POST: Starts the migration task and returns the task ID.
# @PARAM: selection (DashboardSelection) - The dashboards to migrate.
# @RETURN: Dict - {"task_id": str, "message": str}
@router.post("/migration/execute")
async def execute_migration(selection: DashboardSelection, config_manager=Depends(get_config_manager), task_manager=Depends(get_task_manager)):
# Validate environments exist
environments = config_manager.get_environments()
env_ids = {e.id for e in environments}
if selection.source_env_id not in env_ids or selection.target_env_id not in env_ids:
raise HTTPException(status_code=400, detail="Invalid source or target environment")
# Create migration task with debug logging
from ...core.logger import logger
logger.info(f"Creating migration task with selection: {selection.dict()}")
logger.info(f"Available environments: {env_ids}")
logger.info(f"Source env: {selection.source_env_id}, Target env: {selection.target_env_id}")
try:
task = await task_manager.create_task("superset-migration", selection.dict())
logger.info(f"Task created successfully: {task.id}")
return {"task_id": task.id, "message": "Migration initiated"}
except Exception as e:
logger.error(f"Task creation failed: {e}")
raise HTTPException(status_code=500, detail=f"Failed to create migration task: {str(e)}")
# [/DEF:execute_migration]
# [/DEF:backend.src.api.routes.migration]

View File

@@ -20,7 +20,7 @@ import os
from .dependencies import get_task_manager
from .core.logger import logger
from .api.routes import plugins, tasks, settings, environments, mappings
from .api.routes import plugins, tasks, settings, environments, mappings, migration
from .core.database import init_db
# Initialize database
@@ -51,6 +51,7 @@ app.include_router(tasks.router, prefix="/api/tasks", tags=["Tasks"])
app.include_router(settings.router, prefix="/api/settings", tags=["Settings"])
app.include_router(environments.router)
app.include_router(mappings.router)
app.include_router(migration.router)
# [DEF:WebSocketEndpoint:Endpoint]
# @SEMANTICS: websocket, logs, streaming, real-time

View File

@@ -15,6 +15,8 @@ import shutil
import tempfile
from pathlib import Path
from typing import Dict
from .logger import logger, belief_scope
import yaml
# [/SECTION]
# [DEF:MigrationEngine:Class]
@@ -26,37 +28,51 @@ class MigrationEngine:
# @PARAM: zip_path (str) - Path to the source ZIP file.
# @PARAM: output_path (str) - Path where the transformed ZIP will be saved.
# @PARAM: db_mapping (Dict[str, str]) - Mapping of source UUID to target UUID.
# @PARAM: strip_databases (bool) - Whether to remove the databases directory from the archive.
# @RETURN: bool - True if successful.
def transform_zip(self, zip_path: str, output_path: str, db_mapping: Dict[str, str]) -> bool:
def transform_zip(self, zip_path: str, output_path: str, db_mapping: Dict[str, str], strip_databases: bool = True) -> bool:
"""
Transform a Superset export ZIP by replacing database UUIDs.
"""
with tempfile.TemporaryDirectory() as temp_dir_str:
temp_dir = Path(temp_dir_str)
with belief_scope("MigrationEngine.transform_zip"):
with tempfile.TemporaryDirectory() as temp_dir_str:
temp_dir = Path(temp_dir_str)
try:
# 1. Extract
with zipfile.ZipFile(zip_path, 'r') as zf:
zf.extractall(temp_dir)
try:
# 1. Extract
logger.info(f"[MigrationEngine.transform_zip][Action] Extracting ZIP: {zip_path}")
with zipfile.ZipFile(zip_path, 'r') as zf:
zf.extractall(temp_dir)
# 2. Transform YAMLs
# Datasets are usually in datasets/*.yaml
dataset_files = list(temp_dir.glob("**/datasets/*.yaml"))
for ds_file in dataset_files:
self._transform_yaml(ds_file, db_mapping)
# 2. Transform YAMLs
# Datasets are usually in datasets/*.yaml
dataset_files = list(temp_dir.glob("**/datasets/**/*.yaml")) + list(temp_dir.glob("**/datasets/*.yaml"))
dataset_files = list(set(dataset_files))
logger.info(f"[MigrationEngine.transform_zip][State] Found {len(dataset_files)} dataset files.")
for ds_file in dataset_files:
logger.info(f"[MigrationEngine.transform_zip][Action] Transforming dataset: {ds_file}")
self._transform_yaml(ds_file, db_mapping)
# 3. Re-package
with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
for root, dirs, files in os.walk(temp_dir):
for file in files:
file_path = Path(root) / file
arcname = file_path.relative_to(temp_dir)
zf.write(file_path, arcname)
return True
except Exception as e:
print(f"Error transforming ZIP: {e}")
return False
# 3. Re-package
logger.info(f"[MigrationEngine.transform_zip][Action] Re-packaging ZIP to: {output_path} (strip_databases={strip_databases})")
with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
for root, dirs, files in os.walk(temp_dir):
rel_root = Path(root).relative_to(temp_dir)
if strip_databases and "databases" in rel_root.parts:
logger.info(f"[MigrationEngine.transform_zip][Action] Skipping file in databases directory: {rel_root}")
continue
for file in files:
file_path = Path(root) / file
arcname = file_path.relative_to(temp_dir)
zf.write(file_path, arcname)
return True
except Exception as e:
logger.error(f"[MigrationEngine.transform_zip][Coherence:Failed] Error transforming ZIP: {e}")
return False
# [DEF:MigrationEngine._transform_yaml:Function]
# @PURPOSE: Replaces database_uuid in a single YAML file.

View File

@@ -47,12 +47,17 @@ class PluginLoader:
Loads a single Python module and extracts PluginBase subclasses.
"""
# Try to determine the correct package prefix based on how the app is running
if "backend.src" in __name__:
# For standalone execution, we need to handle the import differently
if __name__ == "__main__" or "test" in __name__:
# When running as standalone or in tests, use relative import
package_name = f"plugins.{module_name}"
elif "backend.src" in __name__:
package_prefix = "backend.src.plugins"
package_name = f"{package_prefix}.{module_name}"
else:
package_prefix = "src.plugins"
package_name = f"{package_prefix}.{module_name}"
package_name = f"{package_prefix}.{module_name}"
# print(f"DEBUG: Loading plugin {module_name} as {package_name}")
spec = importlib.util.spec_from_file_location(package_name, file_path)
if spec is None or spec.loader is None:
@@ -106,9 +111,11 @@ class PluginLoader:
# validate(instance={}, schema=schema)
self._plugins[plugin_id] = plugin_instance
self._plugin_configs[plugin_id] = plugin_config
print(f"Plugin '{plugin_instance.name}' (ID: {plugin_id}) loaded successfully.") # Replace with proper logging
from ..core.logger import logger
logger.info(f"Plugin '{plugin_instance.name}' (ID: {plugin_id}) loaded successfully.")
except Exception as e:
print(f"Error validating plugin '{plugin_instance.name}' (ID: {plugin_id}): {e}") # Replace with proper logging
from ..core.logger import logger
logger.error(f"Error validating plugin '{plugin_instance.name}' (ID: {plugin_id}): {e}")
def get_plugin(self, plugin_id: str) -> Optional[PluginBase]:

View File

@@ -52,6 +52,32 @@ class SupersetClient(BaseSupersetClient):
return databases[0] if databases else None
# [/DEF:SupersetClient.get_database_by_uuid]
# [DEF:SupersetClient.get_dashboards_summary:Function]
# @PURPOSE: Fetches dashboard metadata optimized for the grid.
# @POST: Returns a list of dashboard dictionaries.
# @RETURN: List[Dict]
def get_dashboards_summary(self) -> List[Dict]:
"""
Fetches dashboard metadata optimized for the grid.
Returns a list of dictionaries mapped to DashboardMetadata fields.
"""
query = {
"columns": ["id", "dashboard_title", "changed_on_utc", "published"]
}
_, dashboards = self.get_dashboards(query=query)
# Map fields to DashboardMetadata schema
result = []
for dash in dashboards:
result.append({
"id": dash.get("id"),
"title": dash.get("dashboard_title"),
"last_modified": dash.get("changed_on_utc"),
"status": "published" if dash.get("published") else "draft"
})
return result
# [/DEF:SupersetClient.get_dashboards_summary]
# [/DEF:SupersetClient]
# [/DEF:backend.src.core.superset_client]

View File

@@ -64,7 +64,10 @@ class TaskManager:
self.tasks: Dict[str, Task] = {}
self.subscribers: Dict[str, List[asyncio.Queue]] = {}
self.executor = ThreadPoolExecutor(max_workers=5) # For CPU-bound plugin execution
self.loop = asyncio.get_event_loop()
try:
self.loop = asyncio.get_running_loop()
except RuntimeError:
self.loop = asyncio.get_event_loop()
self.task_futures: Dict[str, asyncio.Future] = {}
# [/DEF]
@@ -72,17 +75,25 @@ class TaskManager:
"""
Creates and queues a new task for execution.
"""
from ..core.logger import logger
logger.info(f"TaskManager: Creating task for plugin '{plugin_id}' with params: {params}")
if not self.plugin_loader.has_plugin(plugin_id):
logger.error(f"TaskManager: Plugin with ID '{plugin_id}' not found.")
raise ValueError(f"Plugin with ID '{plugin_id}' not found.")
plugin = self.plugin_loader.get_plugin(plugin_id)
logger.info(f"TaskManager: Found plugin '{plugin.name}' for task creation")
# Validate params against plugin schema (this will be done at a higher level, e.g., API route)
# For now, a basic check
if not isinstance(params, dict):
logger.error("TaskManager: Task parameters must be a dictionary.")
raise ValueError("Task parameters must be a dictionary.")
task = Task(plugin_id=plugin_id, params=params, user_id=user_id)
self.tasks[task.id] = task
logger.info(f"TaskManager: Task {task.id} created and scheduled for execution")
self.loop.create_task(self._run_task(task.id)) # Schedule task for execution
return task
@@ -90,9 +101,11 @@ class TaskManager:
"""
Internal method to execute a task.
"""
from ..core.logger import logger
task = self.tasks[task_id]
plugin = self.plugin_loader.get_plugin(task.plugin_id)
logger.info(f"TaskManager: Starting execution of task {task_id} for plugin '{plugin.name}'")
task.status = TaskStatus.RUNNING
task.started_at = datetime.utcnow()
self._add_log(task_id, "INFO", f"Task started for plugin '{plugin.name}'")
@@ -103,17 +116,27 @@ class TaskManager:
# If the plugin's execute method is already async, this can be simplified.
# Pass task_id to plugin so it can signal pause
params = {**task.params, "_task_id": task_id}
await self.loop.run_in_executor(
self.executor,
lambda: asyncio.run(plugin.execute(params)) if asyncio.iscoroutinefunction(plugin.execute) else plugin.execute(params)
)
logger.info(f"TaskManager: Executing plugin '{plugin.name}' with params: {params}")
if asyncio.iscoroutinefunction(plugin.execute):
logger.info(f"TaskManager: Executing async plugin '{plugin.name}'")
await plugin.execute(params)
else:
logger.info(f"TaskManager: Executing sync plugin '{plugin.name}' in executor")
await self.loop.run_in_executor(
self.executor,
plugin.execute,
params
)
logger.info(f"TaskManager: Task {task_id} completed successfully for plugin '{plugin.name}'")
task.status = TaskStatus.SUCCESS
self._add_log(task_id, "INFO", f"Task completed successfully for plugin '{plugin.name}'")
except Exception as e:
logger.error(f"TaskManager: Task {task_id} failed for plugin '{plugin.name}': {e}")
task.status = TaskStatus.FAILED
self._add_log(task_id, "ERROR", f"Task failed: {e}", {"error_type": type(e).__name__})
finally:
task.finished_at = datetime.utcnow()
logger.info(f"TaskManager: Task {task_id} execution finished with status: {task.status}")
# In a real system, you might notify clients via WebSocket here
async def resolve_task(self, task_id: str, resolution_params: Dict[str, Any]):

View File

@@ -21,7 +21,12 @@ def get_config_manager() -> ConfigManager:
plugin_dir = Path(__file__).parent / "plugins"
plugin_loader = PluginLoader(plugin_dir=str(plugin_dir))
from .core.logger import logger
logger.info(f"PluginLoader initialized with directory: {plugin_dir}")
logger.info(f"Available plugins: {[config.name for config in plugin_loader.get_all_plugin_configs()]}")
task_manager = TaskManager(plugin_loader)
logger.info("TaskManager initialized")
def get_plugin_loader() -> PluginLoader:
"""Dependency injector for the PluginLoader."""

View File

@@ -0,0 +1,27 @@
# [DEF:backend.src.models.dashboard:Module]
# @SEMANTICS: dashboard, model, metadata, migration
# @PURPOSE: Defines data models for dashboard metadata and selection.
# @LAYER: Model
# @RELATION: USED_BY -> backend.src.api.routes.migration
from pydantic import BaseModel
from typing import List
# [DEF:DashboardMetadata:Class]
# @PURPOSE: Represents a dashboard available for migration.
class DashboardMetadata(BaseModel):
id: int
title: str
last_modified: str
status: str
# [/DEF:DashboardMetadata]
# [DEF:DashboardSelection:Class]
# @PURPOSE: Represents the user's selection of dashboards to migrate.
class DashboardSelection(BaseModel):
selected_ids: List[int]
source_env_id: str
target_env_id: str
# [/DEF:DashboardSelection]
# [/DEF:backend.src.models.dashboard]

View File

@@ -87,34 +87,72 @@ class MigrationPlugin(PluginBase):
}
async def execute(self, params: Dict[str, Any]):
from_env = params["from_env"]
to_env = params["to_env"]
dashboard_regex = params["dashboard_regex"]
source_env_id = params.get("source_env_id")
target_env_id = params.get("target_env_id")
selected_ids = params.get("selected_ids")
# Legacy support or alternative params
from_env_name = params.get("from_env")
to_env_name = params.get("to_env")
dashboard_regex = params.get("dashboard_regex")
replace_db_config = params.get("replace_db_config", False)
from_db_id = params.get("from_db_id")
to_db_id = params.get("to_db_id")
logger = SupersetLogger(log_dir=Path.cwd() / "logs", console=True)
logger.info(f"[MigrationPlugin][Entry] Starting migration from {from_env} to {to_env}.")
logger.info(f"[MigrationPlugin][Entry] Starting migration task.")
logger.info(f"[MigrationPlugin][Action] Params: {params}")
try:
config_manager = get_config_manager()
all_clients = setup_clients(logger, custom_envs=config_manager.get_environments())
from_c = all_clients.get(from_env)
to_c = all_clients.get(to_env)
environments = config_manager.get_environments()
# Resolve environments
src_env = None
tgt_env = None
if source_env_id:
src_env = next((e for e in environments if e.id == source_env_id), None)
elif from_env_name:
src_env = next((e for e in environments if e.name == from_env_name), None)
if target_env_id:
tgt_env = next((e for e in environments if e.id == target_env_id), None)
elif to_env_name:
tgt_env = next((e for e in environments if e.name == to_env_name), None)
if not src_env or not tgt_env:
raise ValueError(f"Could not resolve source or target environment. Source: {source_env_id or from_env_name}, Target: {target_env_id or to_env_name}")
from_env_name = src_env.name
to_env_name = tgt_env.name
logger.info(f"[MigrationPlugin][State] Resolved environments: {from_env_name} -> {to_env_name}")
all_clients = setup_clients(logger, custom_envs=environments)
from_c = all_clients.get(from_env_name)
to_c = all_clients.get(to_env_name)
if not from_c or not to_c:
raise ValueError(f"One or both environments ('{from_env}', '{to_env}') not found in configuration.")
raise ValueError(f"Clients not initialized for environments: {from_env_name}, {to_env_name}")
_, all_dashboards = from_c.get_dashboards()
regex_str = str(dashboard_regex)
dashboards_to_migrate = [
d for d in all_dashboards if re.search(regex_str, d["dashboard_title"], re.IGNORECASE)
]
dashboards_to_migrate = []
if selected_ids:
dashboards_to_migrate = [d for d in all_dashboards if d["id"] in selected_ids]
elif dashboard_regex:
regex_str = str(dashboard_regex)
dashboards_to_migrate = [
d for d in all_dashboards if re.search(regex_str, d["dashboard_title"], re.IGNORECASE)
]
else:
logger.warning("[MigrationPlugin][State] No selection criteria provided (selected_ids or dashboard_regex).")
return
if not dashboards_to_migrate:
logger.warning("[MigrationPlugin][State] No dashboards found matching the regex.")
logger.warning("[MigrationPlugin][State] No dashboards found matching criteria.")
return
# Fetch mappings from database
@@ -123,8 +161,8 @@ class MigrationPlugin(PluginBase):
db = SessionLocal()
try:
# Find environment IDs by name
src_env = db.query(Environment).filter(Environment.name == from_env).first()
tgt_env = db.query(Environment).filter(Environment.name == to_env).first()
src_env = db.query(Environment).filter(Environment.name == from_env_name).first()
tgt_env = db.query(Environment).filter(Environment.name == to_env_name).first()
if src_env and tgt_env:
mappings = db.query(DatabaseMapping).filter(
@@ -144,46 +182,39 @@ class MigrationPlugin(PluginBase):
try:
exported_content, _ = from_c.export_dashboard(dash_id)
with create_temp_file(content=exported_content, dry_run=True, suffix=".zip", logger=logger) as tmp_zip_path:
if not replace_db_config:
to_c.import_dashboard(file_name=tmp_zip_path, dash_id=dash_id, dash_slug=dash_slug)
else:
# Check for missing mappings before transformation
# This is a simplified check, in reality we'd check all YAMLs
# For US3, we'll just use the engine and handle missing ones there
with create_temp_file(suffix=".zip", dry_run=True, logger=logger) as tmp_new_zip:
# If we have missing mappings, we might need to pause
# For now, let's assume the engine can tell us what's missing
success = engine.transform_zip(str(tmp_zip_path), str(tmp_new_zip), db_mapping)
if not success:
# Signal missing mapping and wait
task_id = params.get("_task_id")
if task_id:
from ..dependencies import get_task_manager
tm = get_task_manager()
logger.info(f"[MigrationPlugin][Action] Pausing for missing mapping in task {task_id}")
# In a real scenario, we'd pass the missing DB info to the frontend
# For this task, we'll just simulate the wait
await tm.wait_for_resolution(task_id)
# After resolution, retry transformation with updated mappings
# (Mappings would be updated in task.params by resolve_task)
db = SessionLocal()
try:
src_env = db.query(Environment).filter(Environment.name == from_env).first()
tgt_env = db.query(Environment).filter(Environment.name == to_env).first()
mappings = db.query(DatabaseMapping).filter(
DatabaseMapping.source_env_id == src_env.id,
DatabaseMapping.target_env_id == tgt_env.id
).all()
db_mapping = {m.source_db_uuid: m.target_db_uuid for m in mappings}
finally:
db.close()
success = engine.transform_zip(str(tmp_zip_path), str(tmp_new_zip), db_mapping)
# Always transform to strip databases to avoid password errors
with create_temp_file(suffix=".zip", dry_run=True, logger=logger) as tmp_new_zip:
success = engine.transform_zip(str(tmp_zip_path), str(tmp_new_zip), db_mapping, strip_databases=False)
if not success and replace_db_config:
# Signal missing mapping and wait (only if we care about mappings)
task_id = params.get("_task_id")
if task_id:
from ..dependencies import get_task_manager
tm = get_task_manager()
logger.info(f"[MigrationPlugin][Action] Pausing for missing mapping in task {task_id}")
# In a real scenario, we'd pass the missing DB info to the frontend
# For this task, we'll just simulate the wait
await tm.wait_for_resolution(task_id)
# After resolution, retry transformation with updated mappings
# (Mappings would be updated in task.params by resolve_task)
db = SessionLocal()
try:
src_env = db.query(Environment).filter(Environment.name == from_env_name).first()
tgt_env = db.query(Environment).filter(Environment.name == to_env_name).first()
mappings = db.query(DatabaseMapping).filter(
DatabaseMapping.source_env_id == src_env.id,
DatabaseMapping.target_env_id == tgt_env.id
).all()
db_mapping = {m.source_db_uuid: m.target_db_uuid for m in mappings}
finally:
db.close()
success = engine.transform_zip(str(tmp_zip_path), str(tmp_new_zip), db_mapping, strip_databases=False)
if success:
to_c.import_dashboard(file_name=tmp_new_zip, dash_id=dash_id, dash_slug=dash_slug)
else:
logger.error(f"[MigrationPlugin][Failure] Failed to transform ZIP for dashboard {title}")
if success:
to_c.import_dashboard(file_name=tmp_new_zip, dash_id=dash_id, dash_slug=dash_slug)
else:
logger.error(f"[MigrationPlugin][Failure] Failed to transform ZIP for dashboard {title}")
logger.info(f"[MigrationPlugin][Success] Dashboard {title} imported.")
except Exception as exc: