2026-01-12 12:33:51 +03:00
parent 7a9b1a190a
commit 696aac32e7
30 changed files with 1511 additions and 593 deletions

View File

@@ -1 +1 @@
-from . import plugins, tasks, settings
+from . import plugins, tasks, settings, connections

View File

@@ -0,0 +1,78 @@
# [DEF:ConnectionsRouter:Module]
# @SEMANTICS: api, router, connections, database
# @PURPOSE: Defines the FastAPI router for managing external database connections.
# @LAYER: UI (API)
# @RELATION: Depends on SQLAlchemy session.
# @CONSTRAINT: Must use belief_scope for logging.
# [SECTION: IMPORTS]
from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.orm import Session
from ...core.database import get_db
from ...models.connection import ConnectionConfig
from pydantic import BaseModel, Field
from datetime import datetime
from ...core.logger import logger, belief_scope
# [/SECTION]
router = APIRouter()
# [DEF:ConnectionSchema:Class]
class ConnectionSchema(BaseModel):
id: str
name: str
type: str
host: Optional[str] = None
port: Optional[int] = None
database: Optional[str] = None
username: Optional[str] = None
created_at: datetime
class Config:
orm_mode = True
# [DEF:ConnectionCreate:Class]
class ConnectionCreate(BaseModel):
name: str
type: str
host: Optional[str] = None
port: Optional[int] = None
database: Optional[str] = None
username: Optional[str] = None
password: Optional[str] = None
# [DEF:list_connections:Function]
@router.get("", response_model=List[ConnectionSchema])
async def list_connections(db: Session = Depends(get_db)):
with belief_scope("ConnectionsRouter.list_connections"):
connections = db.query(ConnectionConfig).all()
return connections
# [DEF:create_connection:Function]
@router.post("", response_model=ConnectionSchema, status_code=status.HTTP_201_CREATED)
async def create_connection(connection: ConnectionCreate, db: Session = Depends(get_db)):
with belief_scope("ConnectionsRouter.create_connection", f"name={connection.name}"):
db_connection = ConnectionConfig(**connection.dict())
db.add(db_connection)
db.commit()
db.refresh(db_connection)
logger.info(f"[ConnectionsRouter.create_connection][Success] Created connection {db_connection.id}")
return db_connection
# [DEF:delete_connection:Function]
@router.delete("/{connection_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_connection(connection_id: str, db: Session = Depends(get_db)):
with belief_scope("ConnectionsRouter.delete_connection", f"id={connection_id}"):
db_connection = db.query(ConnectionConfig).filter(ConnectionConfig.id == connection_id).first()
if not db_connection:
logger.error(f"[ConnectionsRouter.delete_connection][State] Connection {connection_id} not found")
raise HTTPException(status_code=404, detail="Connection not found")
db.delete(db_connection)
db.commit()
logger.info(f"[ConnectionsRouter.delete_connection][Success] Deleted connection {connection_id}")
return
# [/DEF:ConnectionsRouter:Module]
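For reference, a quick smoke test of the new endpoints; a sketch using `requests`, assuming the backend is served locally on port 8000 (adjust the base URL to your deployment):

    import requests

    BASE = "http://localhost:8000/api/settings/connections"  # assumed local dev URL

    # Create a connection; the response follows ConnectionSchema, so the
    # password is accepted in the request body but never echoed back.
    created = requests.post(BASE, json={
        "name": "Production DWH",
        "type": "postgres",
        "host": "10.0.0.1",
        "port": 5432,
        "database": "dwh",
        "username": "superset",
        "password": "secret",
    }).json()

    # List all saved connections, then delete the one we just created.
    print(requests.get(BASE).json())
    requests.delete(f"{BASE}/{created['id']}").raise_for_status()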

View File

@@ -20,7 +20,7 @@ import os
 from .dependencies import get_task_manager, get_scheduler_service
 from .core.logger import logger
-from .api.routes import plugins, tasks, settings, environments, mappings, migration
+from .api.routes import plugins, tasks, settings, environments, mappings, migration, connections
 from .core.database import init_db
 # [DEF:App:Global]
@@ -66,6 +66,7 @@ async def log_requests(request: Request, call_next):
 app.include_router(plugins.router, prefix="/api/plugins", tags=["Plugins"])
 app.include_router(tasks.router, prefix="/api/tasks", tags=["Tasks"])
 app.include_router(settings.router, prefix="/api/settings", tags=["Settings"])
+app.include_router(connections.router, prefix="/api/settings/connections", tags=["Connections"])
 app.include_router(environments.router, prefix="/api/environments", tags=["Environments"])
 app.include_router(mappings.router)
 app.include_router(migration.router)

View File

@@ -12,8 +12,9 @@
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker, Session
 from backend.src.models.mapping import Base
-# Import TaskRecord to ensure it's registered with Base
+# Import models to ensure they're registered with Base
 from backend.src.models.task import TaskRecord
+from backend.src.models.connection import ConnectionConfig
 import os
 # [/SECTION]
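The side-effect imports matter because SQLAlchemy only creates tables for models that are registered on `Base.metadata` by the time `create_all` runs. A minimal sketch of the idea, assuming `init_db` ultimately calls `create_all` (not shown in this hunk) and a SQLite URL:

    from sqlalchemy import create_engine
    from backend.src.models.mapping import Base
    from backend.src.models.task import TaskRecord              # noqa: F401 -- registers task table
    from backend.src.models.connection import ConnectionConfig  # noqa: F401 -- registers connection_configs

    engine = create_engine("sqlite:///./app.db")  # database URL is an assumption
    Base.metadata.create_all(engine)  # creates every table imported above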

View File

@@ -78,6 +78,28 @@ class SupersetClient(BaseSupersetClient):
 return result
 # [/DEF:SupersetClient.get_dashboards_summary:Function]
+# [DEF:SupersetClient.get_dataset:Function]
+# @PURPOSE: Fetch full dataset structure including columns and metrics.
+# @PARAM: dataset_id (int) - The ID of the dataset.
+# @RETURN: Dict - The dataset metadata.
+def get_dataset(self, dataset_id: int) -> Dict:
+"""
+Fetch full dataset structure.
+"""
+return self.network.get(f"/api/v1/dataset/{dataset_id}").json()
+# [/DEF:SupersetClient.get_dataset:Function]
+# [DEF:SupersetClient.update_dataset:Function]
+# @PURPOSE: Update dataset metadata.
+# @PARAM: dataset_id (int) - The ID of the dataset.
+# @PARAM: data (Dict) - The payload for update.
+def update_dataset(self, dataset_id: int, data: Dict):
+"""
+Update dataset metadata.
+"""
+self.network.put(f"/api/v1/dataset/{dataset_id}", json=data)
+# [/DEF:SupersetClient.update_dataset:Function]
 # [/DEF:SupersetClient:Class]
 # [/DEF:backend.src.core.superset_client:Module]
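The two new methods form a fetch-modify-update pair. A sketch of the intended flow, assuming an authenticated `client` and that the GET response nests the payload under 'result' (as the debug plugin below also assumes); the exact fields Superset's PUT endpoint accepts vary by version, so treat the payload as illustrative:

    # Fetch dataset 42, rename one column's verbose name, and push it back.
    dataset = client.get_dataset(42).get("result", {})
    columns = dataset.get("columns", [])
    for col in columns:
        if col.get("column_name") == "acct_id":  # hypothetical column
            col["verbose_name"] = "Account ID"
    client.update_dataset(42, {"columns": columns})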

View File

@@ -98,9 +98,9 @@ class TaskManager:
 params = {**task.params, "_task_id": task_id}
 if asyncio.iscoroutinefunction(plugin.execute):
-await plugin.execute(params)
+task.result = await plugin.execute(params)
 else:
-await self.loop.run_in_executor(
+task.result = await self.loop.run_in_executor(
 self.executor,
 plugin.execute,
 params
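The only change here is that a plugin's return value is now assigned to `task.result` instead of being discarded, on both the coroutine and thread-pool execution paths. A stand-alone sketch of the contract (names are invented for the example):

    import asyncio

    async def execute(params):
        # A plugin's execute() returns a JSON-serializable dict...
        return {"echo": params}

    async def run_task():
        # ...which the TaskManager now keeps: task.result = await plugin.execute(params)
        result = await execute({"env": "dev"})
        print(result)  # {'echo': {'env': 'dev'}}

    asyncio.run(run_task())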

View File

@@ -51,6 +51,7 @@ class Task(BaseModel):
 params: Dict[str, Any] = Field(default_factory=dict)
 input_required: bool = False
 input_request: Optional[Dict[str, Any]] = None
+result: Optional[Dict[str, Any]] = None
 # [DEF:Task.__init__:Function]
 # @PURPOSE: Initializes the Task model and validates input_request for AWAITING_INPUT status.

View File

@@ -43,6 +43,7 @@ class TaskPersistenceService:
 record.started_at = task.started_at
 record.finished_at = task.finished_at
 record.params = task.params
+record.result = task.result
 # Store logs as JSON, converting datetime to string
 record.logs = []
@@ -108,6 +109,7 @@ class TaskPersistenceService:
 started_at=record.started_at,
 finished_at=record.finished_at,
 params=record.params or {},
+result=record.result,
 logs=logs
 )
 loaded_tasks.append(task)

View File

@@ -0,0 +1,34 @@
# [DEF:backend.src.models.connection:Module]
#
# @SEMANTICS: database, connection, configuration, sqlalchemy, sqlite
# @PURPOSE: Defines the database schema for external database connection configurations.
# @LAYER: Domain
# @RELATION: DEPENDS_ON -> sqlalchemy
#
# @INVARIANT: All primary keys are UUID strings.
# [SECTION: IMPORTS]
from sqlalchemy import Column, String, Integer, DateTime
from sqlalchemy.sql import func
from .mapping import Base
import uuid
# [/SECTION]
# [DEF:ConnectionConfig:Class]
# @PURPOSE: Stores credentials for external databases used for column mapping.
class ConnectionConfig(Base):
__tablename__ = "connection_configs"
id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
name = Column(String, nullable=False)
type = Column(String, nullable=False) # e.g., "postgres"
host = Column(String, nullable=True)
port = Column(Integer, nullable=True)
database = Column(String, nullable=True)
username = Column(String, nullable=True)
password = Column(String, nullable=True) # NOTE: currently stored as-is; intended to be encrypted/obfuscated
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())
# [/DEF:ConnectionConfig:Class]
# [/DEF:backend.src.models.connection:Module]
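A sketch of using the model directly through `SessionLocal` from `backend.src.core.database` (the same pattern the mapper plugin uses below); the connection values are placeholders:

    from backend.src.core.database import SessionLocal
    from backend.src.models.connection import ConnectionConfig

    db = SessionLocal()
    try:
        conn = ConnectionConfig(
            name="Production DWH", type="postgres",
            host="10.0.0.1", port=5432,
            database="dwh", username="superset", password="secret",
        )
        db.add(conn)
        db.commit()
        # The UUID primary key is generated by the column default.
        print(conn.id, conn.created_at)
    finally:
        db.close()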

View File

@@ -27,6 +27,7 @@ class TaskRecord(Base):
 finished_at = Column(DateTime(timezone=True), nullable=True)
 logs = Column(JSON, nullable=True) # Store structured logs as JSON
 error = Column(String, nullable=True)
+result = Column(JSON, nullable=True)
 created_at = Column(DateTime(timezone=True), server_default=func.now())
 params = Column(JSON, nullable=True)
 # [/DEF:TaskRecord:Class]

View File

@@ -0,0 +1,137 @@
# [DEF:DebugPluginModule:Module]
# @SEMANTICS: plugin, debug, api, database, superset
# @PURPOSE: Implements a plugin for system diagnostics and debugging Superset API responses.
# @LAYER: Plugins
# @RELATION: Inherits from PluginBase. Uses SupersetClient from core.
# @CONSTRAINT: Must use belief_scope for logging.
# [SECTION: IMPORTS]
from typing import Dict, Any, Optional
from ..core.plugin_base import PluginBase
from ..core.superset_client import SupersetClient
from ..core.logger import logger, belief_scope
# [/SECTION]
# [DEF:DebugPlugin:Class]
# @PURPOSE: Plugin for system diagnostics and debugging.
class DebugPlugin(PluginBase):
"""
Plugin for system diagnostics and debugging.
"""
@property
def id(self) -> str:
return "system-debug"
@property
def name(self) -> str:
return "System Debug"
@property
def description(self) -> str:
return "Run system diagnostics and debug Superset API responses."
@property
def version(self) -> str:
return "1.0.0"
# [DEF:DebugPlugin.get_schema:Function]
# @PURPOSE: Returns the JSON schema for the debug plugin parameters.
def get_schema(self) -> Dict[str, Any]:
return {
"type": "object",
"properties": {
"action": {
"type": "string",
"title": "Action",
"enum": ["test-db-api", "get-dataset-structure"],
"default": "test-db-api"
},
"env": {
"type": "string",
"title": "Environment",
"description": "The Superset environment (for dataset structure)."
},
"dataset_id": {
"type": "integer",
"title": "Dataset ID",
"description": "The ID of the dataset (for dataset structure)."
},
"source_env": {
"type": "string",
"title": "Source Environment",
"description": "Source env for DB API test."
},
"target_env": {
"type": "string",
"title": "Target Environment",
"description": "Target env for DB API test."
}
},
"required": ["action"]
}
# [/DEF:DebugPlugin.get_schema:Function]
# [DEF:DebugPlugin.execute:Function]
# @PURPOSE: Executes the debug logic.
async def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
with belief_scope("DebugPlugin.execute", f"params={params}"):
action = params.get("action")
if action == "test-db-api":
return await self._test_db_api(params)
elif action == "get-dataset-structure":
return await self._get_dataset_structure(params)
else:
raise ValueError(f"Unknown action: {action}")
# [/DEF:DebugPlugin.execute:Function]
# [DEF:DebugPlugin._test_db_api:Function]
# @PURPOSE: Fetches database lists from two environments for comparison.
async def _test_db_api(self, params: Dict[str, Any]) -> Dict[str, Any]:
source_env_name = params.get("source_env")
target_env_name = params.get("target_env")
if not source_env_name or not target_env_name:
raise ValueError("source_env and target_env are required for test-db-api")
from ..dependencies import get_config_manager
config_manager = get_config_manager()
results = {}
for name in [source_env_name, target_env_name]:
env_config = config_manager.get_environment(name)
if not env_config:
raise ValueError(f"Environment '{name}' not found.")
client = SupersetClient(env_config)
client.authenticate()
count, dbs = client.get_databases()
results[name] = {
"count": count,
"databases": dbs
}
return results
# [/DEF:DebugPlugin._test_db_api:Function]
# [DEF:DebugPlugin._get_dataset_structure:Function]
# @PURPOSE: Fetches the raw structure of a single dataset from one environment.
async def _get_dataset_structure(self, params: Dict[str, Any]) -> Dict[str, Any]:
env_name = params.get("env")
dataset_id = params.get("dataset_id")
if not env_name or dataset_id is None:
raise ValueError("env and dataset_id are required for get-dataset-structure")
from ..dependencies import get_config_manager
config_manager = get_config_manager()
env_config = config_manager.get_environment(env_name)
if not env_config:
raise ValueError(f"Environment '{env_name}' not found.")
client = SupersetClient(env_config)
client.authenticate()
dataset_response = client.get_dataset(dataset_id)
return dataset_response.get('result') or {}
# [/DEF:DebugPlugin._get_dataset_structure:Function]
# [/DEF:DebugPlugin:Class]
# [/DEF:DebugPluginModule:Module]
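The plugin can also be exercised directly, bypassing the task manager; a sketch assuming a 'dev' environment exists in the config manager (the import path is assumed):

    import asyncio
    import json

    from backend.src.plugins.debug import DebugPlugin  # module path assumed

    result = asyncio.run(DebugPlugin().execute({
        "action": "get-dataset-structure",
        "env": "dev",       # must match a configured environment name
        "dataset_id": 42,
    }))
    print(json.dumps(result, indent=2, ensure_ascii=False))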

View File

@@ -0,0 +1,164 @@
# [DEF:MapperPluginModule:Module]
# @SEMANTICS: plugin, mapper, datasets, postgresql, excel
# @PURPOSE: Implements a plugin for mapping dataset columns using external database connections or Excel files.
# @LAYER: Plugins
# @RELATION: Inherits from PluginBase. Uses DatasetMapper from superset_tool.
# @CONSTRAINT: Must use belief_scope for logging.
# [SECTION: IMPORTS]
from typing import Dict, Any, Optional
from ..core.plugin_base import PluginBase
from ..core.superset_client import SupersetClient
from ..core.logger import logger, belief_scope
from ..core.database import SessionLocal
from ..models.connection import ConnectionConfig
from superset_tool.utils.dataset_mapper import DatasetMapper
from superset_tool.utils.logger import SupersetLogger
# [/SECTION]
# [DEF:MapperPlugin:Class]
# @PURPOSE: Plugin for mapping dataset columns verbose names.
class MapperPlugin(PluginBase):
"""
Plugin for mapping dataset columns verbose names.
"""
@property
def id(self) -> str:
return "dataset-mapper"
@property
def name(self) -> str:
return "Dataset Mapper"
@property
def description(self) -> str:
return "Map dataset column verbose names using PostgreSQL comments or Excel files."
@property
def version(self) -> str:
return "1.0.0"
# [DEF:MapperPlugin.get_schema:Function]
# @PURPOSE: Returns the JSON schema for the mapper plugin parameters.
def get_schema(self) -> Dict[str, Any]:
return {
"type": "object",
"properties": {
"env": {
"type": "string",
"title": "Environment",
"description": "The Superset environment (e.g., 'dev')."
},
"dataset_id": {
"type": "integer",
"title": "Dataset ID",
"description": "The ID of the dataset to update."
},
"source": {
"type": "string",
"title": "Mapping Source",
"enum": ["postgres", "excel"],
"default": "postgres"
},
"connection_id": {
"type": "string",
"title": "Saved Connection",
"description": "The ID of a saved database connection (for postgres source)."
},
"table_name": {
"type": "string",
"title": "Table Name",
"description": "Target table name in PostgreSQL."
},
"table_schema": {
"type": "string",
"title": "Table Schema",
"description": "Target table schema in PostgreSQL.",
"default": "public"
},
"excel_path": {
"type": "string",
"title": "Excel Path",
"description": "Path to the Excel file (for excel source)."
}
},
"required": ["env", "dataset_id", "source"]
}
# [/DEF:MapperPlugin.get_schema:Function]
# [DEF:MapperPlugin.execute:Function]
# @PURPOSE: Executes the dataset mapping logic.
# @PRE: Params contain valid 'env', 'dataset_id', and 'source'.
# @POST: Updates the dataset in Superset.
async def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
with belief_scope("MapperPlugin.execute", f"params={params}"):
env_name = params.get("env")
dataset_id = params.get("dataset_id")
source = params.get("source")
if not env_name or dataset_id is None or not source:
logger.error("[MapperPlugin.execute][State] Missing required parameters.")
raise ValueError("Missing required parameters: env, dataset_id, source")
# Get config and initialize client
from ..dependencies import get_config_manager
config_manager = get_config_manager()
env_config = config_manager.get_environment(env_name)
if not env_config:
logger.error(f"[MapperPlugin.execute][State] Environment '{env_name}' not found.")
raise ValueError(f"Environment '{env_name}' not found in configuration.")
client = SupersetClient(env_config)
client.authenticate()
postgres_config = None
if source == "postgres":
connection_id = params.get("connection_id")
if not connection_id:
logger.error("[MapperPlugin.execute][State] connection_id is required for postgres source.")
raise ValueError("connection_id is required for postgres source.")
# Load connection from DB
db = SessionLocal()
try:
conn_config = db.query(ConnectionConfig).filter(ConnectionConfig.id == connection_id).first()
if not conn_config:
logger.error(f"[MapperPlugin.execute][State] Connection {connection_id} not found.")
raise ValueError(f"Connection {connection_id} not found.")
postgres_config = {
'dbname': conn_config.database,
'user': conn_config.username,
'password': conn_config.password,
'host': conn_config.host,
'port': str(conn_config.port) if conn_config.port else '5432'
}
finally:
db.close()
logger.info(f"[MapperPlugin.execute][Action] Starting mapping for dataset {dataset_id} in {env_name}")
# Use internal SupersetLogger for DatasetMapper
s_logger = SupersetLogger(name="dataset_mapper_plugin")
mapper = DatasetMapper(s_logger)
try:
mapper.run_mapping(
superset_client=client,
dataset_id=dataset_id,
source=source,
postgres_config=postgres_config,
excel_path=params.get("excel_path"),
table_name=params.get("table_name"),
table_schema=params.get("table_schema") or "public"
)
logger.info(f"[MapperPlugin.execute][Success] Mapping completed for dataset {dataset_id}")
return {"status": "success", "dataset_id": dataset_id}
except Exception as e:
logger.error(f"[MapperPlugin.execute][Failure] Mapping failed: {e}")
raise
# [/DEF:MapperPlugin.execute:Function]
# [/DEF:MapperPlugin:Class]
# [/DEF:MapperPluginModule:Module]
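For reference, the two parameter shapes the schema above describes; all values are illustrative:

    # postgres source: verbose names are resolved from column comments
    # on a table reachable through a saved connection.
    postgres_params = {
        "env": "dev",
        "dataset_id": 42,
        "source": "postgres",
        "connection_id": "4f2c1d8e-uuid-of-saved-connection",
        "table_name": "accounts",
        "table_schema": "public",
    }

    # excel source: verbose names are read from a spreadsheet on disk.
    excel_params = {
        "env": "dev",
        "dataset_id": 42,
        "source": "excel",
        "excel_path": "/data/mappings/accounts.xlsx",
    }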

View File

@@ -0,0 +1,161 @@
# [DEF:SearchPluginModule:Module]
# @SEMANTICS: plugin, search, datasets, regex, superset
# @PURPOSE: Implements a plugin for searching text patterns across all datasets in a specific Superset environment.
# @LAYER: Plugins
# @RELATION: Inherits from PluginBase. Uses SupersetClient from core.
# @CONSTRAINT: Must use belief_scope for logging.
# [SECTION: IMPORTS]
import re
from typing import Dict, Any, List, Optional
from ..core.plugin_base import PluginBase
from ..core.superset_client import SupersetClient
from ..core.logger import logger, belief_scope
# [/SECTION]
# [DEF:SearchPlugin:Class]
# @PURPOSE: Plugin for searching text patterns in Superset datasets.
class SearchPlugin(PluginBase):
"""
Plugin for searching text patterns in Superset datasets.
"""
@property
def id(self) -> str:
return "search-datasets"
@property
def name(self) -> str:
return "Search Datasets"
@property
def description(self) -> str:
return "Search for text patterns across all datasets in a specific environment."
@property
def version(self) -> str:
return "1.0.0"
# [DEF:SearchPlugin.get_schema:Function]
# @PURPOSE: Returns the JSON schema for the search plugin parameters.
def get_schema(self) -> Dict[str, Any]:
return {
"type": "object",
"properties": {
"env": {
"type": "string",
"title": "Environment",
"description": "The Superset environment to search in (e.g., 'dev', 'prod')."
},
"query": {
"type": "string",
"title": "Search Query (Regex)",
"description": "The regex pattern to search for."
}
},
"required": ["env", "query"]
}
# [/DEF:SearchPlugin.get_schema:Function]
# [DEF:SearchPlugin.execute:Function]
# @PURPOSE: Executes the dataset search logic.
# @PRE: Params contain valid 'env' and 'query'.
# @POST: Returns a dictionary with count and results list.
async def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
with belief_scope("SearchPlugin.execute", f"params={params}"):
env_name = params.get("env")
search_query = params.get("query")
if not env_name or not search_query:
logger.error("[SearchPlugin.execute][State] Missing required parameters.")
raise ValueError("Missing required parameters: env, query")
# Get config and initialize client
from ..dependencies import get_config_manager
config_manager = get_config_manager()
env_config = config_manager.get_environment(env_name)
if not env_config:
logger.error(f"[SearchPlugin.execute][State] Environment '{env_name}' not found.")
raise ValueError(f"Environment '{env_name}' not found in configuration.")
client = SupersetClient(env_config)
client.authenticate()
logger.info(f"[SearchPlugin.execute][Action] Searching for pattern: '{search_query}' in environment: {env_name}")
try:
# Ported logic from search_script.py
_, datasets = client.get_datasets(query={"columns": ["id", "table_name", "sql", "database", "columns"]})
if not datasets:
logger.warning("[SearchPlugin.execute][State] No datasets found.")
return {"count": 0, "results": []}
pattern = re.compile(search_query, re.IGNORECASE)
results = []
for dataset in datasets:
dataset_id = dataset.get('id')
dataset_name = dataset.get('table_name', 'Unknown')
if not dataset_id:
continue
for field, value in dataset.items():
value_str = str(value)
match_obj = pattern.search(value_str)
if match_obj:
results.append({
"dataset_id": dataset_id,
"dataset_name": dataset_name,
"field": field,
"match_context": self._get_context(value_str, match_obj.group()),
"full_value": value_str
})
logger.info(f"[SearchPlugin.execute][Success] Found matches in {len(results)} locations.")
return {
"count": len(results),
"results": results
}
except re.error as e:
logger.error(f"[SearchPlugin.execute][Failure] Invalid regex pattern: {e}")
raise ValueError(f"Invalid regex pattern: {e}")
except Exception as e:
logger.error(f"[SearchPlugin.execute][Failure] Error during search: {e}")
raise
# [/DEF:SearchPlugin.execute:Function]
# [DEF:SearchPlugin._get_context:Function]
# @PURPOSE: Extracts a small context around the match for display.
def _get_context(self, text: str, match_text: str, context_lines: int = 1) -> str:
"""
Extracts a small context around the match for display.
"""
if not match_text:
return text[:100] + "..." if len(text) > 100 else text
lines = text.splitlines()
match_line_index = -1
for i, line in enumerate(lines):
if match_text in line:
match_line_index = i
break
if match_line_index != -1:
start = max(0, match_line_index - context_lines)
end = min(len(lines), match_line_index + context_lines + 1)
context = []
for i in range(start, end):
line_content = lines[i]
if i == match_line_index:
context.append(f"==> {line_content}")
else:
context.append(f" {line_content}")
return "\n".join(context)
return text[:100] + "..." if len(text) > 100 else text
# [/DEF:SearchPlugin._get_context:Function]
# [/DEF:SearchPlugin:Class]
# [/DEF:SearchPluginModule:Module]
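Called directly, the plugin returns the count plus a flat list of matches; a sketch, assuming a configured 'dev' environment (import path assumed):

    import asyncio

    from backend.src.plugins.search import SearchPlugin  # module path assumed

    found = asyncio.run(SearchPlugin().execute({"env": "dev", "query": r"from\s+dm\."}))
    # Shape: {"count": N, "results": [{"dataset_id", "dataset_name",
    #         "field", "match_context", "full_value"}, ...]}
    for hit in found["results"]:
        print(hit["dataset_id"], hit["field"])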

View File

@@ -1,163 +0,0 @@
# [DEF:backup_script:Module]
#
# @SEMANTICS: backup, superset, automation, dashboard
# @PURPOSE: This module handles automated backup of Superset dashboards.
# @LAYER: App
# @RELATION: DEPENDS_ON -> superset_tool.client
# @RELATION: DEPENDS_ON -> superset_tool.utils
# @PUBLIC_API: BackupConfig, backup_dashboards, main
# [SECTION: IMPORTS]
import logging
import sys
from pathlib import Path
from dataclasses import dataclass, field
from requests.exceptions import RequestException
from superset_tool.client import SupersetClient
from superset_tool.exceptions import SupersetAPIError
from superset_tool.utils.logger import SupersetLogger
from superset_tool.utils.fileio import (
save_and_unpack_dashboard,
archive_exports,
sanitize_filename,
consolidate_archive_folders,
remove_empty_directories,
RetentionPolicy
)
from superset_tool.utils.init_clients import setup_clients
# [/SECTION]
# [DEF:BackupConfig:DataClass]
# @PURPOSE: Holds the configuration for the backup process.
@dataclass
class BackupConfig:
"""Конфигурация для процесса бэкапа."""
consolidate: bool = True
rotate_archive: bool = True
clean_folders: bool = True
retention_policy: RetentionPolicy = field(default_factory=RetentionPolicy)
# [/DEF:BackupConfig:DataClass]
# [DEF:backup_dashboards:Function]
# @PURPOSE: Backs up all available dashboards for the given client and environment, skipping individual export errors.
# @PRE: `client` must be an initialized `SupersetClient` instance.
# @PRE: `env_name` must be a string identifying the environment.
# @PRE: `backup_root` must be a valid path to the backup root directory.
# @POST: Dashboards are exported and saved. Export errors are logged and do not stop the script.
# @RELATION: CALLS -> client.get_dashboards
# @RELATION: CALLS -> client.export_dashboard
# @RELATION: CALLS -> save_and_unpack_dashboard
# @RELATION: CALLS -> archive_exports
# @RELATION: CALLS -> consolidate_archive_folders
# @RELATION: CALLS -> remove_empty_directories
# @PARAM: client (SupersetClient) - Client for accessing the Superset API.
# @PARAM: env_name (str) - Environment name (e.g., 'PROD').
# @PARAM: backup_root (Path) - Root directory for storing backups.
# @PARAM: logger (SupersetLogger) - Logger instance.
# @PARAM: config (BackupConfig) - Backup process configuration.
# @RETURN: bool - `True` if all dashboards were exported without critical errors, `False` otherwise.
def backup_dashboards(
client: SupersetClient,
env_name: str,
backup_root: Path,
logger: SupersetLogger,
config: BackupConfig
) -> bool:
logger.info(f"[backup_dashboards][Entry] Starting backup for {env_name}.")
try:
dashboard_count, dashboard_meta = client.get_dashboards()
logger.info(f"[backup_dashboards][Progress] Found {dashboard_count} dashboards to export in {env_name}.")
if dashboard_count == 0:
return True
success_count = 0
for db in dashboard_meta:
dashboard_id = db.get('id')
dashboard_title = db.get('dashboard_title', 'Unknown Dashboard')
if not dashboard_id:
continue
try:
dashboard_base_dir_name = sanitize_filename(f"{dashboard_title}")
dashboard_dir = backup_root / env_name / dashboard_base_dir_name
dashboard_dir.mkdir(parents=True, exist_ok=True)
zip_content, filename = client.export_dashboard(dashboard_id)
save_and_unpack_dashboard(
zip_content=zip_content,
original_filename=filename,
output_dir=dashboard_dir,
unpack=False,
logger=logger
)
if config.rotate_archive:
archive_exports(str(dashboard_dir), policy=config.retention_policy, logger=logger)
success_count += 1
except (SupersetAPIError, RequestException, IOError, OSError) as db_error:
logger.error(f"[backup_dashboards][Failure] Failed to export dashboard {dashboard_title} (ID: {dashboard_id}): {db_error}", exc_info=True)
continue
if config.consolidate:
consolidate_archive_folders(backup_root / env_name, logger=logger)
if config.clean_folders:
remove_empty_directories(str(backup_root / env_name), logger=logger)
logger.info(f"[backup_dashboards][CoherenceCheck:Passed] Backup logic completed.")
return success_count == dashboard_count
except (RequestException, IOError) as e:
logger.critical(f"[backup_dashboards][Failure] Fatal error during backup for {env_name}: {e}", exc_info=True)
return False
# [/DEF:backup_dashboards:Function]
# [DEF:main:Function]
# @PURPOSE: Main entry point for the backup process.
# @RELATION: CALLS -> setup_clients
# @RELATION: CALLS -> backup_dashboards
# @RETURN: int - Exit code (0 = success, 1 = error).
def main() -> int:
log_dir = Path("P:\\Superset\\010 Бекапы\\Logs")
logger = SupersetLogger(log_dir=log_dir, level=logging.INFO, console=True)
logger.info("[main][Entry] Starting Superset backup process.")
exit_code = 0
try:
clients = setup_clients(logger)
superset_backup_repo = Path("P:\\Superset\\010 Бекапы")
superset_backup_repo.mkdir(parents=True, exist_ok=True)
results = {}
environments = ['dev', 'sbx', 'prod', 'preprod']
backup_config = BackupConfig(rotate_archive=True)
for env in environments:
try:
results[env] = backup_dashboards(
clients[env],
env.upper(),
superset_backup_repo,
logger=logger,
config=backup_config
)
except Exception as env_error:
logger.critical(f"[main][Failure] Critical error for environment {env}: {env_error}", exc_info=True)
results[env] = False
if not all(results.values()):
exit_code = 1
except (RequestException, IOError) as e:
logger.critical(f"[main][Failure] Fatal error in main execution: {e}", exc_info=True)
exit_code = 1
logger.info("[main][Exit] Superset backup process finished.")
return exit_code
# [/DEF:main:Function]
if __name__ == "__main__":
sys.exit(main())
# [/DEF:backup_script:Module]

View File

@@ -1,79 +0,0 @@
# [DEF:debug_db_api:Module]
#
# @SEMANTICS: debug, api, database, script
# @PURPOSE: Script for debugging the structure of the database API response.
# @LAYER: App
# @RELATION: DEPENDS_ON -> superset_tool.client
# @RELATION: DEPENDS_ON -> superset_tool.utils
# @PUBLIC_API: debug_database_api
# [SECTION: IMPORTS]
import json
import logging
from superset_tool.client import SupersetClient
from superset_tool.utils.init_clients import setup_clients
from superset_tool.utils.logger import SupersetLogger
# [/SECTION]
# [DEF:debug_database_api:Function]
# @PURPOSE: Debug the structure of the database API response.
# @RELATION: CALLS -> setup_clients
# @RELATION: CALLS -> client.get_databases
def debug_database_api():
logger = SupersetLogger(name="debug_db_api", level=logging.DEBUG)
# Initialize the clients
clients = setup_clients(logger)
# Log JWT bearer tokens for each client
for env_name, client in clients.items():
try:
# Ensure authentication (access token fetched via headers property)
_ = client.headers
token = client.network._tokens.get("access_token")
logger.info(f"[debug_database_api][Token] Bearer token for {env_name}: {token}")
except Exception as exc:
logger.error(f"[debug_database_api][Token] Failed to retrieve token for {env_name}: {exc}", exc_info=True)
# Show the available environments
print("Available environments:")
for env_name, client in clients.items():
print(f" {env_name}: {client.config.base_url}")
# Pick two environments for testing
if len(clients) < 2:
print("Not enough environments for testing")
return
env_names = list(clients.keys())[:2]
from_env, to_env = env_names[0], env_names[1]
from_client = clients[from_env]
to_client = clients[to_env]
print(f"\nТестируем API для окружений: {from_env} -> {to_env}")
try:
# Fetch the database list from the first environment
print(f"\nFetching database list from {from_env}:")
count, dbs = from_client.get_databases()
print(f"Found {count} databases")
print("Full API response:")
print(json.dumps({"count": count, "result": dbs}, indent=2, ensure_ascii=False))
# Fetch the database list from the second environment
print(f"\nFetching database list from {to_env}:")
count, dbs = to_client.get_databases()
print(f"Found {count} databases")
print("Full API response:")
print(json.dumps({"count": count, "result": dbs}, indent=2, ensure_ascii=False))
except Exception as e:
print(f"Ошибка при тестировании API: {e}")
import traceback
traceback.print_exc()
# [/DEF:debug_database_api:Function]
if __name__ == "__main__":
debug_database_api()
# [/DEF:debug_db_api:Module]

View File

@@ -35,12 +35,25 @@
 >
 Tasks
 </a>
-<a
-href="/settings"
-class="text-gray-600 hover:text-blue-600 font-medium {$page.url.pathname === '/settings' ? 'text-blue-600 border-b-2 border-blue-600' : ''}"
->
-Settings
-</a>
+<div class="relative inline-block group">
+<button class="text-gray-600 hover:text-blue-600 font-medium pb-1 {$page.url.pathname.startsWith('/tools') ? 'text-blue-600 border-b-2 border-blue-600' : ''}">
+Tools
+</button>
+<div class="absolute hidden group-hover:block bg-white shadow-lg rounded-md mt-1 py-2 w-48 z-10 border border-gray-100">
+<a href="/tools/search" class="block px-4 py-2 text-sm text-gray-700 hover:bg-blue-50 hover:text-blue-600">Dataset Search</a>
+<a href="/tools/mapper" class="block px-4 py-2 text-sm text-gray-700 hover:bg-blue-50 hover:text-blue-600">Dataset Mapper</a>
+<a href="/tools/debug" class="block px-4 py-2 text-sm text-gray-700 hover:bg-blue-50 hover:text-blue-600">System Debug</a>
+</div>
+</div>
+<div class="relative inline-block group">
+<button class="text-gray-600 hover:text-blue-600 font-medium pb-1 {$page.url.pathname.startsWith('/settings') ? 'text-blue-600 border-b-2 border-blue-600' : ''}">
+Settings
+</button>
+<div class="absolute hidden group-hover:block bg-white shadow-lg rounded-md mt-1 py-2 w-48 z-10 border border-gray-100">
+<a href="/settings" class="block px-4 py-2 text-sm text-gray-700 hover:bg-blue-50 hover:text-blue-600">General Settings</a>
+<a href="/settings/connections" class="block px-4 py-2 text-sm text-gray-700 hover:bg-blue-50 hover:text-blue-600">Connections</a>
+</div>
+</div>
 </nav>
 </header>
 <!-- [/DEF:Navbar:Component] -->

View File

@@ -0,0 +1,99 @@
<!-- [DEF:ConnectionForm:Component] -->
<!--
@SEMANTICS: connection, form, settings
@PURPOSE: UI component for creating a new database connection configuration.
@LAYER: UI
@RELATION: USES -> frontend/src/services/connectionService.js
-->
<script>
// [SECTION: IMPORTS]
import { createEventDispatcher } from 'svelte';
import { createConnection } from '../../services/connectionService.js';
import { addToast } from '../../lib/toasts.js';
// [/SECTION]
const dispatch = createEventDispatcher();
let name = '';
let type = 'postgres';
let host = '';
let port = 5432;
let database = '';
let username = '';
let password = '';
let isSubmitting = false;
// [DEF:handleSubmit:Function]
// @PURPOSE: Submits the connection form to the backend.
async function handleSubmit() {
if (!name || !host || !database || !username || !password) {
addToast('Please fill in all required fields', 'warning');
return;
}
isSubmitting = true;
try {
const newConnection = await createConnection({
name, type, host, port, database, username, password
});
addToast('Connection created successfully', 'success');
dispatch('success', newConnection);
resetForm();
} catch (e) {
addToast(e.message, 'error');
} finally {
isSubmitting = false;
}
}
function resetForm() {
name = '';
host = '';
port = 5432;
database = '';
username = '';
password = '';
}
</script>
<!-- [SECTION: TEMPLATE] -->
<div class="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
<h3 class="text-lg font-medium text-gray-900 mb-4">Add New Connection</h3>
<form on:submit|preventDefault={handleSubmit} class="space-y-4">
<div>
<label for="conn-name" class="block text-sm font-medium text-gray-700">Connection Name</label>
<input type="text" id="conn-name" bind:value={name} placeholder="e.g. Production DWH" class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" />
</div>
<div class="grid grid-cols-1 md:grid-cols-2 gap-4">
<div>
<label for="conn-host" class="block text-sm font-medium text-gray-700">Host</label>
<input type="text" id="conn-host" bind:value={host} placeholder="10.0.0.1" class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" />
</div>
<div>
<label for="conn-port" class="block text-sm font-medium text-gray-700">Port</label>
<input type="number" id="conn-port" bind:value={port} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" />
</div>
</div>
<div>
<label for="conn-db" class="block text-sm font-medium text-gray-700">Database Name</label>
<input type="text" id="conn-db" bind:value={database} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" />
</div>
<div class="grid grid-cols-1 md:grid-cols-2 gap-4">
<div>
<label for="conn-user" class="block text-sm font-medium text-gray-700">Username</label>
<input type="text" id="conn-user" bind:value={username} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" />
</div>
<div>
<label for="conn-pass" class="block text-sm font-medium text-gray-700">Password</label>
<input type="password" id="conn-pass" bind:value={password} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" />
</div>
</div>
<div class="flex justify-end pt-2">
<button type="submit" disabled={isSubmitting} class="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md shadow-sm text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500 disabled:opacity-50">
{isSubmitting ? 'Creating...' : 'Create Connection'}
</button>
</div>
</form>
</div>
<!-- [/SECTION] -->
<!-- [/DEF:ConnectionForm:Component] -->

View File

@@ -0,0 +1,82 @@
<!-- [DEF:ConnectionList:Component] -->
<!--
@SEMANTICS: connection, list, settings
@PURPOSE: UI component for listing and deleting saved database connection configurations.
@LAYER: UI
@RELATION: USES -> frontend/src/services/connectionService.js
-->
<script>
// [SECTION: IMPORTS]
import { onMount, createEventDispatcher } from 'svelte';
import { getConnections, deleteConnection } from '../../services/connectionService.js';
import { addToast } from '../../lib/toasts.js';
// [/SECTION]
const dispatch = createEventDispatcher();
let connections = [];
let isLoading = true;
// [DEF:fetchConnections:Function]
// @PURPOSE: Fetches the list of connections from the backend.
async function fetchConnections() {
isLoading = true;
try {
connections = await getConnections();
} catch (e) {
addToast('Failed to fetch connections', 'error');
} finally {
isLoading = false;
}
}
// [DEF:handleDelete:Function]
// @PURPOSE: Deletes a connection configuration.
async function handleDelete(id) {
if (!confirm('Are you sure you want to delete this connection?')) return;
try {
await deleteConnection(id);
addToast('Connection deleted', 'success');
await fetchConnections();
} catch (e) {
addToast(e.message, 'error');
}
}
onMount(fetchConnections);
// Expose fetchConnections to parent
export { fetchConnections };
</script>
<!-- [SECTION: TEMPLATE] -->
<div class="bg-white shadow overflow-hidden sm:rounded-md border border-gray-200">
<div class="px-4 py-5 sm:px-6 bg-gray-50 border-b border-gray-200">
<h3 class="text-lg leading-6 font-medium text-gray-900">Saved Connections</h3>
</div>
<ul class="divide-y divide-gray-200">
{#if isLoading}
<li class="p-4 text-center text-gray-500">Loading...</li>
{:else if connections.length === 0}
<li class="p-8 text-center text-gray-500 italic">No connections saved yet.</li>
{:else}
{#each connections as conn}
<li class="p-4 flex items-center justify-between hover:bg-gray-50">
<div>
<div class="text-sm font-medium text-indigo-600 truncate">{conn.name}</div>
<div class="text-xs text-gray-500">{conn.type}://{conn.username}@{conn.host}:{conn.port}/{conn.database}</div>
</div>
<button
on:click={() => handleDelete(conn.id)}
class="ml-2 inline-flex items-center px-2 py-1 border border-transparent text-xs font-medium rounded text-red-700 bg-red-100 hover:bg-red-200 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-red-500"
>
Delete
</button>
</li>
{/each}
{/if}
</ul>
</div>
<!-- [/SECTION] -->
<!-- [/DEF:ConnectionList:Component] -->

View File

@@ -0,0 +1,164 @@
<!-- [DEF:DebugTool:Component] -->
<!--
@SEMANTICS: debug, tool, api, structure
@PURPOSE: UI component for system diagnostics and debugging API responses.
@LAYER: UI
@RELATION: USES -> frontend/src/services/toolsService.js
-->
<script>
// [SECTION: IMPORTS]
import { onMount } from 'svelte';
import { runTask, getTaskStatus } from '../../services/toolsService.js';
import { selectedTask } from '../../lib/stores.js';
import { addToast } from '../../lib/toasts.js';
// [/SECTION]
let envs = [];
let action = 'test-db-api';
let selectedEnv = '';
let datasetId = '';
let sourceEnv = '';
let targetEnv = '';
let isRunning = false;
let results = null;
let pollInterval;
async function fetchEnvironments() {
try {
const res = await fetch('/api/environments');
envs = await res.json();
} catch (e) {
addToast('Failed to fetch environments', 'error');
}
}
async function handleRunDebug() {
isRunning = true;
results = null;
try {
let params = { action };
if (action === 'test-db-api') {
if (!sourceEnv || !targetEnv) {
addToast('Source and Target environments are required', 'warning');
isRunning = false;
return;
}
const sEnv = envs.find(e => e.id === sourceEnv);
const tEnv = envs.find(e => e.id === targetEnv);
params.source_env = sEnv.name;
params.target_env = tEnv.name;
} else {
if (!selectedEnv || !datasetId) {
addToast('Environment and Dataset ID are required', 'warning');
isRunning = false;
return;
}
const env = envs.find(e => e.id === selectedEnv);
params.env = env.name;
params.dataset_id = parseInt(datasetId);
}
const task = await runTask('system-debug', params);
selectedTask.set(task);
startPolling(task.id);
} catch (e) {
isRunning = false;
addToast(e.message, 'error');
}
}
function startPolling(taskId) {
if (pollInterval) clearInterval(pollInterval);
pollInterval = setInterval(async () => {
try {
const task = await getTaskStatus(taskId);
selectedTask.set(task);
if (task.status === 'SUCCESS') {
clearInterval(pollInterval);
isRunning = false;
results = task.result;
addToast('Debug task completed', 'success');
} else if (task.status === 'FAILED') {
clearInterval(pollInterval);
isRunning = false;
addToast('Debug task failed', 'error');
}
} catch (e) {
clearInterval(pollInterval);
isRunning = false;
}
}, 2000);
}
onMount(fetchEnvironments);
</script>
<div class="space-y-6">
<div class="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
<h3 class="text-lg font-medium text-gray-900 mb-4">System Diagnostics</h3>
<div class="mb-4">
<label class="block text-sm font-medium text-gray-700">Debug Action</label>
<select bind:value={action} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm">
<option value="test-db-api">Test Database API (Compare Envs)</option>
<option value="get-dataset-structure">Get Dataset Structure (JSON)</option>
</select>
</div>
{#if action === 'test-db-api'}
<div class="grid grid-cols-1 md:grid-cols-2 gap-4">
<div>
<label for="src-env" class="block text-sm font-medium text-gray-700">Source Environment</label>
<select id="src-env" bind:value={sourceEnv} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm">
<option value="" disabled>-- Select Source --</option>
{#each envs as env}
<option value={env.id}>{env.name}</option>
{/each}
</select>
</div>
<div>
<label for="tgt-env" class="block text-sm font-medium text-gray-700">Target Environment</label>
<select id="tgt-env" bind:value={targetEnv} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm">
<option value="" disabled>-- Select Target --</option>
{#each envs as env}
<option value={env.id}>{env.name}</option>
{/each}
</select>
</div>
</div>
{:else}
<div class="grid grid-cols-1 md:grid-cols-2 gap-4">
<div>
<label for="debug-env" class="block text-sm font-medium text-gray-700">Environment</label>
<select id="debug-env" bind:value={selectedEnv} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm">
<option value="" disabled>-- Select Environment --</option>
{#each envs as env}
<option value={env.id}>{env.name}</option>
{/each}
</select>
</div>
<div>
<label for="debug-ds-id" class="block text-sm font-medium text-gray-700">Dataset ID</label>
<input type="number" id="debug-ds-id" bind:value={datasetId} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" />
</div>
</div>
{/if}
<div class="mt-4 flex justify-end">
<button on:click={handleRunDebug} disabled={isRunning} class="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md shadow-sm text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500 disabled:opacity-50">
{isRunning ? 'Running...' : 'Run Diagnostics'}
</button>
</div>
</div>
{#if results}
<div class="bg-white shadow overflow-hidden sm:rounded-md border border-gray-200">
<div class="px-4 py-5 sm:px-6 bg-gray-50 border-b border-gray-200">
<h3 class="text-lg leading-6 font-medium text-gray-900">Debug Output</h3>
</div>
<div class="p-4">
<pre class="text-xs text-gray-600 bg-gray-900 text-green-400 p-4 rounded-md overflow-x-auto h-96">{JSON.stringify(results, null, 2)}</pre>
</div>
</div>
{/if}
</div>

View File

@@ -0,0 +1,159 @@
<!-- [DEF:MapperTool:Component] -->
<!--
@SEMANTICS: mapper, tool, dataset, postgresql, excel
@PURPOSE: UI component for mapping dataset column verbose names using the MapperPlugin.
@LAYER: UI
@RELATION: USES -> frontend/src/services/toolsService.js
@RELATION: USES -> frontend/src/services/connectionService.js
-->
<script>
// [SECTION: IMPORTS]
import { onMount } from 'svelte';
import { runTask } from '../../services/toolsService.js';
import { getConnections } from '../../services/connectionService.js';
import { selectedTask } from '../../lib/stores.js';
import { addToast } from '../../lib/toasts.js';
// [/SECTION]
let envs = [];
let connections = [];
let selectedEnv = '';
let datasetId = '';
let source = 'postgres';
let selectedConnection = '';
let tableName = '';
let tableSchema = 'public';
let excelPath = '';
let isRunning = false;
// [DEF:fetchData:Function]
// @PURPOSE: Fetches environments and saved connections.
async function fetchData() {
try {
const envsRes = await fetch('/api/environments');
envs = await envsRes.json();
connections = await getConnections();
} catch (e) {
addToast('Failed to fetch data', 'error');
}
}
// [DEF:handleRunMapper:Function]
// @PURPOSE: Triggers the MapperPlugin task.
async function handleRunMapper() {
if (!selectedEnv || !datasetId) {
addToast('Please fill in required fields', 'warning');
return;
}
if (source === 'postgres' && (!selectedConnection || !tableName)) {
addToast('Connection and Table Name are required for postgres source', 'warning');
return;
}
if (source === 'excel' && !excelPath) {
addToast('Excel path is required for excel source', 'warning');
return;
}
isRunning = true;
try {
const env = envs.find(e => e.id === selectedEnv);
const task = await runTask('dataset-mapper', {
env: env.name,
dataset_id: parseInt(datasetId),
source,
connection_id: selectedConnection,
table_name: tableName,
table_schema: tableSchema,
excel_path: excelPath
});
selectedTask.set(task);
addToast('Mapper task started', 'success');
} catch (e) {
addToast(e.message, 'error');
} finally {
isRunning = false;
}
}
onMount(fetchData);
</script>
<!-- [SECTION: TEMPLATE] -->
<div class="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
<h3 class="text-lg font-medium text-gray-900 mb-4">Dataset Column Mapper</h3>
<div class="space-y-4">
<div class="grid grid-cols-1 md:grid-cols-2 gap-4">
<div>
<label for="mapper-env" class="block text-sm font-medium text-gray-700">Environment</label>
<select id="mapper-env" bind:value={selectedEnv} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm">
<option value="" disabled>-- Select Environment --</option>
{#each envs as env}
<option value={env.id}>{env.name}</option>
{/each}
</select>
</div>
<div>
<label for="mapper-ds-id" class="block text-sm font-medium text-gray-700">Dataset ID</label>
<input type="number" id="mapper-ds-id" bind:value={datasetId} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" />
</div>
</div>
<div>
<label class="block text-sm font-medium text-gray-700">Mapping Source</label>
<div class="mt-2 flex space-x-4">
<label class="inline-flex items-center">
<input type="radio" bind:group={source} value="postgres" class="focus:ring-indigo-500 h-4 w-4 text-indigo-600 border-gray-300" />
<span class="ml-2 text-sm text-gray-700">PostgreSQL</span>
</label>
<label class="inline-flex items-center">
<input type="radio" bind:group={source} value="excel" class="focus:ring-indigo-500 h-4 w-4 text-indigo-600 border-gray-300" />
<span class="ml-2 text-sm text-gray-700">Excel</span>
</label>
</div>
</div>
{#if source === 'postgres'}
<div class="space-y-4 p-4 bg-gray-50 rounded-md border border-gray-100">
<div>
<label for="mapper-conn" class="block text-sm font-medium text-gray-700">Saved Connection</label>
<select id="mapper-conn" bind:value={selectedConnection} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm">
<option value="" disabled>-- Select Connection --</option>
{#each connections as conn}
<option value={conn.id}>{conn.name}</option>
{/each}
</select>
</div>
<div class="grid grid-cols-1 md:grid-cols-2 gap-4">
<div>
<label for="mapper-table" class="block text-sm font-medium text-gray-700">Table Name</label>
<input type="text" id="mapper-table" bind:value={tableName} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" />
</div>
<div>
<label for="mapper-schema" class="block text-sm font-medium text-gray-700">Table Schema</label>
<input type="text" id="mapper-schema" bind:value={tableSchema} class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" />
</div>
</div>
</div>
{:else}
<div class="p-4 bg-gray-50 rounded-md border border-gray-100">
<label for="mapper-excel" class="block text-sm font-medium text-gray-700">Excel File Path</label>
<input type="text" id="mapper-excel" bind:value={excelPath} placeholder="/path/to/mapping.xlsx" class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" />
</div>
{/if}
<div class="flex justify-end">
<button
on:click={handleRunMapper}
disabled={isRunning}
class="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md shadow-sm text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500 disabled:opacity-50"
>
{isRunning ? 'Starting...' : 'Run Mapper'}
</button>
</div>
</div>
</div>
<!-- [/SECTION] -->
<!-- [/DEF:MapperTool:Component] -->

View File

@@ -0,0 +1,177 @@
<!-- [DEF:SearchTool:Component] -->
<!--
@SEMANTICS: search, tool, dataset, regex
@PURPOSE: UI component for searching datasets using the SearchPlugin.
@LAYER: UI
@RELATION: USES -> frontend/src/services/toolsService.js
-->
<script>
// [SECTION: IMPORTS]
import { onMount } from 'svelte';
import { runTask, getTaskStatus } from '../../services/toolsService.js';
import { selectedTask } from '../../lib/stores.js';
import { addToast } from '../../lib/toasts.js';
// [/SECTION]
let envs = [];
let selectedEnv = '';
let searchQuery = '';
let isRunning = false;
let results = null;
let pollInterval;
// [DEF:fetchEnvironments:Function]
// @PURPOSE: Fetches the list of available environments.
async function fetchEnvironments() {
try {
const res = await fetch('/api/environments');
envs = await res.json();
} catch (e) {
addToast('Failed to fetch environments', 'error');
}
}
// [DEF:handleSearch:Function]
// @PURPOSE: Triggers the SearchPlugin task.
async function handleSearch() {
if (!selectedEnv || !searchQuery) {
addToast('Please select environment and enter query', 'warning');
return;
}
isRunning = true;
results = null;
try {
// Find the environment name from ID
const env = envs.find(e => e.id === selectedEnv);
const task = await runTask('search-datasets', {
env: env.name,
query: searchQuery
});
selectedTask.set(task);
startPolling(task.id);
} catch (e) {
isRunning = false;
addToast(e.message, 'error');
}
}
// [DEF:startPolling:Function]
// @PURPOSE: Polls for task completion and results.
function startPolling(taskId) {
if (pollInterval) clearInterval(pollInterval);
pollInterval = setInterval(async () => {
try {
const task = await getTaskStatus(taskId);
selectedTask.set(task);
if (task.status === 'SUCCESS') {
clearInterval(pollInterval);
isRunning = false;
results = task.result;
addToast('Search completed', 'success');
} else if (task.status === 'FAILED') {
clearInterval(pollInterval);
isRunning = false;
addToast('Search failed', 'error');
}
} catch (e) {
clearInterval(pollInterval);
isRunning = false;
addToast('Error polling task status', 'error');
}
}, 2000);
}
onMount(fetchEnvironments);
</script>
<!-- [SECTION: TEMPLATE] -->
<div class="space-y-6">
<div class="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
<h3 class="text-lg font-medium text-gray-900 mb-4">Search Dataset Metadata</h3>
<div class="grid grid-cols-1 md:grid-cols-2 gap-4 items-end">
<div>
<label for="env-select" class="block text-sm font-medium text-gray-700">Environment</label>
<select
id="env-select"
bind:value={selectedEnv}
class="mt-1 block w-full pl-3 pr-10 py-2 text-base border-gray-300 focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm rounded-md"
>
<option value="" disabled>-- Select Environment --</option>
{#each envs as env}
<option value={env.id}>{env.name}</option>
{/each}
</select>
</div>
<div>
<label for="search-query" class="block text-sm font-medium text-gray-700">Regex Pattern</label>
<input
type="text"
id="search-query"
bind:value={searchQuery}
placeholder="e.g. from dm.*\.account"
class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm"
/>
</div>
</div>
<div class="mt-4 flex justify-end">
<button
on:click={handleSearch}
disabled={isRunning}
class="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md shadow-sm text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500 disabled:opacity-50"
>
{#if isRunning}
<svg class="animate-spin -ml-1 mr-3 h-5 w-5 text-white" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
</svg>
Searching...
{:else}
Search
{/if}
</button>
</div>
</div>
{#if results}
<div class="bg-white shadow overflow-hidden sm:rounded-md border border-gray-200">
<div class="px-4 py-5 sm:px-6 flex justify-between items-center bg-gray-50 border-b border-gray-200">
<h3 class="text-lg leading-6 font-medium text-gray-900">
Search Results
</h3>
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-blue-100 text-blue-800">
{results.count} matches
</span>
</div>
<ul class="divide-y divide-gray-200">
{#each results.results as item}
<li class="p-4 hover:bg-gray-50">
<div class="flex items-center justify-between">
<div class="text-sm font-medium text-indigo-600 truncate">
{item.dataset_name} (ID: {item.dataset_id})
</div>
<div class="ml-2 flex-shrink-0 flex">
<p class="px-2 inline-flex text-xs leading-5 font-semibold rounded-full bg-green-100 text-green-800">
Field: {item.field}
</p>
</div>
</div>
<div class="mt-2">
<pre class="text-xs text-gray-500 bg-gray-50 p-2 rounded border border-gray-100 overflow-x-auto">{item.match_context}</pre>
</div>
</li>
{/each}
{#if results.count === 0}
<li class="p-8 text-center text-gray-500 italic">
No matches found for the given pattern.
</li>
{/if}
</ul>
</div>
{/if}
</div>
<!-- [/SECTION] -->
<!-- [/DEF:SearchTool:Component] -->

View File

@@ -0,0 +1,34 @@
<!-- [DEF:ConnectionsSettingsPage:Component] -->
<!--
@SEMANTICS: settings, connections, page
@PURPOSE: Page for managing database connection configurations.
@LAYER: UI
-->
<script>
import ConnectionForm from '../../../components/tools/ConnectionForm.svelte';
import ConnectionList from '../../../components/tools/ConnectionList.svelte';
let listComponent;
function handleSuccess() {
if (listComponent) {
listComponent.fetchConnections();
}
}
</script>
<div class="max-w-7xl mx-auto py-6 sm:px-6 lg:px-8">
<div class="px-4 py-6 sm:px-0">
<h1 class="text-2xl font-semibold text-gray-900 mb-6">Connection Management</h1>
<div class="grid grid-cols-1 lg:grid-cols-2 gap-8">
<div>
<ConnectionForm on:success={handleSuccess} />
</div>
<div>
<ConnectionList bind:this={listComponent} />
</div>
</div>
</div>
</div>
<!-- [/DEF:ConnectionsSettingsPage:Component] -->
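The handleSuccess handler above reaches into the child component via bind:this, which only works if ConnectionList exports fetchConnections from its script block. A minimal sketch of the assumed export inside ConnectionList.svelte's script (the import path and fetch logic are assumptions, not taken from this commit):

import { onMount } from 'svelte';
import { getConnections } from '../../services/connections';

let connections = [];

// Exported so a parent holding a `bind:this` reference can trigger a refresh.
export async function fetchConnections() {
  connections = await getConnections();
}

onMount(fetchConnections);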

View File

@@ -0,0 +1,26 @@
<!-- [DEF:DebugPage:Component] -->
<!--
@SEMANTICS: debug, page, tool
@PURPOSE: Page for system diagnostics and debugging.
@LAYER: UI
-->
<script>
import DebugTool from '../../../components/tools/DebugTool.svelte';
import TaskRunner from '../../../components/TaskRunner.svelte';
</script>
<div class="max-w-7xl mx-auto py-6 sm:px-6 lg:px-8">
<div class="px-4 py-6 sm:px-0">
<h1 class="text-2xl font-semibold text-gray-900 mb-6">System Diagnostics</h1>
<div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
<div class="lg:col-span-2">
<DebugTool />
</div>
<div class="lg:col-span-1">
<TaskRunner />
</div>
</div>
</div>
</div>
<!-- [/DEF:DebugPage:Component] -->

View File

@@ -0,0 +1,26 @@
<!-- [DEF:MapperPage:Component] -->
<!--
@SEMANTICS: mapper, page, tool
@PURPOSE: Page for the dataset column mapper tool.
@LAYER: UI
-->
<script>
import MapperTool from '../../../components/tools/MapperTool.svelte';
import TaskRunner from '../../../components/TaskRunner.svelte';
</script>
<div class="max-w-7xl mx-auto py-6 sm:px-6 lg:px-8">
<div class="px-4 py-6 sm:px-0">
<h1 class="text-2xl font-semibold text-gray-900 mb-6">Dataset Column Mapper</h1>
<div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
<div class="lg:col-span-2">
<MapperTool />
</div>
<div class="lg:col-span-1">
<TaskRunner />
</div>
</div>
</div>
</div>
<!-- [/DEF:MapperPage:Component] -->

View File

@@ -0,0 +1,26 @@
<!-- [DEF:SearchPage:Component] -->
<!--
@SEMANTICS: search, page, tool
@PURPOSE: Page for the dataset search tool.
@LAYER: UI
-->
<script>
import SearchTool from '../../../components/tools/SearchTool.svelte';
import TaskRunner from '../../../components/TaskRunner.svelte';
</script>
<div class="max-w-7xl mx-auto py-6 sm:px-6 lg:px-8">
<div class="px-4 py-6 sm:px-0">
<h1 class="text-2xl font-semibold text-gray-900 mb-6">Dataset Search</h1>
<div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
<div class="lg:col-span-2">
<SearchTool />
</div>
<div class="lg:col-span-1">
<TaskRunner />
</div>
</div>
</div>
</div>
<!-- [/DEF:SearchPage:Component] -->

View File

@@ -0,0 +1,52 @@
/**
* Service for interacting with the Connection Management API.
*/
const API_BASE = '/api/settings/connections';
/**
* Fetch a list of saved connections.
* @returns {Promise<Array>} List of connections.
*/
export async function getConnections() {
const response = await fetch(API_BASE);
if (!response.ok) {
throw new Error(`Failed to fetch connections: ${response.statusText}`);
}
return await response.json();
}
/**
* Create a new connection configuration.
* @param {Object} connectionData - The connection data.
* @returns {Promise<Object>} The created connection instance.
*/
export async function createConnection(connectionData) {
const response = await fetch(API_BASE, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(connectionData)
});
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.detail || `Failed to create connection: ${response.statusText}`);
}
return await response.json();
}
/**
* Delete a connection configuration.
* @param {string} connectionId - The ID of the connection to delete.
*/
export async function deleteConnection(connectionId) {
const response = await fetch(`${API_BASE}/${connectionId}`, {
method: 'DELETE'
});
if (!response.ok) {
throw new Error(`Failed to delete connection: ${response.statusText}`);
}
}
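A brief usage sketch for this service (all field values below are illustrative, not taken from this commit; error handling is left to the caller, as in the components above):

import { getConnections, createConnection, deleteConnection } from './connections';

// Create a connection, list all connections, then delete the new one.
const created = await createConnection({
  name: 'local-pg',
  type: 'postgres',
  host: 'localhost',
  port: 5432,
  database: 'dwh',
  username: 'reader',
  password: 'secret'
});
const all = await getConnections();
console.log(`${all.length} connections saved`);
await deleteConnection(created.id);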

View File

@@ -0,0 +1,40 @@
/**
* Service for generic Task API communication used by Tools.
*/
const API_BASE = '/api/tasks';
/**
* Start a new task for a given plugin.
* @param {string} pluginId - The ID of the plugin to run.
* @param {Object} params - Parameters for the plugin.
* @returns {Promise<Object>} The created task instance.
*/
export async function runTask(pluginId, params) {
const response = await fetch(API_BASE, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({ plugin_id: pluginId, params })
});
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.detail || `Failed to start task: ${response.statusText}`);
}
return await response.json();
}
/**
* Fetch details for a specific task (to poll status or get result).
* @param {string} taskId - The ID of the task.
* @returns {Promise<Object>} Task details.
*/
export async function getTaskStatus(taskId) {
const response = await fetch(`${API_BASE}/${taskId}`);
if (!response.ok) {
throw new Error(`Failed to fetch task ${taskId}: ${response.statusText}`);
}
return await response.json();
}
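Tool components such as SearchTool combine these two calls into a poll loop. A minimal sketch of that pattern, with the 2-second interval matching the setInterval used in SearchTool (the terminal status names and the task's id/status fields are assumptions about the backend's task model):

import { runTask, getTaskStatus } from './tasks';

// Start a plugin task and resolve once it leaves the running states.
export async function runTaskAndWait(pluginId, params, intervalMs = 2000) {
  const task = await runTask(pluginId, params); // assumed to return { id, status, ... }
  return new Promise((resolve, reject) => {
    const timer = setInterval(async () => {
      try {
        const status = await getTaskStatus(task.id);
        if (status.status !== 'pending' && status.status !== 'running') {
          clearInterval(timer);
          resolve(status);
        }
      } catch (err) {
        clearInterval(timer);
        reject(err);
      }
    }, intervalMs);
  });
}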

View File

@@ -1,64 +0,0 @@
# [DEF:get_dataset_structure:Module]
#
# @SEMANTICS: superset, dataset, structure, debug, json
# @PURPOSE: Fetches a dataset's structure from Superset and saves it to a file. Used for debugging and for analyzing the data returned by the API.
# @LAYER: App
# @RELATION: DEPENDS_ON -> superset_tool.client
# @RELATION: DEPENDS_ON -> superset_tool.utils.init_clients
# @RELATION: DEPENDS_ON -> superset_tool.utils.logger
# @PUBLIC_API: get_and_save_dataset
# [SECTION: IMPORTS]
import argparse
import json
from superset_tool.utils.init_clients import setup_clients
from superset_tool.utils.logger import SupersetLogger
# [/SECTION]
# [DEF:get_and_save_dataset:Function]
# @PURPOSE: Fetches a dataset's structure from Superset and saves it to a JSON file.
# @RELATION: CALLS -> setup_clients
# @RELATION: CALLS -> superset_client.get_dataset
# @PARAM: env (str) - The environment (dev, prod, etc.) to connect to.
# @PARAM: dataset_id (int) - The ID of the dataset to fetch.
# @PARAM: output_path (str) - The path where the JSON file is saved.
def get_and_save_dataset(env: str, dataset_id: int, output_path: str):
"""
Fetches the dataset structure and saves it to a file.
"""
logger = SupersetLogger(name="DatasetStructureRetriever")
logger.info("[get_and_save_dataset][Enter] Starting to fetch dataset structure for ID %d from env '%s'.", dataset_id, env)
try:
clients = setup_clients(logger=logger)
superset_client = clients.get(env)
if not superset_client:
logger.error("[get_and_save_dataset][Failure] Environment '%s' not found.", env)
return
dataset_response = superset_client.get_dataset(dataset_id)
dataset_data = dataset_response.get('result')
if not dataset_data:
logger.error("[get_and_save_dataset][Failure] No result in dataset response.")
return
with open(output_path, 'w', encoding='utf-8') as f:
json.dump(dataset_data, f, ensure_ascii=False, indent=4)
logger.info("[get_and_save_dataset][Success] Dataset structure saved to %s.", output_path)
except Exception as e:
logger.error("[get_and_save_dataset][Failure] An error occurred: %s", e, exc_info=True)
# [/DEF:get_and_save_dataset:Function]
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Fetch a dataset's structure from Superset.")
parser.add_argument("--dataset-id", required=True, type=int, help="Dataset ID.")
parser.add_argument("--env", required=True, help="Environment to connect to (e.g. dev).")
parser.add_argument("--output-path", default="dataset_structure.json", help="Path to save the JSON file.")
args = parser.parse_args()
get_and_save_dataset(args.env, args.dataset_id, args.output_path)
# [/DEF:get_dataset_structure:Module]

View File

@@ -1,72 +0,0 @@
# [DEF:run_mapper:Module]
#
# @SEMANTICS: runner, configuration, cli, main
# @PURPOSE: CLI entry point for running the dataset metadata mapping process.
# @LAYER: App
# @RELATION: DEPENDS_ON -> superset_tool.utils.dataset_mapper
# @RELATION: DEPENDS_ON -> superset_tool.utils
# @PUBLIC_API: main
# [SECTION: IMPORTS]
import argparse
import keyring
from superset_tool.utils.init_clients import setup_clients
from superset_tool.utils.logger import SupersetLogger
from superset_tool.utils.dataset_mapper import DatasetMapper
# [/SECTION]
# [DEF:main:Function]
# @PURPOSE: Parses command-line arguments and starts the mapping process.
# @RELATION: CREATES_INSTANCE_OF -> DatasetMapper
# @RELATION: CALLS -> setup_clients
# @RELATION: CALLS -> DatasetMapper.run_mapping
def main():
parser = argparse.ArgumentParser(description="Map dataset verbose names in Superset.")
parser.add_argument('--source', type=str, required=True, choices=['postgres', 'excel', 'both'], help='The source for the mapping.')
parser.add_argument('--dataset-id', type=int, required=True, help='The ID of the dataset to update.')
parser.add_argument('--table-name', type=str, help='The table name for PostgreSQL source.')
parser.add_argument('--table-schema', type=str, help='The table schema for PostgreSQL source.')
parser.add_argument('--excel-path', type=str, help='The path to the Excel file.')
parser.add_argument('--env', type=str, default='dev', help='The Superset environment to use.')
args = parser.parse_args()
logger = SupersetLogger(name="dataset_mapper_main")
# [AI_NOTE]: The DB configuration should be moved to an external config file or environment variables.
POSTGRES_CONFIG = {
'dbname': 'dwh',
'user': keyring.get_password("system", "dwh gp user"),
'password': keyring.get_password("system", "dwh gp password"),
'host': '10.66.229.201',
'port': '5432'
}
logger.info("[main][Enter] Starting dataset mapper CLI.")
try:
clients = setup_clients(logger)
superset_client = clients.get(args.env)
if not superset_client:
logger.error(f"[main][Failure] Superset client for '{args.env}' environment not found.")
return
mapper = DatasetMapper(logger)
mapper.run_mapping(
superset_client=superset_client,
dataset_id=args.dataset_id,
source=args.source,
postgres_config=POSTGRES_CONFIG if args.source in ['postgres', 'both'] else None,
excel_path=args.excel_path if args.source in ['excel', 'both'] else None,
table_name=args.table_name if args.source in ['postgres', 'both'] else None,
table_schema=args.table_schema if args.source in ['postgres', 'both'] else None
)
logger.info("[main][Exit] Dataset mapper process finished.")
except Exception as main_exc:
logger.error("[main][Failure] An unexpected error occurred: %s", main_exc, exc_info=True)
# [/DEF:main:Function]
if __name__ == '__main__':
main()
# [/DEF:run_mapper:Module]

View File

@@ -1,204 +0,0 @@
# [DEF:search_script:Module]
#
# @SEMANTICS: search, superset, dataset, regex, file_output
# @PURPOSE: Provides utilities for searching text patterns in Superset dataset metadata.
# @LAYER: App
# @RELATION: DEPENDS_ON -> superset_tool.client
# @RELATION: DEPENDS_ON -> superset_tool.utils
# @PUBLIC_API: search_datasets, save_results_to_file, print_search_results, main
# [SECTION: IMPORTS]
import datetime
import logging
import re
from typing import Dict, Optional
from requests.exceptions import RequestException
from superset_tool.client import SupersetClient
from superset_tool.exceptions import SupersetAPIError
from superset_tool.utils.logger import SupersetLogger
from superset_tool.utils.init_clients import setup_clients
# [/SECTION]
# [DEF:search_datasets:Function]
# @PURPOSE: Searches the metadata of all datasets for a string pattern.
# @PRE: `client` must be an initialized `SupersetClient` instance.
# @PRE: `search_pattern` must be a valid regular-expression string.
# @POST: Returns a dict of results keyed by dataset ID, with a list of matches per dataset.
# @RELATION: CALLS -> client.get_datasets
# @THROW: re.error - If the regex pattern is invalid.
# @THROW: SupersetAPIError, RequestException - On critical API errors.
# @PARAM: client (SupersetClient) - Client for the Superset API.
# @PARAM: search_pattern (str) - Regular expression to search for.
# @PARAM: logger (Optional[SupersetLogger]) - Logger instance.
# @RETURN: Optional[Dict] - Dict of results, or None if nothing was found.
def search_datasets(
client: SupersetClient,
search_pattern: str,
logger: Optional[SupersetLogger] = None
) -> Optional[Dict]:
logger = logger or SupersetLogger(name="dataset_search")
logger.info(f"[search_datasets][Enter] Searching for pattern: '{search_pattern}'")
try:
_, datasets = client.get_datasets(query={"columns": ["id", "table_name", "sql", "database", "columns"]})
if not datasets:
logger.warning("[search_datasets][State] No datasets found.")
return None
pattern = re.compile(search_pattern, re.IGNORECASE)
results = {}
for dataset in datasets:
dataset_id = dataset.get('id')
if not dataset_id:
continue
matches = []
for field, value in dataset.items():
value_str = str(value)
match_obj = pattern.search(value_str)
if match_obj:
matches.append({
"field": field,
"match": match_obj.group(),
"value": value_str
})
if matches:
results[dataset_id] = matches
logger.info(f"[search_datasets][Success] Found matches in {len(results)} datasets.")
return results
except re.error as e:
logger.error(f"[search_datasets][Failure] Invalid regex pattern: {e}", exc_info=True)
raise
except (SupersetAPIError, RequestException) as e:
logger.critical(f"[search_datasets][Failure] Critical error during search: {e}", exc_info=True)
raise
# [/DEF:search_datasets:Function]
# [DEF:save_results_to_file:Function]
# @PURPOSE: Saves search results to a text file.
# @PRE: `results` is the dict returned by `search_datasets`, or `None`.
# @PRE: `filename` must be a valid file path.
# @POST: Writes the formatted results to the given file.
# @PARAM: results (Optional[Dict]) - Dict of search results.
# @PARAM: filename (str) - Name of the file to write the results to.
# @PARAM: logger (Optional[SupersetLogger]) - Logger instance.
# @RETURN: bool - Whether the save succeeded.
def save_results_to_file(results: Optional[Dict], filename: str, logger: Optional[SupersetLogger] = None) -> bool:
logger = logger or SupersetLogger(name="file_writer")
logger.info(f"[save_results_to_file][Enter] Saving results to file: {filename}")
try:
formatted_report = print_search_results(results)
with open(filename, 'w', encoding='utf-8') as f:
f.write(formatted_report)
logger.info(f"[save_results_to_file][Success] Results saved to {filename}")
return True
except Exception as e:
logger.error(f"[save_results_to_file][Failure] Failed to save results to file: {e}", exc_info=True)
return False
# [/DEF:save_results_to_file:Function]
# [DEF:print_search_results:Function]
# @PURPOSE: Formats search results for readable console output.
# @PRE: `results` is the dict returned by `search_datasets`, or `None`.
# @POST: Returns a formatted string with the results.
# @PARAM: results (Optional[Dict]) - Dict of search results.
# @PARAM: context_lines (int) - Number of context lines to show before and after a match.
# @RETURN: str - The formatted report.
def print_search_results(results: Optional[Dict], context_lines: int = 3) -> str:
if not results:
return "Ничего не найдено"
output = []
for dataset_id, matches in results.items():
# Get the database info for the current dataset
database_info = ""
# Look for the database field among the matches so it can be printed
for match_info in matches:
if match_info['field'] == 'database':
database_info = match_info['value']
break
# Если database не найден в совпадениях, пробуем получить из других полей
if not database_info:
# Предполагаем, что база данных может быть в одном из полей, например sql или table_name
# Но для точности лучше использовать специальное поле, которое мы уже получили
pass # Пока не выводим, если не нашли явно
output.append(f"\n--- Dataset ID: {dataset_id} ---")
if database_info:
output.append(f" Database: {database_info}")
output.append("") # Пустая строка для читабельности
for match_info in matches:
field, match_text, full_value = match_info['field'], match_info['match'], match_info['value']
output.append(f" - Поле: {field}")
output.append(f" Совпадение: '{match_text}'")
lines = full_value.splitlines()
if not lines: continue
match_line_index = -1
for i, line in enumerate(lines):
if match_text in line:
match_line_index = i
break
if match_line_index != -1:
start = max(0, match_line_index - context_lines)
end = min(len(lines), match_line_index + context_lines + 1)
output.append(" Контекст:")
for i in range(start, end):
prefix = f"{i + 1:5d}: "
line_content = lines[i]
if i == match_line_index:
highlighted = line_content.replace(match_text, f">>>{match_text}<<<")
output.append(f" {prefix}{highlighted}")
else:
output.append(f" {prefix}{line_content}")
output.append("-" * 25)
return "\n".join(output)
# [/DEF:print_search_results:Function]
# [DEF:main:Function]
# @PURPOSE: Main entry point for running the search script.
# @RELATION: CALLS -> setup_clients
# @RELATION: CALLS -> search_datasets
# @RELATION: CALLS -> print_search_results
# @RELATION: CALLS -> save_results_to_file
def main():
logger = SupersetLogger(level=logging.INFO, console=True)
clients = setup_clients(logger)
target_client = clients['dev5']
search_query = r"from dm(_view)*.account_debt"
# Generate an output filename based on the current timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
output_filename = f"search_results_{timestamp}.txt"
results = search_datasets(
client=target_client,
search_pattern=search_query,
logger=logger
)
report = print_search_results(results)
logger.info(f"[main][Success] Search finished. Report:\n{report}")
# Save the results to a file
success = save_results_to_file(results, output_filename, logger)
if success:
logger.info(f"[main][Success] Results also saved to file: {output_filename}")
else:
logger.error(f"[main][Failure] Failed to save results to file: {output_filename}")
# [/DEF:main:Function]
if __name__ == "__main__":
main()
# [/DEF:search_script:Module]