From 696aac32e78afe7ed9be928f8b7fb379f08cbb21 Mon Sep 17 00:00:00 2001
From: busya
Date: Mon, 12 Jan 2026 12:33:51 +0300
Subject: [PATCH] 1st iter

---
 backend/src/api/routes/__init__.py             |   2 +-
 backend/src/api/routes/connections.py          |  76 +++++++
 backend/src/app.py                             |   3 +-
 backend/src/core/database.py                   |   3 +-
 backend/src/core/superset_client.py            |  22 ++
 backend/src/core/task_manager/manager.py       |   4 +-
 backend/src/core/task_manager/models.py        |   1 +
 backend/src/core/task_manager/persistence.py   |   2 +
 backend/src/models/connection.py               |  34 +++
 backend/src/models/task.py                     |   1 +
 backend/src/plugins/debug.py                   | 137 ++++++++++++
 backend/src/plugins/mapper.py                  | 164 ++++++++++++++
 backend/src/plugins/search.py                  | 161 ++++++++++++++
 backup_script.py                               | 163 --------------
 debug_db_api.py                                |  79 -------
 frontend/src/components/Navbar.svelte          |  25 ++-
 .../components/tools/ConnectionForm.svelte     |  69 ++++++
 .../components/tools/ConnectionList.svelte     |  47 +++++
 frontend/src/components/tools/DebugTool.svelte |  90 +++++++
 .../src/components/tools/MapperTool.svelte     | 103 +++++++++
 .../src/components/tools/SearchTool.svelte     |  76 +++++++
 .../routes/settings/connections/+page.svelte   |  20 ++
 frontend/src/routes/tools/debug/+page.svelte   |  10 +
 frontend/src/routes/tools/mapper/+page.svelte  |  10 +
 frontend/src/routes/tools/search/+page.svelte  |  10 +
 frontend/src/services/connectionService.js     |  52 +++++
 frontend/src/services/toolsService.js          |  40 ++++
 get_dataset_structure.py                       |  64 ------
 run_mapper.py                                  |  72 ------
 search_script.py                               | 204 ------------------
 30 files changed, 1151 insertions(+), 593 deletions(-)
 create mode 100644 backend/src/api/routes/connections.py
 create mode 100644 backend/src/models/connection.py
 create mode 100644 backend/src/plugins/debug.py
 create mode 100644 backend/src/plugins/mapper.py
 create mode 100644 backend/src/plugins/search.py
 delete mode 100755 backup_script.py
 delete mode 100755 debug_db_api.py
 create mode 100644 frontend/src/components/tools/ConnectionForm.svelte
 create mode 100644 frontend/src/components/tools/ConnectionList.svelte
 create mode 100644 frontend/src/components/tools/DebugTool.svelte
 create mode 100644 frontend/src/components/tools/MapperTool.svelte
 create mode 100644 frontend/src/components/tools/SearchTool.svelte
 create mode 100644 frontend/src/routes/settings/connections/+page.svelte
 create mode 100644 frontend/src/routes/tools/debug/+page.svelte
 create mode 100644 frontend/src/routes/tools/mapper/+page.svelte
 create mode 100644 frontend/src/routes/tools/search/+page.svelte
 create mode 100644 frontend/src/services/connectionService.js
 create mode 100644 frontend/src/services/toolsService.js
 delete mode 100755 get_dataset_structure.py
 delete mode 100755 run_mapper.py
 delete mode 100755 search_script.py

diff --git a/backend/src/api/routes/__init__.py b/backend/src/api/routes/__init__.py
index da359fd..3052e25 100755
--- a/backend/src/api/routes/__init__.py
+++ b/backend/src/api/routes/__init__.py
@@ -1 +1 @@
-from . import plugins, tasks, settings
+from . import plugins, tasks, settings, connections
diff --git a/backend/src/api/routes/connections.py b/backend/src/api/routes/connections.py
new file mode 100644
index 0000000..ca754c5
--- /dev/null
+++ b/backend/src/api/routes/connections.py
@@ -0,0 +1,76 @@
+# [DEF:ConnectionsRouter:Module]
+# @SEMANTICS: api, router, connections, database
+# @PURPOSE: Defines the FastAPI router for managing external database connections.
+# @LAYER: UI (API)
+# @RELATION: Depends on SQLAlchemy session.
+# @CONSTRAINT: Must use belief_scope for logging.
+
+# [SECTION: IMPORTS]
+from typing import List, Optional
+from fastapi import APIRouter, Depends, HTTPException, status
+from sqlalchemy.orm import Session
+from ...core.database import get_db
+from ...models.connection import ConnectionConfig
+from pydantic import BaseModel, Field
+from datetime import datetime
+from ...core.logger import logger, belief_scope
+# [/SECTION]
+
+router = APIRouter()
+
+# [DEF:ConnectionSchema:Class]
+class ConnectionSchema(BaseModel):
+    id: str
+    name: str
+    type: str
+    host: Optional[str] = None
+    port: Optional[int] = None
+    database: Optional[str] = None
+    username: Optional[str] = None
+    created_at: datetime
+
+    class Config:
+        orm_mode = True
+
+# [DEF:ConnectionCreate:Class]
+class ConnectionCreate(BaseModel):
+    name: str
+    type: str
+    host: Optional[str] = None
+    port: Optional[int] = None
+    database: Optional[str] = None
+    username: Optional[str] = None
+    password: Optional[str] = None
+
+# [DEF:list_connections:Function]
+@router.get("", response_model=List[ConnectionSchema])
+async def list_connections(db: Session = Depends(get_db)):
+    with belief_scope("ConnectionsRouter.list_connections"):
+        connections = db.query(ConnectionConfig).all()
+        return connections
+
+# [DEF:create_connection:Function]
+@router.post("", response_model=ConnectionSchema, status_code=status.HTTP_201_CREATED)
+async def create_connection(connection: ConnectionCreate, db: Session = Depends(get_db)):
+    with belief_scope("ConnectionsRouter.create_connection", f"name={connection.name}"):
+        db_connection = ConnectionConfig(**connection.dict())
+        db.add(db_connection)
+        db.commit()
+        db.refresh(db_connection)
+        logger.info(f"[ConnectionsRouter.create_connection][Success] Created connection {db_connection.id}")
+        return db_connection
+
+# [DEF:delete_connection:Function]
+@router.delete("/{connection_id}", status_code=status.HTTP_204_NO_CONTENT)
+async def delete_connection(connection_id: str, db: Session = Depends(get_db)):
+    with belief_scope("ConnectionsRouter.delete_connection", f"id={connection_id}"):
+        db_connection = db.query(ConnectionConfig).filter(ConnectionConfig.id == connection_id).first()
+        if not db_connection:
+            logger.error(f"[ConnectionsRouter.delete_connection][State] Connection {connection_id} not found")
+            raise HTTPException(status_code=404, detail="Connection not found")
+        db.delete(db_connection)
+        db.commit()
+        logger.info(f"[ConnectionsRouter.delete_connection][Success] Deleted connection {connection_id}")
+        return
+
+# [/DEF:ConnectionsRouter:Module]
\ No newline at end of file
diff --git a/backend/src/app.py b/backend/src/app.py
index 28c25ac..a1fe8d7 100755
--- a/backend/src/app.py
+++ b/backend/src/app.py
@@ -20,7 +20,7 @@ import os
 from .dependencies import get_task_manager, get_scheduler_service
 from .core.logger import logger
-from .api.routes import plugins, tasks, settings, environments, mappings, migration
+from .api.routes import plugins, tasks, settings, environments, mappings, migration, connections
 from .core.database import init_db
 
 # [DEF:App:Global]
@@ -66,6 +66,7 @@ async def log_requests(request: Request, call_next):
 app.include_router(plugins.router, prefix="/api/plugins", tags=["Plugins"])
 app.include_router(tasks.router, prefix="/api/tasks", tags=["Tasks"])
 app.include_router(settings.router, prefix="/api/settings", tags=["Settings"])
+app.include_router(connections.router, prefix="/api/settings/connections", tags=["Connections"])
 app.include_router(environments.router, prefix="/api/environments", tags=["Environments"])
 app.include_router(mappings.router)
 app.include_router(migration.router)
diff --git a/backend/src/core/database.py b/backend/src/core/database.py
index 0f999ce..a532368 100644
--- a/backend/src/core/database.py
+++ b/backend/src/core/database.py
@@ -12,8 +12,9 @@ from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker, Session
 from backend.src.models.mapping import Base
-# Import TaskRecord to ensure it's registered with Base
+# Import models to ensure they're registered with Base
 from backend.src.models.task import TaskRecord
+from backend.src.models.connection import ConnectionConfig
 import os
 # [/SECTION]
diff --git a/backend/src/core/superset_client.py b/backend/src/core/superset_client.py
index 511c0df..564aeb0 100644
--- a/backend/src/core/superset_client.py
+++ b/backend/src/core/superset_client.py
@@ -78,6 +78,28 @@ class SupersetClient(BaseSupersetClient):
         return result
     # [/DEF:SupersetClient.get_dashboards_summary:Function]
 
+    # [DEF:SupersetClient.get_dataset:Function]
+    # @PURPOSE: Fetch full dataset structure including columns and metrics.
+    # @PARAM: dataset_id (int) - The ID of the dataset.
+    # @RETURN: Dict - The dataset metadata.
+    def get_dataset(self, dataset_id: int) -> Dict:
+        """
+        Fetch full dataset structure.
+        """
+        return self.network.get(f"/api/v1/dataset/{dataset_id}").json()
+    # [/DEF:SupersetClient.get_dataset:Function]
+
+    # [DEF:SupersetClient.update_dataset:Function]
+    # @PURPOSE: Update dataset metadata.
+    # @PARAM: dataset_id (int) - The ID of the dataset.
+    # @PARAM: data (Dict) - The payload for update.
+    def update_dataset(self, dataset_id: int, data: Dict):
+        """
+        Update dataset metadata.
+        """
+        self.network.put(f"/api/v1/dataset/{dataset_id}", json=data)
+    # [/DEF:SupersetClient.update_dataset:Function]
+
 # [/DEF:SupersetClient:Class]
 # [/DEF:backend.src.core.superset_client:Module]
diff --git a/backend/src/core/task_manager/manager.py b/backend/src/core/task_manager/manager.py
index 839e07a..9526892 100644
--- a/backend/src/core/task_manager/manager.py
+++ b/backend/src/core/task_manager/manager.py
@@ -98,9 +98,9 @@ class TaskManager:
             params = {**task.params, "_task_id": task_id}
 
             if asyncio.iscoroutinefunction(plugin.execute):
-                await plugin.execute(params)
+                task.result = await plugin.execute(params)
             else:
-                await self.loop.run_in_executor(
+                task.result = await self.loop.run_in_executor(
                     self.executor,
                     plugin.execute,
                     params
diff --git a/backend/src/core/task_manager/models.py b/backend/src/core/task_manager/models.py
index 51b9038..9cd80be 100644
--- a/backend/src/core/task_manager/models.py
+++ b/backend/src/core/task_manager/models.py
@@ -51,6 +51,7 @@ class Task(BaseModel):
     params: Dict[str, Any] = Field(default_factory=dict)
     input_required: bool = False
     input_request: Optional[Dict[str, Any]] = None
+    result: Optional[Dict[str, Any]] = None
 
     # [DEF:Task.__init__:Function]
     # @PURPOSE: Initializes the Task model and validates input_request for AWAITING_INPUT status.
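
With these changes a plugin's return value becomes part of the task record: the manager assigns whatever `execute` returns to `Task.result`, and the persistence hunks below mirror it to `TaskRecord.result`, so `GET /api/tasks/{task_id}` can hand structured output back to the frontend. A minimal sketch of the contract a plugin now satisfies (the plugin itself is illustrative, not part of this patch):

    # Hypothetical plugin: whatever execute() returns is stored as Task.result.
    class EchoPlugin(PluginBase):
        async def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
            # Persisted to TaskRecord.result and served by the tasks API.
            return {"echo": params.get("message", "")}
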
diff --git a/backend/src/core/task_manager/persistence.py b/backend/src/core/task_manager/persistence.py
index 8abbf21..ae29d87 100644
--- a/backend/src/core/task_manager/persistence.py
+++ b/backend/src/core/task_manager/persistence.py
@@ -43,6 +43,7 @@ class TaskPersistenceService:
             record.started_at = task.started_at
             record.finished_at = task.finished_at
             record.params = task.params
+            record.result = task.result
 
             # Store logs as JSON, converting datetime to string
             record.logs = []
@@ -108,6 +109,7 @@
                     started_at=record.started_at,
                     finished_at=record.finished_at,
                     params=record.params or {},
+                    result=record.result,
                     logs=logs
                 )
                 loaded_tasks.append(task)
diff --git a/backend/src/models/connection.py b/backend/src/models/connection.py
new file mode 100644
index 0000000..037aa09
--- /dev/null
+++ b/backend/src/models/connection.py
@@ -0,0 +1,34 @@
+# [DEF:backend.src.models.connection:Module]
+#
+# @SEMANTICS: database, connection, configuration, sqlalchemy, sqlite
+# @PURPOSE: Defines the database schema for external database connection configurations.
+# @LAYER: Domain
+# @RELATION: DEPENDS_ON -> sqlalchemy
+#
+# @INVARIANT: All primary keys are UUID strings.
+
+# [SECTION: IMPORTS]
+from sqlalchemy import Column, String, Integer, DateTime
+from sqlalchemy.sql import func
+from .mapping import Base
+import uuid
+# [/SECTION]
+
+# [DEF:ConnectionConfig:Class]
+# @PURPOSE: Stores credentials for external databases used for column mapping.
+class ConnectionConfig(Base):
+    __tablename__ = "connection_configs"
+
+    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
+    name = Column(String, nullable=False)
+    type = Column(String, nullable=False)  # e.g., "postgres"
+    host = Column(String, nullable=True)
+    port = Column(Integer, nullable=True)
+    database = Column(String, nullable=True)
+    username = Column(String, nullable=True)
+    password = Column(String, nullable=True)  # Encrypted/Obfuscated password
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+    updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())
+# [/DEF:ConnectionConfig:Class]
+
+# [/DEF:backend.src.models.connection:Module]
\ No newline at end of file
diff --git a/backend/src/models/task.py b/backend/src/models/task.py
index b7c3e12..f09268c 100644
--- a/backend/src/models/task.py
+++ b/backend/src/models/task.py
@@ -27,6 +27,7 @@ class TaskRecord(Base):
     finished_at = Column(DateTime(timezone=True), nullable=True)
     logs = Column(JSON, nullable=True)  # Store structured logs as JSON
     error = Column(String, nullable=True)
+    result = Column(JSON, nullable=True)
     created_at = Column(DateTime(timezone=True), server_default=func.now())
     params = Column(JSON, nullable=True)
 # [/DEF:TaskRecord:Class]
diff --git a/backend/src/plugins/debug.py b/backend/src/plugins/debug.py
new file mode 100644
index 0000000..46a645c
--- /dev/null
+++ b/backend/src/plugins/debug.py
@@ -0,0 +1,137 @@
+# [DEF:DebugPluginModule:Module]
+# @SEMANTICS: plugin, debug, api, database, superset
+# @PURPOSE: Implements a plugin for system diagnostics and debugging Superset API responses.
+# @LAYER: Plugins
+# @RELATION: Inherits from PluginBase. Uses SupersetClient from core.
+# @CONSTRAINT: Must use belief_scope for logging.
+
+# [SECTION: IMPORTS]
+from typing import Dict, Any, Optional
+from ..core.plugin_base import PluginBase
+from ..core.superset_client import SupersetClient
+from ..core.logger import logger, belief_scope
+# [/SECTION]
+
+# [DEF:DebugPlugin:Class]
+# @PURPOSE: Plugin for system diagnostics and debugging.
+class DebugPlugin(PluginBase):
+    """
+    Plugin for system diagnostics and debugging.
+    """
+
+    @property
+    def id(self) -> str:
+        return "system-debug"
+
+    @property
+    def name(self) -> str:
+        return "System Debug"
+
+    @property
+    def description(self) -> str:
+        return "Run system diagnostics and debug Superset API responses."
+
+    @property
+    def version(self) -> str:
+        return "1.0.0"
+
+    # [DEF:DebugPlugin.get_schema:Function]
+    # @PURPOSE: Returns the JSON schema for the debug plugin parameters.
+    def get_schema(self) -> Dict[str, Any]:
+        return {
+            "type": "object",
+            "properties": {
+                "action": {
+                    "type": "string",
+                    "title": "Action",
+                    "enum": ["test-db-api", "get-dataset-structure"],
+                    "default": "test-db-api"
+                },
+                "env": {
+                    "type": "string",
+                    "title": "Environment",
+                    "description": "The Superset environment (for dataset structure)."
+                },
+                "dataset_id": {
+                    "type": "integer",
+                    "title": "Dataset ID",
+                    "description": "The ID of the dataset (for dataset structure)."
+                },
+                "source_env": {
+                    "type": "string",
+                    "title": "Source Environment",
+                    "description": "Source env for DB API test."
+                },
+                "target_env": {
+                    "type": "string",
+                    "title": "Target Environment",
+                    "description": "Target env for DB API test."
+                }
+            },
+            "required": ["action"]
+        }
+    # [/DEF:DebugPlugin.get_schema:Function]
+
+    # [DEF:DebugPlugin.execute:Function]
+    # @PURPOSE: Executes the debug logic.
+    async def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
+        with belief_scope("DebugPlugin.execute", f"params={params}"):
+            action = params.get("action")
+
+            if action == "test-db-api":
+                return await self._test_db_api(params)
+            elif action == "get-dataset-structure":
+                return await self._get_dataset_structure(params)
+            else:
+                raise ValueError(f"Unknown action: {action}")
+    # [/DEF:DebugPlugin.execute:Function]
+
+    # [DEF:DebugPlugin._test_db_api:Function]
+    async def _test_db_api(self, params: Dict[str, Any]) -> Dict[str, Any]:
+        source_env_name = params.get("source_env")
+        target_env_name = params.get("target_env")
+
+        if not source_env_name or not target_env_name:
+            raise ValueError("source_env and target_env are required for test-db-api")
+
+        from ..dependencies import get_config_manager
+        config_manager = get_config_manager()
+
+        results = {}
+        for name in [source_env_name, target_env_name]:
+            env_config = config_manager.get_environment(name)
+            if not env_config:
+                raise ValueError(f"Environment '{name}' not found.")
+
+            client = SupersetClient(env_config)
+            client.authenticate()
+            count, dbs = client.get_databases()
+            results[name] = {
+                "count": count,
+                "databases": dbs
+            }
+
+        return results
+
+    # [DEF:DebugPlugin._get_dataset_structure:Function]
+    async def _get_dataset_structure(self, params: Dict[str, Any]) -> Dict[str, Any]:
+        env_name = params.get("env")
+        dataset_id = params.get("dataset_id")
+
+        if not env_name or dataset_id is None:
+            raise ValueError("env and dataset_id are required for get-dataset-structure")
+
+        from ..dependencies import get_config_manager
+        config_manager = get_config_manager()
+        env_config = config_manager.get_environment(env_name)
+        if not env_config:
+            raise ValueError(f"Environment '{env_name}' not found.")
+
+        client = SupersetClient(env_config)
+        client.authenticate()
+
+        dataset_response = client.get_dataset(dataset_id)
+        return dataset_response.get('result') or {}
+
+# [/DEF:DebugPlugin:Class]
+# [/DEF:DebugPluginModule:Module]
\ No newline at end of file
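
Like every plugin here, DebugPlugin is driven through the generic tasks API rather than a dedicated endpoint. A rough client-side sketch of a diagnostic run (the base URL and the exact status strings are assumptions; the request shape matches toolsService.js later in this patch):

    import requests, time

    BASE = "http://localhost:8000"
    task = requests.post(f"{BASE}/api/tasks", json={
        "plugin_id": "system-debug",
        "params": {"action": "get-dataset-structure", "env": "dev", "dataset_id": 42},
    }).json()
    while True:
        state = requests.get(f"{BASE}/api/tasks/{task['id']}").json()
        if state["status"] in ("completed", "success", "failed"):
            break
        time.sleep(1)
    print(state.get("result"))  # the dict returned by DebugPlugin.execute
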
diff --git a/backend/src/plugins/mapper.py b/backend/src/plugins/mapper.py
new file mode 100644
index 0000000..d2bdc17
--- /dev/null
+++ b/backend/src/plugins/mapper.py
@@ -0,0 +1,164 @@
+# [DEF:MapperPluginModule:Module]
+# @SEMANTICS: plugin, mapper, datasets, postgresql, excel
+# @PURPOSE: Implements a plugin for mapping dataset columns using external database connections or Excel files.
+# @LAYER: Plugins
+# @RELATION: Inherits from PluginBase. Uses DatasetMapper from superset_tool.
+# @CONSTRAINT: Must use belief_scope for logging.
+
+# [SECTION: IMPORTS]
+from typing import Dict, Any, Optional
+from ..core.plugin_base import PluginBase
+from ..core.superset_client import SupersetClient
+from ..core.logger import logger, belief_scope
+from ..core.database import SessionLocal
+from ..models.connection import ConnectionConfig
+from superset_tool.utils.dataset_mapper import DatasetMapper
+from superset_tool.utils.logger import SupersetLogger
+# [/SECTION]
+
+# [DEF:MapperPlugin:Class]
+# @PURPOSE: Plugin for mapping dataset column verbose names.
+class MapperPlugin(PluginBase):
+    """
+    Plugin for mapping dataset column verbose names.
+    """
+
+    @property
+    def id(self) -> str:
+        return "dataset-mapper"
+
+    @property
+    def name(self) -> str:
+        return "Dataset Mapper"
+
+    @property
+    def description(self) -> str:
+        return "Map dataset column verbose names using PostgreSQL comments or Excel files."
+
+    @property
+    def version(self) -> str:
+        return "1.0.0"
+
+    # [DEF:MapperPlugin.get_schema:Function]
+    # @PURPOSE: Returns the JSON schema for the mapper plugin parameters.
+    def get_schema(self) -> Dict[str, Any]:
+        return {
+            "type": "object",
+            "properties": {
+                "env": {
+                    "type": "string",
+                    "title": "Environment",
+                    "description": "The Superset environment (e.g., 'dev')."
+                },
+                "dataset_id": {
+                    "type": "integer",
+                    "title": "Dataset ID",
+                    "description": "The ID of the dataset to update."
+                },
+                "source": {
+                    "type": "string",
+                    "title": "Mapping Source",
+                    "enum": ["postgres", "excel"],
+                    "default": "postgres"
+                },
+                "connection_id": {
+                    "type": "string",
+                    "title": "Saved Connection",
+                    "description": "The ID of a saved database connection (for postgres source)."
+                },
+                "table_name": {
+                    "type": "string",
+                    "title": "Table Name",
+                    "description": "Target table name in PostgreSQL."
+                },
+                "table_schema": {
+                    "type": "string",
+                    "title": "Table Schema",
+                    "description": "Target table schema in PostgreSQL.",
+                    "default": "public"
+                },
+                "excel_path": {
+                    "type": "string",
+                    "title": "Excel Path",
+                    "description": "Path to the Excel file (for excel source)."
+                }
+            },
+            "required": ["env", "dataset_id", "source"]
+        }
+    # [/DEF:MapperPlugin.get_schema:Function]
+
+    # [DEF:MapperPlugin.execute:Function]
+    # @PURPOSE: Executes the dataset mapping logic.
+    # @PRE: Params contain valid 'env', 'dataset_id', and 'source'.
+    # @POST: Updates the dataset in Superset.
+    async def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
+        with belief_scope("MapperPlugin.execute", f"params={params}"):
+            env_name = params.get("env")
+            dataset_id = params.get("dataset_id")
+            source = params.get("source")
+
+            if not env_name or dataset_id is None or not source:
+                logger.error("[MapperPlugin.execute][State] Missing required parameters.")
+                raise ValueError("Missing required parameters: env, dataset_id, source")
+
+            # Get config and initialize client
+            from ..dependencies import get_config_manager
+            config_manager = get_config_manager()
+            env_config = config_manager.get_environment(env_name)
+            if not env_config:
+                logger.error(f"[MapperPlugin.execute][State] Environment '{env_name}' not found.")
+                raise ValueError(f"Environment '{env_name}' not found in configuration.")
+
+            client = SupersetClient(env_config)
+            client.authenticate()
+
+            postgres_config = None
+            if source == "postgres":
+                connection_id = params.get("connection_id")
+                if not connection_id:
+                    logger.error("[MapperPlugin.execute][State] connection_id is required for postgres source.")
+                    raise ValueError("connection_id is required for postgres source.")
+
+                # Load connection from DB
+                db = SessionLocal()
+                try:
+                    conn_config = db.query(ConnectionConfig).filter(ConnectionConfig.id == connection_id).first()
+                    if not conn_config:
+                        logger.error(f"[MapperPlugin.execute][State] Connection {connection_id} not found.")
+                        raise ValueError(f"Connection {connection_id} not found.")
+
+                    postgres_config = {
+                        'dbname': conn_config.database,
+                        'user': conn_config.username,
+                        'password': conn_config.password,
+                        'host': conn_config.host,
+                        'port': str(conn_config.port) if conn_config.port else '5432'
+                    }
+                finally:
+                    db.close()
+
+            logger.info(f"[MapperPlugin.execute][Action] Starting mapping for dataset {dataset_id} in {env_name}")
+
+            # Use internal SupersetLogger for DatasetMapper
+            s_logger = SupersetLogger(name="dataset_mapper_plugin")
+            mapper = DatasetMapper(s_logger)
+
+            try:
+                mapper.run_mapping(
+                    superset_client=client,
+                    dataset_id=dataset_id,
+                    source=source,
+                    postgres_config=postgres_config,
+                    excel_path=params.get("excel_path"),
+                    table_name=params.get("table_name"),
+                    table_schema=params.get("table_schema") or "public"
+                )
+                logger.info(f"[MapperPlugin.execute][Success] Mapping completed for dataset {dataset_id}")
+                return {"status": "success", "dataset_id": dataset_id}
+            except Exception as e:
+                logger.error(f"[MapperPlugin.execute][Failure] Mapping failed: {e}")
+                raise
+    # [/DEF:MapperPlugin.execute:Function]
+
+# [/DEF:MapperPlugin:Class]
+# [/DEF:MapperPluginModule:Module]
\ No newline at end of file
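
For reference, a postgres-sourced mapping run submits parameters shaped by the schema above; a sketch of the payload (the values are hypothetical, and the connection id is whatever /api/settings/connections returned when the connection was saved):

    params = {
        "env": "dev",
        "dataset_id": 42,
        "source": "postgres",
        "connection_id": "<saved-connection-uuid>",  # hypothetical UUID
        "table_name": "account_debt",
        "table_schema": "public",
    }
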
diff --git a/backend/src/plugins/search.py b/backend/src/plugins/search.py
new file mode 100644
index 0000000..44b9fdd
--- /dev/null
+++ b/backend/src/plugins/search.py
@@ -0,0 +1,161 @@
+# [DEF:SearchPluginModule:Module]
+# @SEMANTICS: plugin, search, datasets, regex, superset
+# @PURPOSE: Implements a plugin for searching text patterns across all datasets in a specific Superset environment.
+# @LAYER: Plugins
+# @RELATION: Inherits from PluginBase. Uses SupersetClient from core.
+# @CONSTRAINT: Must use belief_scope for logging.
+
+# [SECTION: IMPORTS]
+import re
+from typing import Dict, Any, List, Optional
+from ..core.plugin_base import PluginBase
+from ..core.superset_client import SupersetClient
+from ..core.logger import logger, belief_scope
+# [/SECTION]
+
+# [DEF:SearchPlugin:Class]
+# @PURPOSE: Plugin for searching text patterns in Superset datasets.
+class SearchPlugin(PluginBase):
+    """
+    Plugin for searching text patterns in Superset datasets.
+    """
+
+    @property
+    def id(self) -> str:
+        return "search-datasets"
+
+    @property
+    def name(self) -> str:
+        return "Search Datasets"
+
+    @property
+    def description(self) -> str:
+        return "Search for text patterns across all datasets in a specific environment."
+
+    @property
+    def version(self) -> str:
+        return "1.0.0"
+
+    # [DEF:SearchPlugin.get_schema:Function]
+    # @PURPOSE: Returns the JSON schema for the search plugin parameters.
+    def get_schema(self) -> Dict[str, Any]:
+        return {
+            "type": "object",
+            "properties": {
+                "env": {
+                    "type": "string",
+                    "title": "Environment",
+                    "description": "The Superset environment to search in (e.g., 'dev', 'prod')."
+                },
+                "query": {
+                    "type": "string",
+                    "title": "Search Query (Regex)",
+                    "description": "The regex pattern to search for."
+                }
+            },
+            "required": ["env", "query"]
+        }
+    # [/DEF:SearchPlugin.get_schema:Function]
+
+    # [DEF:SearchPlugin.execute:Function]
+    # @PURPOSE: Executes the dataset search logic.
+    # @PRE: Params contain valid 'env' and 'query'.
+    # @POST: Returns a dictionary with count and results list.
+    async def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
+        with belief_scope("SearchPlugin.execute", f"params={params}"):
+            env_name = params.get("env")
+            search_query = params.get("query")
+
+            if not env_name or not search_query:
+                logger.error("[SearchPlugin.execute][State] Missing required parameters.")
+                raise ValueError("Missing required parameters: env, query")
+
+            # Get config and initialize client
+            from ..dependencies import get_config_manager
+            config_manager = get_config_manager()
+            env_config = config_manager.get_environment(env_name)
+            if not env_config:
+                logger.error(f"[SearchPlugin.execute][State] Environment '{env_name}' not found.")
+                raise ValueError(f"Environment '{env_name}' not found in configuration.")
+
+            client = SupersetClient(env_config)
+            client.authenticate()
+
+            logger.info(f"[SearchPlugin.execute][Action] Searching for pattern: '{search_query}' in environment: {env_name}")
+
+            try:
+                # Ported logic from search_script.py
+                _, datasets = client.get_datasets(query={"columns": ["id", "table_name", "sql", "database", "columns"]})
+
+                if not datasets:
+                    logger.warning("[SearchPlugin.execute][State] No datasets found.")
+                    return {"count": 0, "results": []}
+
+                pattern = re.compile(search_query, re.IGNORECASE)
+                results = []
+
+                for dataset in datasets:
+                    dataset_id = dataset.get('id')
+                    dataset_name = dataset.get('table_name', 'Unknown')
+                    if not dataset_id:
+                        continue
+
+                    for field, value in dataset.items():
+                        value_str = str(value)
+                        if pattern.search(value_str):
+                            match_obj = pattern.search(value_str)
+                            results.append({
+                                "dataset_id": dataset_id,
+                                "dataset_name": dataset_name,
+                                "field": field,
+                                "match_context": self._get_context(value_str, match_obj.group() if match_obj else ""),
+                                "full_value": value_str
+                            })
+
+                logger.info(f"[SearchPlugin.execute][Success] Found matches in {len(results)} locations.")
+                return {
+                    "count": len(results),
+                    "results": results
+                }
+
+            except re.error as e:
+                logger.error(f"[SearchPlugin.execute][Failure] Invalid regex pattern: {e}")
+                raise ValueError(f"Invalid regex pattern: {e}")
+            except Exception as e:
+                logger.error(f"[SearchPlugin.execute][Failure] Error during search: {e}")
+                raise
+    # [/DEF:SearchPlugin.execute:Function]
+
+    # [DEF:SearchPlugin._get_context:Function]
+    # @PURPOSE: Extracts a small context around the match for display.
+    def _get_context(self, text: str, match_text: str, context_lines: int = 1) -> str:
+        """
+        Extracts a small context around the match for display.
+        """
+        if not match_text:
+            return text[:100] + "..." if len(text) > 100 else text
+
+        lines = text.splitlines()
+        match_line_index = -1
+        for i, line in enumerate(lines):
+            if match_text in line:
+                match_line_index = i
+                break
+
+        if match_line_index != -1:
+            start = max(0, match_line_index - context_lines)
+            end = min(len(lines), match_line_index + context_lines + 1)
+            context = []
+            for i in range(start, end):
+                line_content = lines[i]
+                if i == match_line_index:
+                    context.append(f"==> {line_content}")
+                else:
+                    context.append(f"    {line_content}")
+            return "\n".join(context)
+
+        return text[:100] + "..." if len(text) > 100 else text
+    # [/DEF:SearchPlugin._get_context:Function]
+
+# [/DEF:SearchPlugin:Class]
+# [/DEF:SearchPluginModule:Module]
\ No newline at end of file
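
The context helper prefixes the matched line with "==>" and keeps one line of context on each side, which is what SearchTool renders later in this patch. Illustrative behaviour (the SQL text is made up):

    plugin = SearchPlugin()
    sql = "SELECT *\nFROM dm.account_debt\nWHERE dt > :dt"
    print(plugin._get_context(sql, "account_debt"))
    #     SELECT *
    # ==> FROM dm.account_debt
    #     WHERE dt > :dt
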
diff --git a/backup_script.py b/backup_script.py
deleted file mode 100755
index 278a0aa..0000000
--- a/backup_script.py
+++ /dev/null
@@ -1,163 +0,0 @@
-# [DEF:backup_script:Module]
-#
-# @SEMANTICS: backup, superset, automation, dashboard
-# @PURPOSE: This module is responsible for automated backup of Superset dashboards.
-# @LAYER: App
-# @RELATION: DEPENDS_ON -> superset_tool.client
-# @RELATION: DEPENDS_ON -> superset_tool.utils
-# @PUBLIC_API: BackupConfig, backup_dashboards, main
-
-# [SECTION: IMPORTS]
-import logging
-import sys
-from pathlib import Path
-from dataclasses import dataclass,field
-from requests.exceptions import RequestException
-from superset_tool.client import SupersetClient
-from superset_tool.exceptions import SupersetAPIError
-from superset_tool.utils.logger import SupersetLogger
-from superset_tool.utils.fileio import (
-    save_and_unpack_dashboard,
-    archive_exports,
-    sanitize_filename,
-    consolidate_archive_folders,
-    remove_empty_directories,
-    RetentionPolicy
-)
-from superset_tool.utils.init_clients import setup_clients
-# [/SECTION]
-
-# [DEF:BackupConfig:DataClass]
-# @PURPOSE: Stores the configuration for the backup process.
-@dataclass
-class BackupConfig:
-    """Configuration for the backup process."""
-    consolidate: bool = True
-    rotate_archive: bool = True
-    clean_folders: bool = True
-    retention_policy: RetentionPolicy = field(default_factory=RetentionPolicy)
-# [/DEF:BackupConfig:DataClass]
-
-# [DEF:backup_dashboards:Function]
-# @PURPOSE: Backs up all available dashboards for the given client and environment, skipping export errors.
-# @PRE: `client` must be an initialized `SupersetClient` instance.
-# @PRE: `env_name` must be a string identifying the environment.
-# @PRE: `backup_root` must be a valid path to the backup root directory.
-# @POST: Dashboards are exported and saved. Export errors are logged and do not stop the script.
-# @RELATION: CALLS -> client.get_dashboards
-# @RELATION: CALLS -> client.export_dashboard
-# @RELATION: CALLS -> save_and_unpack_dashboard
-# @RELATION: CALLS -> archive_exports
-# @RELATION: CALLS -> consolidate_archive_folders
-# @RELATION: CALLS -> remove_empty_directories
-# @PARAM: client (SupersetClient) - Client for accessing the Superset API.
-# @PARAM: env_name (str) - Environment name (e.g., 'PROD').
-# @PARAM: backup_root (Path) - Root directory for storing backups.
-# @PARAM: logger (SupersetLogger) - Logger instance.
-# @PARAM: config (BackupConfig) - Backup process configuration.
-# @RETURN: bool - `True` if all dashboards were exported without critical errors, `False` otherwise.
-def backup_dashboards(
-    client: SupersetClient,
-    env_name: str,
-    backup_root: Path,
-    logger: SupersetLogger,
-    config: BackupConfig
-) -> bool:
-    logger.info(f"[backup_dashboards][Entry] Starting backup for {env_name}.")
-    try:
-        dashboard_count, dashboard_meta = client.get_dashboards()
-        logger.info(f"[backup_dashboards][Progress] Found {dashboard_count} dashboards to export in {env_name}.")
-        if dashboard_count == 0:
-            return True
-
-        success_count = 0
-        for db in dashboard_meta:
-            dashboard_id = db.get('id')
-            dashboard_title = db.get('dashboard_title', 'Unknown Dashboard')
-            if not dashboard_id:
-                continue
-
-            try:
-                dashboard_base_dir_name = sanitize_filename(f"{dashboard_title}")
-                dashboard_dir = backup_root / env_name / dashboard_base_dir_name
-                dashboard_dir.mkdir(parents=True, exist_ok=True)
-
-                zip_content, filename = client.export_dashboard(dashboard_id)
-
-                save_and_unpack_dashboard(
-                    zip_content=zip_content,
-                    original_filename=filename,
-                    output_dir=dashboard_dir,
-                    unpack=False,
-                    logger=logger
-                )
-
-                if config.rotate_archive:
-                    archive_exports(str(dashboard_dir), policy=config.retention_policy, logger=logger)
-
-                success_count += 1
-            except (SupersetAPIError, RequestException, IOError, OSError) as db_error:
-                logger.error(f"[backup_dashboards][Failure] Failed to export dashboard {dashboard_title} (ID: {dashboard_id}): {db_error}", exc_info=True)
-                continue
-
-        if config.consolidate:
-            consolidate_archive_folders(backup_root / env_name , logger=logger)
-
-        if config.clean_folders:
-            remove_empty_directories(str(backup_root / env_name), logger=logger)
-
-        logger.info(f"[backup_dashboards][CoherenceCheck:Passed] Backup logic completed.")
-        return success_count == dashboard_count
-    except (RequestException, IOError) as e:
-        logger.critical(f"[backup_dashboards][Failure] Fatal error during backup for {env_name}: {e}", exc_info=True)
-        return False
-# [/DEF:backup_dashboards:Function]
-
-# [DEF:main:Function]
-# @PURPOSE: Main entry point for running the backup process.
-# @RELATION: CALLS -> setup_clients
-# @RELATION: CALLS -> backup_dashboards
-# @RETURN: int - Exit code (0 - success, 1 - error).
-def main() -> int:
-    log_dir = Path("P:\\Superset\\010 Бекапы\\Logs")
-    logger = SupersetLogger(log_dir=log_dir, level=logging.INFO, console=True)
-    logger.info("[main][Entry] Starting Superset backup process.")
-
-    exit_code = 0
-    try:
-        clients = setup_clients(logger)
-        superset_backup_repo = Path("P:\\Superset\\010 Бекапы")
-        superset_backup_repo.mkdir(parents=True, exist_ok=True)
-
-        results = {}
-        environments = ['dev', 'sbx', 'prod', 'preprod']
-        backup_config = BackupConfig(rotate_archive=True)
-
-        for env in environments:
-            try:
-                results[env] = backup_dashboards(
-                    clients[env],
-                    env.upper(),
-                    superset_backup_repo,
-                    logger=logger,
-                    config=backup_config
-                )
-            except Exception as env_error:
-                logger.critical(f"[main][Failure] Critical error for environment {env}: {env_error}", exc_info=True)
-                results[env] = False
-
-        if not all(results.values()):
-            exit_code = 1
-
-    except (RequestException, IOError) as e:
-        logger.critical(f"[main][Failure] Fatal error in main execution: {e}", exc_info=True)
-        exit_code = 1
-
-    logger.info("[main][Exit] Superset backup process finished.")
-    return exit_code
-# [/DEF:main:Function]
-
-if __name__ == "__main__":
-    sys.exit(main())
-
-# [/DEF:backup_script:Module]
diff --git a/debug_db_api.py b/debug_db_api.py
deleted file mode 100755
index f2e155f..0000000
--- a/debug_db_api.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# [DEF:debug_db_api:Module]
-#
-# @SEMANTICS: debug, api, database, script
-# @PURPOSE: Script for debugging the structure of the database API response.
-# @LAYER: App
-# @RELATION: DEPENDS_ON -> superset_tool.client
-# @RELATION: DEPENDS_ON -> superset_tool.utils
-# @PUBLIC_API: debug_database_api
-
-# [SECTION: IMPORTS]
-import json
-import logging
-from superset_tool.client import SupersetClient
-from superset_tool.utils.init_clients import setup_clients
-from superset_tool.utils.logger import SupersetLogger
-# [/SECTION]
-
-# [DEF:debug_database_api:Function]
-# @PURPOSE: Debug the structure of the database API response.
-# @RELATION: CALLS -> setup_clients
-# @RELATION: CALLS -> client.get_databases
-def debug_database_api():
-    logger = SupersetLogger(name="debug_db_api", level=logging.DEBUG)
-
-    # Initialize clients
-    clients = setup_clients(logger)
-    # Log JWT bearer tokens for each client
-    for env_name, client in clients.items():
-        try:
-            # Ensure authentication (access token fetched via headers property)
-            _ = client.headers
-            token = client.network._tokens.get("access_token")
-            logger.info(f"[debug_database_api][Token] Bearer token for {env_name}: {token}")
-        except Exception as exc:
-            logger.error(f"[debug_database_api][Token] Failed to retrieve token for {env_name}: {exc}", exc_info=True)
-
-    # Check the available environments
-    print("Available environments:")
-    for env_name, client in clients.items():
-        print(f"  {env_name}: {client.config.base_url}")
-
-    # Pick two environments for testing
-    if len(clients) < 2:
-        print("Not enough environments for testing")
-        return
-
-    env_names = list(clients.keys())[:2]
-    from_env, to_env = env_names[0], env_names[1]
-
-    from_client = clients[from_env]
-    to_client = clients[to_env]
-
-    print(f"\nTesting the API for environments: {from_env} -> {to_env}")
-
-    try:
-        # Fetch the database list from the first environment
-        print(f"\nFetching the database list from {from_env}:")
-        count, dbs = from_client.get_databases()
-        print(f"Found {count} databases")
-        print("Full API response:")
-        print(json.dumps({"count": count, "result": dbs}, indent=2, ensure_ascii=False))
-
-        # Fetch the database list from the second environment
-        print(f"\nFetching the database list from {to_env}:")
-        count, dbs = to_client.get_databases()
-        print(f"Found {count} databases")
-        print("Full API response:")
-        print(json.dumps({"count": count, "result": dbs}, indent=2, ensure_ascii=False))
-
-    except Exception as e:
-        print(f"Error while testing the API: {e}")
-        import traceback
-        traceback.print_exc()
-# [/DEF:debug_database_api:Function]
-
-if __name__ == "__main__":
-    debug_database_api()
-
-# [/DEF:debug_db_api:Module]
diff --git a/frontend/src/components/Navbar.svelte b/frontend/src/components/Navbar.svelte
index 6423db6..d2d9ba7 100644
--- a/frontend/src/components/Navbar.svelte
+++ b/frontend/src/components/Navbar.svelte
@@ -35,12 +35,25 @@
         >
           Tasks
        </a>
-        <a
-          href="/settings"
-          class="nav-link"
-        >
-          Settings
-        </a>
+        <div class="nav-group">
+          <span class="nav-group-label">Tools</span>
+          <div class="nav-group-items">
+            <a href="/tools/search" class="nav-link">Search</a>
+            <a href="/tools/mapper" class="nav-link">Mapper</a>
+            <a href="/tools/debug" class="nav-link">Debug</a>
+          </div>
+        </div>
+
+        <div class="nav-group">
+          <span class="nav-group-label">Settings</span>
+          <div class="nav-group-items">
+            <a href="/settings" class="nav-link">General</a>
+            <a href="/settings/connections" class="nav-link">
+              Connections
+            </a>
+          </div>
+        </div>
+
     </div>
   </div>
 </nav>
diff --git a/frontend/src/components/tools/ConnectionForm.svelte b/frontend/src/components/tools/ConnectionForm.svelte
new file mode 100644
index 0000000..a6aee78
--- /dev/null
+++ b/frontend/src/components/tools/ConnectionForm.svelte
@@ -0,0 +1,69 @@
+<script>
+    // [DEF:ConnectionForm:Component]
+    // @PURPOSE: Form for registering a new external database connection.
+    import { createEventDispatcher } from 'svelte';
+    import { createConnection } from '../../services/connectionService';
+
+    const dispatch = createEventDispatcher();
+
+    let name = '';
+    let type = 'postgres';
+    let host = '';
+    let port = 5432;
+    let database = '';
+    let username = '';
+    let password = '';
+    let error = null;
+    let saving = false;
+
+    async function handleSubmit() {
+        saving = true;
+        error = null;
+        try {
+            const created = await createConnection({ name, type, host, port, database, username, password });
+            dispatch('created', created);
+        } catch (e) {
+            error = e.message;
+        } finally {
+            saving = false;
+        }
+    }
+</script>
+
+<div class="card">
+    <h3>Add New Connection</h3>
+    <form on:submit|preventDefault={handleSubmit}>
+        <div class="form-row">
+            <label for="conn-name">Name</label>
+            <input id="conn-name" bind:value={name} required />
+        </div>
+        <div class="form-row">
+            <label for="conn-type">Type</label>
+            <select id="conn-type" bind:value={type}>
+                <option value="postgres">PostgreSQL</option>
+            </select>
+        </div>
+        <div class="form-row">
+            <label for="conn-host">Host</label>
+            <input id="conn-host" bind:value={host} />
+        </div>
+        <div class="form-row">
+            <label for="conn-port">Port</label>
+            <input id="conn-port" type="number" bind:value={port} />
+        </div>
+        <div class="form-row">
+            <label for="conn-database">Database</label>
+            <input id="conn-database" bind:value={database} />
+        </div>
+        <div class="form-row">
+            <label for="conn-username">Username</label>
+            <input id="conn-username" bind:value={username} />
+        </div>
+        <div class="form-row">
+            <label for="conn-password">Password</label>
+            <input id="conn-password" type="password" bind:value={password} />
+        </div>
+        {#if error}
+            <p class="error">{error}</p>
+        {/if}
+        <button type="submit" disabled={saving}>Save Connection</button>
+    </form>
+</div>
\ No newline at end of file
diff --git a/frontend/src/components/tools/ConnectionList.svelte b/frontend/src/components/tools/ConnectionList.svelte
new file mode 100644
index 0000000..8b4e36d
--- /dev/null
+++ b/frontend/src/components/tools/ConnectionList.svelte
@@ -0,0 +1,47 @@
+<script>
+    // [DEF:ConnectionList:Component]
+    // @PURPOSE: Lists saved connections and allows deleting them.
+    import { onMount } from 'svelte';
+    import { getConnections, deleteConnection } from '../../services/connectionService';
+
+    let connections = [];
+    let error = null;
+
+    export async function refresh() {
+        try {
+            connections = await getConnections();
+        } catch (e) {
+            error = e.message;
+        }
+    }
+
+    async function handleDelete(id) {
+        try {
+            await deleteConnection(id);
+            await refresh();
+        } catch (e) {
+            error = e.message;
+        }
+    }
+
+    onMount(refresh);
+</script>
+
+<div class="card">
+    <h3>Saved Connections</h3>
+    {#if error}
+        <p class="error">{error}</p>
+    {/if}
+    {#if connections.length === 0}
+        <p>No saved connections yet.</p>
+    {:else}
+        <ul>
+            {#each connections as conn (conn.id)}
+                <li>
+                    <span>{conn.name} ({conn.type}) {conn.host}:{conn.port}/{conn.database}</span>
+                    <button on:click={() => handleDelete(conn.id)}>Delete</button>
+                </li>
+            {/each}
+        </ul>
+    {/if}
+</div>
\ No newline at end of file
diff --git a/frontend/src/components/tools/DebugTool.svelte b/frontend/src/components/tools/DebugTool.svelte
new file mode 100644
index 0000000..11d613b
--- /dev/null
+++ b/frontend/src/components/tools/DebugTool.svelte
@@ -0,0 +1,90 @@
+<script>
+    // [DEF:DebugTool:Component]
+    // @PURPOSE: UI for the "system-debug" plugin: runs a diagnostic task and shows its result.
+    import { runTask, getTaskStatus } from '../../services/toolsService';
+
+    let action = 'test-db-api';
+    let env = '';
+    let datasetId = null;
+    let sourceEnv = '';
+    let targetEnv = '';
+    let results = null;
+    let error = null;
+    let running = false;
+
+    // Poll until the task settles; status strings are assumed ('completed'/'success'/'failed').
+    async function waitForResult(taskId) {
+        for (;;) {
+            const task = await getTaskStatus(taskId);
+            if (task.status === 'failed') {
+                throw new Error(task.error || 'Task failed');
+            }
+            if (task.status === 'completed' || task.status === 'success') {
+                return task.result;
+            }
+            await new Promise((resolve) => setTimeout(resolve, 1000));
+        }
+    }
+
+    async function run() {
+        running = true;
+        error = null;
+        results = null;
+        try {
+            const params = action === 'test-db-api'
+                ? { action, source_env: sourceEnv, target_env: targetEnv }
+                : { action, env, dataset_id: datasetId };
+            const task = await runTask('system-debug', params);
+            results = await waitForResult(task.id);
+        } catch (e) {
+            error = e.message;
+        } finally {
+            running = false;
+        }
+    }
+</script>
+
+<div class="card">
+    <h3>System Diagnostics</h3>
+    <div class="form-row">
+        <label for="debug-action">Action</label>
+        <select id="debug-action" bind:value={action}>
+            <option value="test-db-api">Test DB API</option>
+            <option value="get-dataset-structure">Get Dataset Structure</option>
+        </select>
+    </div>
+
+    {#if action === 'test-db-api'}
+        <div class="form-row">
+            <label for="debug-source-env">Source Environment</label>
+            <input id="debug-source-env" bind:value={sourceEnv} />
+        </div>
+        <div class="form-row">
+            <label for="debug-target-env">Target Environment</label>
+            <input id="debug-target-env" bind:value={targetEnv} />
+        </div>
+    {:else}
+        <div class="form-row">
+            <label for="debug-env">Environment</label>
+            <input id="debug-env" bind:value={env} />
+        </div>
+        <div class="form-row">
+            <label for="debug-dataset-id">Dataset ID</label>
+            <input id="debug-dataset-id" type="number" bind:value={datasetId} />
+        </div>
+    {/if}
+
+    {#if error}
+        <p class="error">{error}</p>
+    {/if}
+    <button on:click={run} disabled={running}>
+        {running ? 'Running...' : 'Run Diagnostics'}
+    </button>
+</div>
+
+{#if results}
+    <div class="card">
+        <h3>Debug Output</h3>
+        <pre>{JSON.stringify(results, null, 2)}</pre>
+    </div>
+{/if}
\ No newline at end of file
diff --git a/frontend/src/components/tools/MapperTool.svelte b/frontend/src/components/tools/MapperTool.svelte
new file mode 100644
index 0000000..de92708
--- /dev/null
+++ b/frontend/src/components/tools/MapperTool.svelte
@@ -0,0 +1,103 @@
+<script>
+    // [DEF:MapperTool:Component]
+    // @PURPOSE: UI for the "dataset-mapper" plugin.
+    import { onMount } from 'svelte';
+    import { runTask } from '../../services/toolsService';
+    import { getConnections } from '../../services/connectionService';
+
+    let env = '';
+    let datasetId = null;
+    let source = 'postgres';
+    let connectionId = '';
+    let tableName = '';
+    let tableSchema = 'public';
+    let excelPath = '';
+    let connections = [];
+    let status = null;
+    let error = null;
+    let running = false;
+
+    onMount(async () => {
+        try {
+            connections = await getConnections();
+        } catch (e) {
+            error = e.message;
+        }
+    });
+
+    async function run() {
+        running = true;
+        error = null;
+        status = null;
+        try {
+            const params = {
+                env,
+                dataset_id: datasetId,
+                source,
+                connection_id: source === 'postgres' ? connectionId : undefined,
+                table_name: source === 'postgres' ? tableName : undefined,
+                table_schema: source === 'postgres' ? tableSchema : undefined,
+                excel_path: source === 'excel' ? excelPath : undefined
+            };
+            const task = await runTask('dataset-mapper', params);
+            status = `Task ${task.id} started`;
+        } catch (e) {
+            error = e.message;
+        } finally {
+            running = false;
+        }
+    }
+</script>
+
+<div class="card">
+    <h3>Dataset Column Mapper</h3>
+    <div class="form-row">
+        <label for="mapper-env">Environment</label>
+        <input id="mapper-env" bind:value={env} />
+    </div>
+    <div class="form-row">
+        <label for="mapper-dataset-id">Dataset ID</label>
+        <input id="mapper-dataset-id" type="number" bind:value={datasetId} />
+    </div>
+    <div class="form-row">
+        <label for="mapper-source">Mapping Source</label>
+        <select id="mapper-source" bind:value={source}>
+            <option value="postgres">PostgreSQL comments</option>
+            <option value="excel">Excel file</option>
+        </select>
+    </div>
+
+    {#if source === 'postgres'}
+        <div class="form-row">
+            <label for="mapper-connection">Saved Connection</label>
+            <select id="mapper-connection" bind:value={connectionId}>
+                {#each connections as conn (conn.id)}
+                    <option value={conn.id}>{conn.name}</option>
+                {/each}
+            </select>
+        </div>
+        <div class="form-row">
+            <label for="mapper-table-name">Table Name</label>
+            <input id="mapper-table-name" bind:value={tableName} />
+        </div>
+        <div class="form-row">
+            <label for="mapper-table-schema">Table Schema</label>
+            <input id="mapper-table-schema" bind:value={tableSchema} />
+        </div>
+    {:else}
+        <div class="form-row">
+            <label for="mapper-excel-path">Excel Path</label>
+            <input id="mapper-excel-path" bind:value={excelPath} />
+        </div>
+    {/if}
+
+    {#if error}
+        <p class="error">{error}</p>
+    {/if}
+    {#if status}
+        <p>{status}</p>
+    {/if}
+    <button on:click={run} disabled={running}>
+        {running ? 'Running...' : 'Run Mapping'}
+    </button>
+</div>
\ No newline at end of file
diff --git a/frontend/src/components/tools/SearchTool.svelte b/frontend/src/components/tools/SearchTool.svelte
new file mode 100644
index 0000000..f88ed2e
--- /dev/null
+++ b/frontend/src/components/tools/SearchTool.svelte
@@ -0,0 +1,76 @@
+<script>
+    // [DEF:SearchTool:Component]
+    // @PURPOSE: UI for the "search-datasets" plugin.
+    import { runTask, getTaskStatus } from '../../services/toolsService';
+
+    let env = '';
+    let query = '';
+    let results = null;
+    let error = null;
+    let running = false;
+
+    // Poll until the task settles; status strings are assumed ('completed'/'success'/'failed').
+    async function waitForResult(taskId) {
+        for (;;) {
+            const task = await getTaskStatus(taskId);
+            if (task.status === 'failed') {
+                throw new Error(task.error || 'Task failed');
+            }
+            if (task.status === 'completed' || task.status === 'success') {
+                return task.result;
+            }
+            await new Promise((resolve) => setTimeout(resolve, 1000));
+        }
+    }
+
+    async function run() {
+        running = true;
+        error = null;
+        results = null;
+        try {
+            const task = await runTask('search-datasets', { env, query });
+            results = await waitForResult(task.id);
+        } catch (e) {
+            error = e.message;
+        } finally {
+            running = false;
+        }
+    }
+</script>
+
+<div class="card">
+    <h3>Search Dataset Metadata</h3>
+    <div class="form-row">
+        <label for="search-env">Environment</label>
+        <input id="search-env" bind:value={env} />
+    </div>
+    <div class="form-row">
+        <label for="search-query">Search Query (Regex)</label>
+        <input id="search-query" bind:value={query} />
+    </div>
+    {#if error}
+        <p class="error">{error}</p>
+    {/if}
+    <button on:click={run} disabled={running}>
+        {running ? 'Searching...' : 'Search'}
+    </button>
+</div>
+
+{#if results}
+    <div class="card">
+        <h3>Search Results</h3>
+        <span class="badge">{results.count} matches</span>
+        <ul>
+            {#each results.results as item}
+                <li>
+                    <strong>{item.dataset_name} (ID: {item.dataset_id})</strong>
+                    <span>Field: {item.field}</span>
+                    <pre>{item.match_context}</pre>
+                </li>
+            {/each}
+            {#if results.count === 0}
+                <li>No matches found for the given pattern.</li>
+            {/if}
+        </ul>
+    </div>
+{/if}
\ No newline at end of file
diff --git a/frontend/src/routes/settings/connections/+page.svelte b/frontend/src/routes/settings/connections/+page.svelte
new file mode 100644
index 0000000..7be1098
--- /dev/null
+++ b/frontend/src/routes/settings/connections/+page.svelte
@@ -0,0 +1,20 @@
+<script>
+    // [DEF:ConnectionsPage:Route]
+    // @PURPOSE: Settings page for managing external database connections.
+    import ConnectionForm from '../../../components/tools/ConnectionForm.svelte';
+    import ConnectionList from '../../../components/tools/ConnectionList.svelte';
+
+    let list;
+</script>
+
+<div class="page">
+    <h2>Connection Management</h2>
+    <div class="columns">
+        <div class="column">
+            <ConnectionForm on:created={() => list.refresh()} />
+        </div>
+        <div class="column">
+            <ConnectionList bind:this={list} />
+        </div>
+    </div>
+</div>
\ No newline at end of file
diff --git a/frontend/src/routes/tools/debug/+page.svelte b/frontend/src/routes/tools/debug/+page.svelte
new file mode 100644
index 0000000..082994b
--- /dev/null
+++ b/frontend/src/routes/tools/debug/+page.svelte
@@ -0,0 +1,10 @@
+<script>
+    // [DEF:DebugPage:Route]
+    // @PURPOSE: Page wrapper for the System Diagnostics tool.
+    import DebugTool from '../../../components/tools/DebugTool.svelte';
+</script>
+
+<div class="page">
+    <h2>System Diagnostics</h2>
+    <DebugTool />
+</div>
\ No newline at end of file
diff --git a/frontend/src/routes/tools/mapper/+page.svelte b/frontend/src/routes/tools/mapper/+page.svelte
new file mode 100644
index 0000000..e01ac33
--- /dev/null
+++ b/frontend/src/routes/tools/mapper/+page.svelte
@@ -0,0 +1,10 @@
+<script>
+    // [DEF:MapperPage:Route]
+    // @PURPOSE: Page wrapper for the Dataset Column Mapper tool.
+    import MapperTool from '../../../components/tools/MapperTool.svelte';
+</script>
+
+<div class="page">
+    <h2>Dataset Column Mapper</h2>
+    <MapperTool />
+</div>
\ No newline at end of file
diff --git a/frontend/src/routes/tools/search/+page.svelte b/frontend/src/routes/tools/search/+page.svelte
new file mode 100644
index 0000000..4415363
--- /dev/null
+++ b/frontend/src/routes/tools/search/+page.svelte
@@ -0,0 +1,10 @@
+<script>
+    // [DEF:SearchPage:Route]
+    // @PURPOSE: Page wrapper for the Dataset Search tool.
+    import SearchTool from '../../../components/tools/SearchTool.svelte';
+</script>
+
+<div class="page">
+    <h2>Dataset Search</h2>
+    <SearchTool />
+</div>
\ No newline at end of file
diff --git a/frontend/src/services/connectionService.js b/frontend/src/services/connectionService.js
new file mode 100644
index 0000000..9562ed1
--- /dev/null
+++ b/frontend/src/services/connectionService.js
@@ -0,0 +1,52 @@
+/**
+ * Service for interacting with the Connection Management API.
+ */
+
+const API_BASE = '/api/settings/connections';
+
+/**
+ * Fetch a list of saved connections.
+ * @returns {Promise<Array>} List of connections.
+ */
+export async function getConnections() {
+    const response = await fetch(API_BASE);
+    if (!response.ok) {
+        throw new Error(`Failed to fetch connections: ${response.statusText}`);
+    }
+    return await response.json();
+}
+
+/**
+ * Create a new connection configuration.
+ * @param {Object} connectionData - The connection data.
+ * @returns {Promise<Object>} The created connection instance.
+ */
+export async function createConnection(connectionData) {
+    const response = await fetch(API_BASE, {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json'
+        },
+        body: JSON.stringify(connectionData)
+    });
+
+    if (!response.ok) {
+        const errorData = await response.json().catch(() => ({}));
+        throw new Error(errorData.detail || `Failed to create connection: ${response.statusText}`);
+    }
+    return await response.json();
+}
+
+/**
+ * Delete a connection configuration.
+ * @param {string} connectionId - The ID of the connection to delete.
+ */
+export async function deleteConnection(connectionId) {
+    const response = await fetch(`${API_BASE}/${connectionId}`, {
+        method: 'DELETE'
+    });
+
+    if (!response.ok) {
+        throw new Error(`Failed to delete connection: ${response.statusText}`);
+    }
+}
\ No newline at end of file
diff --git a/frontend/src/services/toolsService.js b/frontend/src/services/toolsService.js
new file mode 100644
index 0000000..d45378d
--- /dev/null
+++ b/frontend/src/services/toolsService.js
@@ -0,0 +1,40 @@
+/**
+ * Service for generic Task API communication used by Tools.
+ */
+
+const API_BASE = '/api/tasks';
+
+/**
+ * Start a new task for a given plugin.
+ * @param {string} pluginId - The ID of the plugin to run.
+ * @param {Object} params - Parameters for the plugin.
+ * @returns {Promise<Object>} The created task instance.
+ */
+export async function runTask(pluginId, params) {
+    const response = await fetch(API_BASE, {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json'
+        },
+        body: JSON.stringify({ plugin_id: pluginId, params })
+    });
+
+    if (!response.ok) {
+        const errorData = await response.json().catch(() => ({}));
+        throw new Error(errorData.detail || `Failed to start task: ${response.statusText}`);
+    }
+    return await response.json();
+}
+
+/**
+ * Fetch details for a specific task (to poll status or get result).
+ * @param {string} taskId - The ID of the task.
+ * @returns {Promise<Object>} Task details.
+ */
+export async function getTaskStatus(taskId) {
+    const response = await fetch(`${API_BASE}/${taskId}`);
+    if (!response.ok) {
+        throw new Error(`Failed to fetch task ${taskId}: ${response.statusText}`);
+    }
+    return await response.json();
+}
\ No newline at end of file
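
Together the two services cover the full flow the new pages use: save a connection, then run a tool against it through the tasks API. The same flow from Python, as an end-to-end sketch (host, credentials, and ids are placeholders):

    import requests

    BASE = "http://localhost:8000"
    conn = requests.post(f"{BASE}/api/settings/connections", json={
        "name": "dwh", "type": "postgres", "host": "db.example.internal",
        "port": 5432, "database": "dwh", "username": "etl", "password": "secret",
    }).json()
    requests.post(f"{BASE}/api/tasks", json={
        "plugin_id": "dataset-mapper",
        "params": {"env": "dev", "dataset_id": 42, "source": "postgres",
                   "connection_id": conn["id"], "table_name": "account_debt",
                   "table_schema": "public"},
    })
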
diff --git a/get_dataset_structure.py b/get_dataset_structure.py
deleted file mode 100755
index 4d5f9a4..0000000
--- a/get_dataset_structure.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# [DEF:get_dataset_structure:Module]
-#
-# @SEMANTICS: superset, dataset, structure, debug, json
-# @PURPOSE: This module fetches and saves the structure of a Superset dataset. It is used for debugging and analyzing the data returned by the API.
-# @LAYER: App
-# @RELATION: DEPENDS_ON -> superset_tool.client
-# @RELATION: DEPENDS_ON -> superset_tool.utils.init_clients
-# @RELATION: DEPENDS_ON -> superset_tool.utils.logger
-# @PUBLIC_API: get_and_save_dataset
-
-# [SECTION: IMPORTS]
-import argparse
-import json
-from superset_tool.utils.init_clients import setup_clients
-from superset_tool.utils.logger import SupersetLogger
-# [/SECTION]
-
-# [DEF:get_and_save_dataset:Function]
-# @PURPOSE: Fetches a dataset structure from Superset and saves it to a JSON file.
-# @RELATION: CALLS -> setup_clients
-# @RELATION: CALLS -> superset_client.get_dataset
-# @PARAM: env (str) - Environment (dev, prod, etc.) to connect to.
-# @PARAM: dataset_id (int) - ID of the dataset to fetch.
-# @PARAM: output_path (str) - Path for saving the JSON file.
-def get_and_save_dataset(env: str, dataset_id: int, output_path: str):
-    """
-    Fetches the dataset structure and saves it to a file.
-    """
-    logger = SupersetLogger(name="DatasetStructureRetriever")
-    logger.info("[get_and_save_dataset][Enter] Starting to fetch dataset structure for ID %d from env '%s'.", dataset_id, env)
-
-    try:
-        clients = setup_clients(logger=logger)
-        superset_client = clients.get(env)
-        if not superset_client:
-            logger.error("[get_and_save_dataset][Failure] Environment '%s' not found.", env)
-            return
-
-        dataset_response = superset_client.get_dataset(dataset_id)
-        dataset_data = dataset_response.get('result')
-
-        if not dataset_data:
-            logger.error("[get_and_save_dataset][Failure] No result in dataset response.")
-            return
-
-        with open(output_path, 'w', encoding='utf-8') as f:
-            json.dump(dataset_data, f, ensure_ascii=False, indent=4)
-
-        logger.info("[get_and_save_dataset][Success] Dataset structure saved to %s.", output_path)
-
-    except Exception as e:
-        logger.error("[get_and_save_dataset][Failure] An error occurred: %s", e, exc_info=True)
-# [/DEF:get_and_save_dataset:Function]
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Fetch a dataset structure from Superset.")
-    parser.add_argument("--dataset-id", required=True, type=int, help="Dataset ID.")
-    parser.add_argument("--env", required=True, help="Environment to connect to (e.g., dev).")
-    parser.add_argument("--output-path", default="dataset_structure.json", help="Path for saving the JSON file.")
-    args = parser.parse_args()
-
-    get_and_save_dataset(args.env, args.dataset_id, args.output_path)
-
-# [/DEF:get_dataset_structure:Module]
diff --git a/run_mapper.py b/run_mapper.py
deleted file mode 100755
index 1ac38b9..0000000
--- a/run_mapper.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# [DEF:run_mapper:Module]
-#
-# @SEMANTICS: runner, configuration, cli, main
-# @PURPOSE: This module is the CLI entry point for running the dataset metadata mapping process.
-# @LAYER: App
-# @RELATION: DEPENDS_ON -> superset_tool.utils.dataset_mapper
-# @RELATION: DEPENDS_ON -> superset_tool.utils
-# @PUBLIC_API: main
-
-# [SECTION: IMPORTS]
-import argparse
-import keyring
-from superset_tool.utils.init_clients import setup_clients
-from superset_tool.utils.logger import SupersetLogger
-from superset_tool.utils.dataset_mapper import DatasetMapper
-# [/SECTION]
-
-# [DEF:main:Function]
-# @PURPOSE: Parses command-line arguments and starts the mapping process.
-# @RELATION: CREATES_INSTANCE_OF -> DatasetMapper
-# @RELATION: CALLS -> setup_clients
-# @RELATION: CALLS -> DatasetMapper.run_mapping
-def main():
-    parser = argparse.ArgumentParser(description="Map dataset verbose names in Superset.")
-    parser.add_argument('--source', type=str, required=True, choices=['postgres', 'excel', 'both'], help='The source for the mapping.')
-    parser.add_argument('--dataset-id', type=int, required=True, help='The ID of the dataset to update.')
-    parser.add_argument('--table-name', type=str, help='The table name for PostgreSQL source.')
-    parser.add_argument('--table-schema', type=str, help='The table schema for PostgreSQL source.')
-    parser.add_argument('--excel-path', type=str, help='The path to the Excel file.')
-    parser.add_argument('--env', type=str, default='dev', help='The Superset environment to use.')
-
-    args = parser.parse_args()
-    logger = SupersetLogger(name="dataset_mapper_main")
-
-    # [AI_NOTE]: The DB configuration should be moved to an external file or environment variables.
-    POSTGRES_CONFIG = {
-        'dbname': 'dwh',
-        'user': keyring.get_password("system", f"dwh gp user"),
-        'password': keyring.get_password("system", f"dwh gp password"),
-        'host': '10.66.229.201',
-        'port': '5432'
-    }
-
-    logger.info("[main][Enter] Starting dataset mapper CLI.")
-    try:
-        clients = setup_clients(logger)
-        superset_client = clients.get(args.env)
-
-        if not superset_client:
-            logger.error(f"[main][Failure] Superset client for '{args.env}' environment not found.")
-            return
-
-        mapper = DatasetMapper(logger)
-        mapper.run_mapping(
-            superset_client=superset_client,
-            dataset_id=args.dataset_id,
-            source=args.source,
-            postgres_config=POSTGRES_CONFIG if args.source in ['postgres', 'both'] else None,
-            excel_path=args.excel_path if args.source in ['excel', 'both'] else None,
-            table_name=args.table_name if args.source in ['postgres', 'both'] else None,
-            table_schema=args.table_schema if args.source in ['postgres', 'both'] else None
-        )
-        logger.info("[main][Exit] Dataset mapper process finished.")
-
-    except Exception as main_exc:
-        logger.error("[main][Failure] An unexpected error occurred: %s", main_exc, exc_info=True)
-# [/DEF:main:Function]
-
-if __name__ == '__main__':
-    main()
-
-# [/DEF:run_mapper:Module]
diff --git a/search_script.py b/search_script.py
deleted file mode 100755
index e51c73f..0000000
--- a/search_script.py
+++ /dev/null
@@ -1,204 +0,0 @@
-# [DEF:search_script:Module]
-#
-# @SEMANTICS: search, superset, dataset, regex, file_output
-# @PURPOSE: Provides utilities for searching text patterns in Superset dataset metadata.
-# @LAYER: App
-# @RELATION: DEPENDS_ON -> superset_tool.client
-# @RELATION: DEPENDS_ON -> superset_tool.utils
-# @PUBLIC_API: search_datasets, save_results_to_file, print_search_results, main
-
-# [SECTION: IMPORTS]
-import logging
-import re
-import os
-from typing import Dict, Optional
-from requests.exceptions import RequestException
-from superset_tool.client import SupersetClient
-from superset_tool.exceptions import SupersetAPIError
-from superset_tool.utils.logger import SupersetLogger
-from superset_tool.utils.init_clients import setup_clients
-# [/SECTION]
-
-# [DEF:search_datasets:Function]
-# @PURPOSE: Searches the metadata of all datasets for a string pattern.
-# @PRE: `client` must be an initialized `SupersetClient` instance.
-# @PRE: `search_pattern` must be a valid regular expression string.
-# @POST: Returns a dictionary of search results keyed by dataset ID, with a list of matches as values.
-# @RELATION: CALLS -> client.get_datasets
-# @THROW: re.error - If the regex pattern is invalid.
-# @THROW: SupersetAPIError, RequestException - On critical API errors.
-# @PARAM: client (SupersetClient) - Client for accessing the Superset API.
-# @PARAM: search_pattern (str) - Regular expression to search for.
-# @PARAM: logger (Optional[SupersetLogger]) - Logger instance.
-# @RETURN: Optional[Dict] - Dictionary of results, or None if nothing was found.
-def search_datasets(
-    client: SupersetClient,
-    search_pattern: str,
-    logger: Optional[SupersetLogger] = None
-) -> Optional[Dict]:
-    logger = logger or SupersetLogger(name="dataset_search")
-    logger.info(f"[search_datasets][Enter] Searching for pattern: '{search_pattern}'")
-    try:
-        _, datasets = client.get_datasets(query={"columns": ["id", "table_name", "sql", "database", "columns"]})
-
-        if not datasets:
-            logger.warning("[search_datasets][State] No datasets found.")
-            return None
-
-        pattern = re.compile(search_pattern, re.IGNORECASE)
-        results = {}
-
-        for dataset in datasets:
-            dataset_id = dataset.get('id')
-            if not dataset_id:
-                continue
-
-            matches = []
-            for field, value in dataset.items():
-                value_str = str(value)
-                if pattern.search(value_str):
-                    match_obj = pattern.search(value_str)
-                    matches.append({
-                        "field": field,
-                        "match": match_obj.group() if match_obj else "",
-                        "value": value_str
-                    })
-
-            if matches:
-                results[dataset_id] = matches
-
-        logger.info(f"[search_datasets][Success] Found matches in {len(results)} datasets.")
-        return results
-
-    except re.error as e:
-        logger.error(f"[search_datasets][Failure] Invalid regex pattern: {e}", exc_info=True)
-        raise
-    except (SupersetAPIError, RequestException) as e:
-        logger.critical(f"[search_datasets][Failure] Critical error during search: {e}", exc_info=True)
-        raise
-# [/DEF:search_datasets:Function]
-
-# [DEF:save_results_to_file:Function]
-# @PURPOSE: Saves search results to a text file.
-# @PRE: `results` is the dictionary returned by `search_datasets`, or `None`.
-# @PRE: `filename` must be a valid file path.
-# @POST: Writes the formatted results to the given file.
-# @PARAM: results (Optional[Dict]) - Dictionary of search results.
-# @PARAM: filename (str) - Name of the file to save the results to.
-# @PARAM: logger (Optional[SupersetLogger]) - Logger instance.
-# @RETURN: bool - Whether saving succeeded.
-def save_results_to_file(results: Optional[Dict], filename: str, logger: Optional[SupersetLogger] = None) -> bool:
-    logger = logger or SupersetLogger(name="file_writer")
-    logger.info(f"[save_results_to_file][Enter] Saving results to file: {filename}")
-    try:
-        formatted_report = print_search_results(results)
-        with open(filename, 'w', encoding='utf-8') as f:
-            f.write(formatted_report)
-        logger.info(f"[save_results_to_file][Success] Results saved to {filename}")
-        return True
-    except Exception as e:
-        logger.error(f"[save_results_to_file][Failure] Failed to save results to file: {e}", exc_info=True)
-        return False
-# [/DEF:save_results_to_file:Function]
-
-# [DEF:print_search_results:Function]
-# @PURPOSE: Formats search results for readable console output.
-# @PRE: `results` is the dictionary returned by `search_datasets`, or `None`.
-# @POST: Returns a formatted string with the results.
-# @PARAM: results (Optional[Dict]) - Dictionary of search results.
-# @PARAM: context_lines (int) - Number of context lines to print before and after a match.
-# @RETURN: str - Formatted report.
-def print_search_results(results: Optional[Dict], context_lines: int = 3) -> str:
-    if not results:
-        return "Nothing found"
-
-    output = []
-    for dataset_id, matches in results.items():
-        # Get the database info for the current dataset
-        database_info = ""
-        # Look for the database field among the matches so we can print it
-        for match_info in matches:
-            if match_info['field'] == 'database':
-                database_info = match_info['value']
-                break
-        # If database was not found among the matches, try to get it from other fields
-        if not database_info:
-            # The database might be referenced in another field such as sql or table_name,
-            # but for accuracy we rely on the dedicated field fetched above
-            pass  # Do not print it for now if it was not found explicitly
-
-        output.append(f"\n--- Dataset ID: {dataset_id} ---")
-        if database_info:
-            output.append(f"  Database: {database_info}")
-        output.append("")  # Blank line for readability
-
-        for match_info in matches:
-            field, match_text, full_value = match_info['field'], match_info['match'], match_info['value']
-            output.append(f"  - Field: {field}")
-            output.append(f"    Match: '{match_text}'")
-
-            lines = full_value.splitlines()
-            if not lines: continue
-
-            match_line_index = -1
-            for i, line in enumerate(lines):
-                if match_text in line:
-                    match_line_index = i
-                    break
-
-            if match_line_index != -1:
-                start = max(0, match_line_index - context_lines)
-                end = min(len(lines), match_line_index + context_lines + 1)
-                output.append("    Context:")
-                for i in range(start, end):
-                    prefix = f"{i + 1:5d}: "
-                    line_content = lines[i]
-                    if i == match_line_index:
-                        highlighted = line_content.replace(match_text, f">>>{match_text}<<<")
-                        output.append(f"      {prefix}{highlighted}")
-                    else:
-                        output.append(f"      {prefix}{line_content}")
-            output.append("-" * 25)
-    return "\n".join(output)
-# [/DEF:print_search_results:Function]
-
-# [DEF:main:Function]
-# @PURPOSE: Main entry point for running the search script.
-# @RELATION: CALLS -> setup_clients
-# @RELATION: CALLS -> search_datasets
-# @RELATION: CALLS -> print_search_results
-# @RELATION: CALLS -> save_results_to_file
-def main():
-    logger = SupersetLogger(level=logging.INFO, console=True)
-    clients = setup_clients(logger)
-
-    target_client = clients['dev5']
-    search_query = r"from dm(_view)*.account_debt"
-
-    # Generate a filename based on the current time
-    import datetime
-    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    output_filename = f"search_results_{timestamp}.txt"
-
-    results = search_datasets(
-        client=target_client,
-        search_pattern=search_query,
-        logger=logger
-    )
-
-    report = print_search_results(results)
-
-    logger.info(f"[main][Success] Search finished. Report:\n{report}")
-
-    # Save the results to a file
-    success = save_results_to_file(results, output_filename, logger)
-    if success:
-        logger.info(f"[main][Success] Results also saved to file: {output_filename}")
-    else:
-        logger.error(f"[main][Failure] Failed to save results to file: {output_filename}")
-# [/DEF:main:Function]
-
-if __name__ == "__main__":
-    main()
-
-# [/DEF:search_script:Module]