refactor complete

2026-01-22 17:37:17 +03:00
parent 203ce446f4
commit d99a13d91f
41 changed files with 1789 additions and 3052 deletions

View File

@@ -20,6 +20,8 @@ Auto-generated from all feature plans. Last updated: 2025-12-19
 - SQLite (`tasks.db`), JSON (`config.json`) (009-backup-scheduler)
 - Python 3.9+ (Backend), Node.js 18+ (Frontend) + FastAPI, SvelteKit, Tailwind CSS, Pydantic, SQLAlchemy, `superset_tool` (internal lib) (010-refactor-cli-to-web)
 - SQLite (for job history/results, connection configs), Filesystem (for temporary file uploads) (010-refactor-cli-to-web)
+- Python 3.9+ + FastAPI, Pydantic, requests, pyyaml (migrated from superset_tool) (012-remove-superset-tool)
+- SQLite (tasks.db, migrations.db), Filesystem (012-remove-superset-tool)
 - Python 3.9+ (Backend), Node.js 18+ (Frontend Build) (001-plugin-arch-svelte-ui)
@@ -40,9 +42,9 @@ cd src; pytest; ruff check .
 Python 3.9+ (Backend), Node.js 18+ (Frontend Build): Follow standard conventions
 ## Recent Changes
+- 012-remove-superset-tool: Added Python 3.9+ + FastAPI, Pydantic, requests, pyyaml (migrated from superset_tool)
 - 010-refactor-cli-to-web: Added Python 3.9+ (Backend), Node.js 18+ (Frontend) + FastAPI, SvelteKit, Tailwind CSS, Pydantic, SQLAlchemy, `superset_tool` (internal lib)
 - 009-backup-scheduler: Added Python 3.9+, Node.js 18+ + FastAPI, APScheduler, SQLAlchemy, SvelteKit, Tailwind CSS
-- 009-backup-scheduler: Added Python 3.9+, Node.js 18+ + FastAPI, APScheduler, SQLAlchemy, SvelteKit, Tailwind CSS
 <!-- MANUAL ADDITIONS START -->

View File

@@ -42,4 +42,5 @@ urllib3==2.6.2
 uvicorn==0.38.0
 websockets==15.0.1
 pandas
 psycopg2-binary
+openpyxl
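openpyxl enters the requirements because pandas delegates .xlsx parsing to it; DatasetMapper.load_excel_mappings further down calls pd.read_excel, which raises ImportError without an engine installed. A quick sanity check (hypothetical file name, not part of the commit):

    import pandas as pd
    # pandas resolves the .xlsx engine to openpyxl; pinning it explicitly makes
    # the failure mode obvious when the package is missing.
    df = pd.read_excel("mappings.xlsx", engine="openpyxl")
    print(df.columns.tolist())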

View File

@@ -11,12 +11,11 @@
 # [SECTION: IMPORTS]
 from fastapi import APIRouter, Depends, HTTPException
 from typing import List, Dict, Optional
-from backend.src.dependencies import get_config_manager, get_scheduler_service
-from backend.src.core.superset_client import SupersetClient
-from superset_tool.models import SupersetConfig
+from ...dependencies import get_config_manager, get_scheduler_service
+from ...core.superset_client import SupersetClient
 from pydantic import BaseModel, Field
-from backend.src.core.config_models import Environment as EnvModel
-from backend.src.core.logger import belief_scope
+from ...core.config_models import Environment as EnvModel
+from ...core.logger import belief_scope
 # [/SECTION]
 router = APIRouter()
@@ -114,18 +113,7 @@ async def get_environment_databases(id: str, config_manager=Depends(get_config_manager)):
     try:
         # Initialize SupersetClient from environment config
-        # Note: We need to map Environment model to SupersetConfig
-        superset_config = SupersetConfig(
-            env=env.name,
-            base_url=env.url,
-            auth={
-                "provider": "db",  # Defaulting to db provider
-                "username": env.username,
-                "password": env.password,
-                "refresh": "false"
-            }
-        )
-        client = SupersetClient(superset_config)
+        client = SupersetClient(env)
         return client.get_databases_summary()
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Failed to fetch databases: {str(e)}")

View File

@@ -13,9 +13,9 @@
 from fastapi import APIRouter, Depends, HTTPException
 from sqlalchemy.orm import Session
 from typing import List, Optional
-from backend.src.dependencies import get_config_manager
-from backend.src.core.database import get_db
-from backend.src.models.mapping import DatabaseMapping
+from ...dependencies import get_config_manager
+from ...core.database import get_db
+from ...models.mapping import DatabaseMapping
 from pydantic import BaseModel
 # [/SECTION]

View File

@@ -7,10 +7,9 @@
 from fastapi import APIRouter, Depends, HTTPException
 from typing import List, Dict
-from backend.src.dependencies import get_config_manager, get_task_manager
-from backend.src.models.dashboard import DashboardMetadata, DashboardSelection
-from backend.src.core.superset_client import SupersetClient
-from superset_tool.models import SupersetConfig
+from ...dependencies import get_config_manager, get_task_manager
+from ...models.dashboard import DashboardMetadata, DashboardSelection
+from ...core.superset_client import SupersetClient
 router = APIRouter(prefix="/api", tags=["migration"])
@@ -27,14 +26,7 @@ async def get_dashboards(env_id: str, config_manager=Depends(get_config_manager)):
     if not env:
         raise HTTPException(status_code=404, detail="Environment not found")
-    config = SupersetConfig(
-        env=env.name,
-        base_url=env.url,
-        auth={'provider': 'db', 'username': env.username, 'password': env.password, 'refresh': False},
-        verify_ssl=True,
-        timeout=30
-    )
-    client = SupersetClient(config)
+    client = SupersetClient(env)
     dashboards = client.get_dashboards_summary()
     return dashboards
 # [/DEF:get_dashboards:Function]

View File

@@ -17,7 +17,6 @@ from ...dependencies import get_config_manager
 from ...core.config_manager import ConfigManager
 from ...core.logger import logger, belief_scope
 from ...core.superset_client import SupersetClient
-from superset_tool.models import SupersetConfig
 import os
 # [/SECTION]
@@ -28,7 +27,7 @@ router = APIRouter()
 # @PRE: Config manager is available.
 # @POST: Returns masked AppConfig.
 # @RETURN: AppConfig - The current configuration.
-@router.get("/", response_model=AppConfig)
+@router.get("", response_model=AppConfig)
 async def get_settings(config_manager: ConfigManager = Depends(get_config_manager)):
     with belief_scope("get_settings"):
         logger.info("[get_settings][Entry] Fetching all settings")
@@ -85,17 +84,7 @@ async def add_environment(
     # Validate connection before adding
     try:
-        superset_config = SupersetConfig(
-            env=env.name,
-            base_url=env.url,
-            auth={
-                "provider": "db",
-                "username": env.username,
-                "password": env.password,
-                "refresh": "true"
-            }
-        )
-        client = SupersetClient(config=superset_config)
+        client = SupersetClient(env)
         client.get_dashboards(query={"page_size": 1})
     except Exception as e:
         logger.error(f"[add_environment][Coherence:Failed] Connection validation failed: {e}")
@@ -130,17 +119,7 @@ async def update_environment(
     # Validate connection before updating
     try:
-        superset_config = SupersetConfig(
-            env=env_to_validate.name,
-            base_url=env_to_validate.url,
-            auth={
-                "provider": "db",
-                "username": env_to_validate.username,
-                "password": env_to_validate.password,
-                "refresh": "true"
-            }
-        )
-        client = SupersetClient(config=superset_config)
+        client = SupersetClient(env_to_validate)
         client.get_dashboards(query={"page_size": 1})
     except Exception as e:
         logger.error(f"[update_environment][Coherence:Failed] Connection validation failed: {e}")
@@ -187,21 +166,8 @@ async def test_environment_connection(
         raise HTTPException(status_code=404, detail=f"Environment {id} not found")
     try:
-        # Create SupersetConfig
-        # Note: SupersetConfig expects 'auth' dict with specific keys
-        superset_config = SupersetConfig(
-            env=env.name,
-            base_url=env.url,
-            auth={
-                "provider": "db",  # Defaulting to db for now
-                "username": env.username,
-                "password": env.password,
-                "refresh": "true"
-            }
-        )
         # Initialize client (this will trigger authentication)
-        client = SupersetClient(config=superset_config)
+        client = SupersetClient(env)
         # Try a simple request to verify
         client.get_dashboards(query={"page_size": 1})

View File

@@ -6,10 +6,8 @@
 import sys
 from pathlib import Path
-# Add project root to sys.path to allow importing superset_tool
-# Assuming app.py is in backend/src/
+# project_root is used for static files mounting
 project_root = Path(__file__).resolve().parent.parent.parent
-sys.path.append(str(project_root))
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Depends, Request, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
@@ -86,7 +84,7 @@ async def log_requests(request: Request, call_next):
 app.include_router(plugins.router, prefix="/api/plugins", tags=["Plugins"])
 app.include_router(tasks.router, prefix="/api/tasks", tags=["Tasks"])
 app.include_router(settings.router, prefix="/api/settings", tags=["Settings"])
-app.include_router(connections.router, prefix="/api/connections", tags=["Connections"])
+app.include_router(connections.router, prefix="/api/settings/connections", tags=["Connections"])
 app.include_router(environments.router, prefix="/api/environments", tags=["Environments"])
 app.include_router(mappings.router)
 app.include_router(migration.router)

View File

@@ -23,6 +23,8 @@ class Environment(BaseModel):
     url: str
     username: str
     password: str  # Will be masked in UI
+    verify_ssl: bool = True
+    timeout: int = 30
     is_default: bool = False
     backup_schedule: Schedule = Field(default_factory=Schedule)
 # [/DEF:Environment:DataClass]
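Together with the SupersetClient rewrite further down, these two fields make an Environment self-sufficient for building a client: TLS verification and timeouts no longer need to be hardcoded at each call site (the migration router above previously pinned verify_ssl=True, timeout=30). A minimal sketch of the new call pattern, with hypothetical field values and assuming a `name` field defined above the visible hunk:

    env = Environment(
        name="staging",
        url="https://superset.example.com",
        username="admin",
        password="secret",      # masked in the UI, per the model comment
        verify_ssl=False,       # new field, defaults to True
        timeout=60,             # new field, defaults to 30
    )
    client = SupersetClient(env)  # replaces the old SupersetConfig construction
    total, dashboards = client.get_dashboards(query={"page_size": 1})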

View File

@@ -29,11 +29,10 @@ class BeliefFormatter(logging.Formatter):
 # @PARAM: record (logging.LogRecord) - The log record to format.
 # @RETURN: str - The formatted log message.
     def format(self, record):
-        msg = super().format(record)
         anchor_id = getattr(_belief_state, 'anchor_id', None)
         if anchor_id:
-            msg = f"[{anchor_id}][Action] {msg}"
-        return msg
+            record.msg = f"[{anchor_id}][Action] {record.msg}"
+        return super().format(record)
 # [/DEF:format:Function]
 # [/DEF:BeliefFormatter:Class]
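The reordering is deliberate: mutating record.msg before calling super().format() puts the anchor inside the %(message)s slot, so timestamps and level names from the format string frame the prefixed message instead of the prefix landing outside the rendered layout. One side effect worth noting (an observation, not something the commit addresses): the mutation persists on the record, so a second handler formatting the same record sees an already-prefixed message. A self-contained sketch of the new behavior, without the thread-local anchor lookup:

    import logging

    class BeliefFormatter(logging.Formatter):
        def format(self, record):
            # Prefix before the base class renders the record, so the anchor
            # ends up inside %(message)s rather than around the whole line.
            record.msg = f"[demo-anchor][Action] {record.msg}"
            return super().format(record)

    handler = logging.StreamHandler()
    handler.setFormatter(BeliefFormatter("%(asctime)s %(levelname)s %(message)s"))
    log = logging.getLogger("demo")
    log.addHandler(handler)
    log.warning("hello")  # -> 2026-01-22 ... WARNING [demo-anchor][Action] hello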
@@ -193,6 +192,18 @@ class WebSocketLogHandler(logging.Handler):
 # @SEMANTICS: logger, global, instance
 # @PURPOSE: The global logger instance for the application, configured with both a console handler and the custom WebSocket handler.
 logger = logging.getLogger("superset_tools_app")
+# [DEF:believed:Function]
+# @PURPOSE: A decorator that wraps a function in a belief scope.
+# @PARAM: anchor_id (str) - The identifier for the semantic block.
+def believed(anchor_id: str):
+    def decorator(func):
+        def wrapper(*args, **kwargs):
+            with belief_scope(anchor_id):
+                return func(*args, **kwargs)
+        return wrapper
+    return decorator
+# [/DEF:believed:Function]
 logger.setLevel(logging.INFO)
 # Create a formatter
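The new believed decorator is sugar over the belief_scope context manager the routers already use inline. Hypothetical usage; note that as written the wrapper does not apply functools.wraps, so the decorated function's __name__ and docstring are replaced by the wrapper's:

    @believed("refresh_dashboards")
    def refresh_dashboards():
        ...  # log lines emitted here carry the [refresh_dashboards][Action] prefix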

View File

@@ -1,82 +1,102 @@
 # [DEF:backend.src.core.superset_client:Module]
 #
-# @SEMANTICS: superset, api, client, database, metadata
-# @PURPOSE: Extends the base SupersetClient with database-specific metadata fetching.
+# @SEMANTICS: superset, api, client, rest, http, dashboard, dataset, import, export
+# @PURPOSE: Provides a high-level client for interacting with the Superset REST API, encapsulating request logic, error handling, and pagination.
 # @LAYER: Core
-# @RELATION: INHERITS_FROM -> superset_tool.client.SupersetClient
+# @RELATION: USES -> backend.src.core.utils.network.APIClient
+# @RELATION: USES -> backend.src.core.config_models.Environment
 #
-# @INVARIANT: All database metadata requests must include UUID and name.
+# @INVARIANT: All network operations must use the internal APIClient instance.
+# @PUBLIC_API: SupersetClient
 # [SECTION: IMPORTS]
-from typing import List, Dict, Optional, Tuple
-from .logger import belief_scope
-from superset_tool.client import SupersetClient as BaseSupersetClient
-from superset_tool.models import SupersetConfig
+import json
+import zipfile
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union, cast
+from requests import Response
+from .logger import logger as app_logger, belief_scope
+from .utils.network import APIClient, SupersetAPIError, AuthenticationError, DashboardNotFoundError, NetworkError
+from .utils.fileio import get_filename_from_headers
+from .config_models import Environment
 # [/SECTION]
 # [DEF:SupersetClient:Class]
-# @PURPOSE: Extended SupersetClient for migration-specific operations.
-class SupersetClient(BaseSupersetClient):
+# @PURPOSE: A wrapper class over the Superset REST API that provides methods for working with dashboards and datasets.
+class SupersetClient:
+    # [DEF:__init__:Function]
+    # @PURPOSE: Initializes the client, validates the configuration, and creates the network client.
+    # @PRE: `env` must be a valid Environment object.
+    # @POST: The `env` and `network` attributes are created and ready for use.
+    # @PARAM: env (Environment) - The environment configuration.
+    def __init__(self, env: Environment):
+        with belief_scope("__init__"):
+            app_logger.info("[SupersetClient.__init__][Enter] Initializing SupersetClient for env %s.", env.name)
+            self.env = env
+            # Construct auth payload expected by Superset API
+            auth_payload = {
+                "username": env.username,
+                "password": env.password,
+                "provider": "db",
+                "refresh": "true"
+            }
+            self.network = APIClient(
+                config={
+                    "base_url": env.url,
+                    "auth": auth_payload
+                },
+                verify_ssl=env.verify_ssl,
+                timeout=env.timeout
+            )
+            self.delete_before_reimport: bool = False
+            app_logger.info("[SupersetClient.__init__][Exit] SupersetClient initialized.")
+    # [/DEF:__init__:Function]
     # [DEF:authenticate:Function]
     # @PURPOSE: Authenticates the client using the configured credentials.
     # @PRE: self.network must be initialized with valid auth configuration.
     # @POST: Client is authenticated and tokens are stored.
     # @RETURN: Dict[str, str] - Authentication tokens.
-    def authenticate(self):
+    def authenticate(self) -> Dict[str, str]:
         with belief_scope("SupersetClient.authenticate"):
             return self.network.authenticate()
     # [/DEF:authenticate:Function]
-    # [DEF:get_databases_summary:Function]
-    # @PURPOSE: Fetch a summary of databases including uuid, name, and engine.
-    # @PRE: self.network must be initialized and authenticated.
-    # @POST: Returns a list of database dictionaries with 'engine' field.
-    # @RETURN: List[Dict] - Summary of databases.
-    def get_databases_summary(self) -> List[Dict]:
-        with belief_scope("SupersetClient.get_databases_summary"):
-            """
-            Fetch a summary of databases including uuid, name, and engine.
-            """
-            query = {
-                "columns": ["uuid", "database_name", "backend"]
-            }
-            _, databases = self.get_databases(query=query)
-            # Map 'backend' to 'engine' for consistency with contracts
-            for db in databases:
-                db['engine'] = db.pop('backend', None)
-            return databases
-    # [/DEF:get_databases_summary:Function]
-    # [DEF:get_database_by_uuid:Function]
-    # @PURPOSE: Find a database by its UUID.
-    # @PRE: db_uuid must be a string.
-    # @POST: Returns database metadata if found.
-    # @PARAM: db_uuid (str) - The UUID of the database.
-    # @RETURN: Optional[Dict] - Database info if found, else None.
-    def get_database_by_uuid(self, db_uuid: str) -> Optional[Dict]:
-        with belief_scope("SupersetClient.get_database_by_uuid", f"uuid={db_uuid}"):
-            """
-            Find a database by its UUID.
-            """
-            query = {
-                "filters": [{"col": "uuid", "op": "eq", "value": db_uuid}]
-            }
-            _, databases = self.get_databases(query=query)
-            return databases[0] if databases else None
-    # [/DEF:get_database_by_uuid:Function]
+    @property
+    # [DEF:headers:Function]
+    # @PURPOSE: Returns the base HTTP headers used by the network client.
+    def headers(self) -> dict:
+        with belief_scope("headers"):
+            return self.network.headers
+    # [/DEF:headers:Function]
+
+    # [SECTION: DASHBOARD OPERATIONS]
+
+    # [DEF:get_dashboards:Function]
+    # @PURPOSE: Fetches the full list of dashboards, automatically handling pagination.
+    # @PARAM: query (Optional[Dict]) - Additional query parameters for the API.
+    # @RETURN: Tuple[int, List[Dict]] - A tuple of (total count, list of dashboards).
+    def get_dashboards(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
+        with belief_scope("get_dashboards"):
+            app_logger.info("[get_dashboards][Enter] Fetching dashboards.")
+            validated_query = self._validate_query_params(query or {})
+            if 'columns' not in validated_query:
+                validated_query['columns'] = ["slug", "id", "changed_on_utc", "dashboard_title", "published"]
+            total_count = self._fetch_total_object_count(endpoint="/dashboard/")
+            paginated_data = self._fetch_all_pages(
+                endpoint="/dashboard/",
+                pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
+            )
+            app_logger.info("[get_dashboards][Exit] Found %d dashboards.", total_count)
+            return total_count, paginated_data
+    # [/DEF:get_dashboards:Function]
     # [DEF:get_dashboards_summary:Function]
     # @PURPOSE: Fetches dashboard metadata optimized for the grid.
-    # @PRE: self.network must be authenticated.
-    # @POST: Returns a list of dashboard dictionaries mapped to the grid schema.
     # @RETURN: List[Dict]
     def get_dashboards_summary(self) -> List[Dict]:
         with belief_scope("SupersetClient.get_dashboards_summary"):
-            """
-            Fetches dashboard metadata optimized for the grid.
-            Returns a list of dictionaries mapped to DashboardMetadata fields.
-            """
             query = {
                 "columns": ["id", "dashboard_title", "changed_on_utc", "published"]
             }
@@ -94,34 +114,287 @@ class SupersetClient(BaseSupersetClient):
             return result
     # [/DEF:get_dashboards_summary:Function]
+    # [DEF:export_dashboard:Function]
+    # @PURPOSE: Exports a dashboard as a ZIP archive.
+    # @PARAM: dashboard_id (int) - The ID of the dashboard to export.
+    # @RETURN: Tuple[bytes, str] - The binary ZIP archive content and the file name.
+    def export_dashboard(self, dashboard_id: int) -> Tuple[bytes, str]:
+        with belief_scope("export_dashboard"):
+            app_logger.info("[export_dashboard][Enter] Exporting dashboard %s.", dashboard_id)
+            response = self.network.request(
+                method="GET",
+                endpoint="/dashboard/export/",
+                params={"q": json.dumps([dashboard_id])},
+                stream=True,
+                raw_response=True,
+            )
+            response = cast(Response, response)
+            self._validate_export_response(response, dashboard_id)
+            filename = self._resolve_export_filename(response, dashboard_id)
+            app_logger.info("[export_dashboard][Exit] Exported dashboard %s to %s.", dashboard_id, filename)
+            return response.content, filename
+    # [/DEF:export_dashboard:Function]
+    # [DEF:import_dashboard:Function]
+    # @PURPOSE: Imports a dashboard from a ZIP file.
+    # @PARAM: file_name (Union[str, Path]) - Path to the ZIP archive.
+    # @PARAM: dash_id (Optional[int]) - Dashboard ID to delete if the first attempt fails.
+    # @PARAM: dash_slug (Optional[str]) - Dashboard slug used to look up the ID.
+    # @RETURN: Dict - The API response on success.
+    def import_dashboard(self, file_name: Union[str, Path], dash_id: Optional[int] = None, dash_slug: Optional[str] = None) -> Dict:
+        with belief_scope("import_dashboard"):
+            file_path = str(file_name)
+            self._validate_import_file(file_path)
+            try:
+                return self._do_import(file_path)
+            except Exception as exc:
+                app_logger.error("[import_dashboard][Failure] First import attempt failed: %s", exc, exc_info=True)
+                if not self.delete_before_reimport:
+                    raise
+                target_id = self._resolve_target_id_for_delete(dash_id, dash_slug)
+                if target_id is None:
+                    app_logger.error("[import_dashboard][Failure] No ID available for delete-retry.")
+                    raise
+                self.delete_dashboard(target_id)
+                app_logger.info("[import_dashboard][State] Deleted dashboard ID %s, retrying import.", target_id)
+                return self._do_import(file_path)
+    # [/DEF:import_dashboard:Function]
+    # [DEF:delete_dashboard:Function]
+    # @PURPOSE: Deletes a dashboard by its ID or slug.
+    # @PARAM: dashboard_id (Union[int, str]) - The dashboard ID or slug.
+    def delete_dashboard(self, dashboard_id: Union[int, str]) -> None:
+        with belief_scope("delete_dashboard"):
+            app_logger.info("[delete_dashboard][Enter] Deleting dashboard %s.", dashboard_id)
+            response = self.network.request(method="DELETE", endpoint=f"/dashboard/{dashboard_id}")
+            response = cast(Dict, response)
+            if response.get("result", True) is not False:
+                app_logger.info("[delete_dashboard][Success] Dashboard %s deleted.", dashboard_id)
+            else:
+                app_logger.warning("[delete_dashboard][Warning] Unexpected response while deleting %s: %s", dashboard_id, response)
+    # [/DEF:delete_dashboard:Function]
+    # [/SECTION]
+
+    # [SECTION: DATASET OPERATIONS]
+    # [DEF:get_datasets:Function]
+    # @PURPOSE: Fetches the full list of datasets, automatically handling pagination.
+    # @PARAM: query (Optional[Dict]) - Additional query parameters.
+    # @RETURN: Tuple[int, List[Dict]] - A tuple of (total count, list of datasets).
+    def get_datasets(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
+        with belief_scope("get_datasets"):
+            app_logger.info("[get_datasets][Enter] Fetching datasets.")
+            validated_query = self._validate_query_params(query)
+            total_count = self._fetch_total_object_count(endpoint="/dataset/")
+            paginated_data = self._fetch_all_pages(
+                endpoint="/dataset/",
+                pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
+            )
+            app_logger.info("[get_datasets][Exit] Found %d datasets.", total_count)
+            return total_count, paginated_data
+    # [/DEF:get_datasets:Function]
     # [DEF:get_dataset:Function]
-    # @PURPOSE: Fetch full dataset structure including columns and metrics.
-    # @PRE: dataset_id must be a valid integer.
-    # @POST: Returns full dataset metadata from Superset API.
-    # @PARAM: dataset_id (int) - The ID of the dataset.
-    # @RETURN: Dict - The dataset metadata.
+    # @PURPOSE: Fetches information about a specific dataset by its ID.
+    # @PARAM: dataset_id (int) - The dataset ID.
+    # @RETURN: Dict - The dataset information.
     def get_dataset(self, dataset_id: int) -> Dict:
         with belief_scope("SupersetClient.get_dataset", f"id={dataset_id}"):
-            """
-            Fetch full dataset structure.
-            """
-            return self.network.get(f"/api/v1/dataset/{dataset_id}").json()
+            app_logger.info("[get_dataset][Enter] Fetching dataset %s.", dataset_id)
+            response = self.network.request(method="GET", endpoint=f"/dataset/{dataset_id}")
+            response = cast(Dict, response)
+            app_logger.info("[get_dataset][Exit] Got dataset %s.", dataset_id)
+            return response
     # [/DEF:get_dataset:Function]
     # [DEF:update_dataset:Function]
-    # @PURPOSE: Update dataset metadata.
-    # @PRE: dataset_id must be valid, data must be a valid Superset dataset payload.
-    # @POST: Dataset is updated in Superset.
-    # @PARAM: dataset_id (int) - The ID of the dataset.
-    # @PARAM: data (Dict) - The payload for update.
-    def update_dataset(self, dataset_id: int, data: Dict):
+    # @PURPOSE: Updates a dataset's data by its ID.
+    # @PARAM: dataset_id (int) - The dataset ID.
+    # @PARAM: data (Dict) - The data to update.
+    # @RETURN: Dict - The API response.
+    def update_dataset(self, dataset_id: int, data: Dict) -> Dict:
         with belief_scope("SupersetClient.update_dataset", f"id={dataset_id}"):
-            """
-            Update dataset metadata.
-            """
-            self.network.put(f"/api/v1/dataset/{dataset_id}", json=data)
+            app_logger.info("[update_dataset][Enter] Updating dataset %s.", dataset_id)
+            response = self.network.request(
+                method="PUT",
+                endpoint=f"/dataset/{dataset_id}",
+                data=json.dumps(data),
+                headers={'Content-Type': 'application/json'}
+            )
+            response = cast(Dict, response)
+            app_logger.info("[update_dataset][Exit] Updated dataset %s.", dataset_id)
+            return response
     # [/DEF:update_dataset:Function]
+    # [/SECTION]
+
+    # [SECTION: DATABASE OPERATIONS]
+    # [DEF:get_databases:Function]
+    # @PURPOSE: Fetches the full list of databases.
+    # @PARAM: query (Optional[Dict]) - Additional query parameters.
+    # @RETURN: Tuple[int, List[Dict]] - A tuple of (total count, list of databases).
+    def get_databases(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
+        with belief_scope("get_databases"):
+            app_logger.info("[get_databases][Enter] Fetching databases.")
+            validated_query = self._validate_query_params(query or {})
+            if 'columns' not in validated_query:
+                validated_query['columns'] = []
+            total_count = self._fetch_total_object_count(endpoint="/database/")
+            paginated_data = self._fetch_all_pages(
+                endpoint="/database/",
+                pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
+            )
+            app_logger.info("[get_databases][Exit] Found %d databases.", total_count)
+            return total_count, paginated_data
+    # [/DEF:get_databases:Function]
+    # [DEF:get_database:Function]
+    # @PURPOSE: Fetches information about a specific database by its ID.
+    # @PARAM: database_id (int) - The database ID.
+    # @RETURN: Dict - The database information.
+    def get_database(self, database_id: int) -> Dict:
+        with belief_scope("get_database"):
+            app_logger.info("[get_database][Enter] Fetching database %s.", database_id)
+            response = self.network.request(method="GET", endpoint=f"/database/{database_id}")
+            response = cast(Dict, response)
+            app_logger.info("[get_database][Exit] Got database %s.", database_id)
+            return response
+    # [/DEF:get_database:Function]
+    # [DEF:get_databases_summary:Function]
+    # @PURPOSE: Fetch a summary of databases including uuid, name, and engine.
+    # @RETURN: List[Dict] - Summary of databases.
+    def get_databases_summary(self) -> List[Dict]:
+        with belief_scope("SupersetClient.get_databases_summary"):
+            query = {
+                "columns": ["uuid", "database_name", "backend"]
+            }
+            _, databases = self.get_databases(query=query)
+            # Map 'backend' to 'engine' for consistency with contracts
+            for db in databases:
+                db['engine'] = db.pop('backend', None)
+            return databases
+    # [/DEF:get_databases_summary:Function]
+    # [DEF:get_database_by_uuid:Function]
+    # @PURPOSE: Find a database by its UUID.
+    # @PARAM: db_uuid (str) - The UUID of the database.
+    # @RETURN: Optional[Dict] - Database info if found, else None.
+    def get_database_by_uuid(self, db_uuid: str) -> Optional[Dict]:
+        with belief_scope("SupersetClient.get_database_by_uuid", f"uuid={db_uuid}"):
+            query = {
+                "filters": [{"col": "uuid", "op": "eq", "value": db_uuid}]
+            }
+            _, databases = self.get_databases(query=query)
+            return databases[0] if databases else None
+    # [/DEF:get_database_by_uuid:Function]
+    # [/SECTION]
+
+    # [SECTION: HELPERS]
+    # [DEF:_resolve_target_id_for_delete:Function]
+    def _resolve_target_id_for_delete(self, dash_id: Optional[int], dash_slug: Optional[str]) -> Optional[int]:
+        with belief_scope("_resolve_target_id_for_delete"):
+            if dash_id is not None:
+                return dash_id
+            if dash_slug is not None:
+                app_logger.debug("[_resolve_target_id_for_delete][State] Resolving ID by slug '%s'.", dash_slug)
+                try:
+                    _, candidates = self.get_dashboards(query={"filters": [{"col": "slug", "op": "eq", "value": dash_slug}]})
+                    if candidates:
+                        target_id = candidates[0]["id"]
+                        app_logger.debug("[_resolve_target_id_for_delete][Success] Resolved slug to ID %s.", target_id)
+                        return target_id
+                except Exception as e:
+                    app_logger.warning("[_resolve_target_id_for_delete][Warning] Could not resolve slug '%s' to ID: %s", dash_slug, e)
+            return None
+    # [/DEF:_resolve_target_id_for_delete:Function]
+    # [DEF:_do_import:Function]
+    def _do_import(self, file_name: Union[str, Path]) -> Dict:
+        with belief_scope("_do_import"):
+            app_logger.debug(f"[_do_import][State] Uploading file: {file_name}")
+            file_path = Path(file_name)
+            if not file_path.exists():
+                app_logger.error(f"[_do_import][Failure] File does not exist: {file_name}")
+                raise FileNotFoundError(f"File does not exist: {file_name}")
+            return self.network.upload_file(
+                endpoint="/dashboard/import/",
+                file_info={"file_obj": file_path, "file_name": file_path.name, "form_field": "formData"},
+                extra_data={"overwrite": "true"},
+                timeout=self.env.timeout * 2,
+            )
+    # [/DEF:_do_import:Function]
+    # [DEF:_validate_export_response:Function]
+    def _validate_export_response(self, response: Response, dashboard_id: int) -> None:
+        with belief_scope("_validate_export_response"):
+            content_type = response.headers.get("Content-Type", "")
+            if "application/zip" not in content_type:
+                raise SupersetAPIError(f"Response is not a ZIP archive (Content-Type: {content_type})")
+            if not response.content:
+                raise SupersetAPIError("Received empty data on export")
+    # [/DEF:_validate_export_response:Function]
+    # [DEF:_resolve_export_filename:Function]
+    def _resolve_export_filename(self, response: Response, dashboard_id: int) -> str:
+        with belief_scope("_resolve_export_filename"):
+            filename = get_filename_from_headers(dict(response.headers))
+            if not filename:
+                from datetime import datetime
+                timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
+                filename = f"dashboard_export_{dashboard_id}_{timestamp}.zip"
+                app_logger.warning("[_resolve_export_filename][Warning] Generated filename: %s", filename)
+            return filename
+    # [/DEF:_resolve_export_filename:Function]
+    # [DEF:_validate_query_params:Function]
+    def _validate_query_params(self, query: Optional[Dict]) -> Dict:
+        with belief_scope("_validate_query_params"):
+            base_query = {"page": 0, "page_size": 1000}
+            return {**base_query, **(query or {})}
+    # [/DEF:_validate_query_params:Function]
+    # [DEF:_fetch_total_object_count:Function]
+    def _fetch_total_object_count(self, endpoint: str) -> int:
+        with belief_scope("_fetch_total_object_count"):
+            return self.network.fetch_paginated_count(
+                endpoint=endpoint,
+                query_params={"page": 0, "page_size": 1},
+                count_field="count",
+            )
+    # [/DEF:_fetch_total_object_count:Function]
+    # [DEF:_fetch_all_pages:Function]
+    def _fetch_all_pages(self, endpoint: str, pagination_options: Dict) -> List[Dict]:
+        with belief_scope("_fetch_all_pages"):
+            return self.network.fetch_paginated_data(endpoint=endpoint, pagination_options=pagination_options)
+    # [/DEF:_fetch_all_pages:Function]
+    # [DEF:_validate_import_file:Function]
+    def _validate_import_file(self, zip_path: Union[str, Path]) -> None:
+        with belief_scope("_validate_import_file"):
+            path = Path(zip_path)
+            if not path.exists():
+                raise FileNotFoundError(f"File {zip_path} does not exist")
+            if not zipfile.is_zipfile(path):
+                raise SupersetAPIError(f"File {zip_path} is not a ZIP archive")
+            with zipfile.ZipFile(path, "r") as zf:
+                if not any(n.endswith("metadata.yaml") for n in zf.namelist()):
+                    raise SupersetAPIError(f"Archive {zip_path} does not contain 'metadata.yaml'")
+    # [/DEF:_validate_import_file:Function]
+    # [/SECTION]
 # [/DEF:SupersetClient:Class]
 # [/DEF:backend.src.core.superset_client:Module]
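A sketch of how the rewritten client chains together for a cross-environment dashboard copy; source_env and target_env are hypothetical Environment objects like those managed by the settings router, and the dashboard ID and slug are made up:

    from pathlib import Path

    source = SupersetClient(source_env)
    target = SupersetClient(target_env)
    target.delete_before_reimport = True  # opt in to the delete-and-retry path

    content, filename = source.export_dashboard(dashboard_id=42)
    archive = Path("/tmp") / filename
    archive.write_bytes(content)

    result = target.import_dashboard(archive, dash_slug="sales-overview")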

View File

@@ -1,240 +1,237 @@
-# [DEF:superset_tool.utils.dataset_mapper:Module]
+# [DEF:backend.core.utils.dataset_mapper:Module]
 #
 # @SEMANTICS: dataset, mapping, postgresql, xlsx, superset
 # @PURPOSE: This module is responsible for updating metadata (verbose_map) in Superset datasets, sourcing it from PostgreSQL or XLSX files.
 # @LAYER: Domain
-# @RELATION: DEPENDS_ON -> superset_tool.client
+# @RELATION: DEPENDS_ON -> backend.core.superset_client
 # @RELATION: DEPENDS_ON -> pandas
 # @RELATION: DEPENDS_ON -> psycopg2
 # @PUBLIC_API: DatasetMapper
 # [SECTION: IMPORTS]
 import pandas as pd  # type: ignore
 import psycopg2  # type: ignore
-from superset_tool.client import SupersetClient
-from superset_tool.utils.init_clients import setup_clients
-from superset_tool.utils.logger import SupersetLogger
 from typing import Dict, List, Optional, Any
+from ..logger import logger as app_logger, belief_scope
 # [/SECTION]

 # [DEF:DatasetMapper:Class]
 # @PURPOSE: A class for mapping and updating verbose_map in Superset datasets.
 class DatasetMapper:
     # [DEF:__init__:Function]
     # @PURPOSE: Initializes the mapper.
-    # @PRE: logger must be a SupersetLogger instance.
     # @POST: The DatasetMapper object is initialized.
-    def __init__(self, logger: SupersetLogger):
-        self.logger = logger
+    def __init__(self):
+        pass
     # [/DEF:__init__:Function]

     # [DEF:get_postgres_comments:Function]
     # @PURPOSE: Extracts column comments from the PostgreSQL system catalog.
     # @PRE: db_config must contain valid connection parameters (host, port, user, password, dbname).
     # @PRE: table_name and table_schema must be strings.
     # @POST: Returns a dict whose keys are column names and whose values are comments from the database.
     # @THROW: Exception - On connection errors or query failures.
     # @PARAM: db_config (Dict) - Database connection configuration.
     # @PARAM: table_name (str) - Table name.
     # @PARAM: table_schema (str) - Table schema.
     # @RETURN: Dict[str, str] - A dict of column comments.
     def get_postgres_comments(self, db_config: Dict, table_name: str, table_schema: str) -> Dict[str, str]:
-        with self.logger.belief_scope("Fetch comments from PostgreSQL"):
-            self.logger.info("[get_postgres_comments][Enter] Fetching comments from PostgreSQL for %s.%s.", table_schema, table_name)
+        with belief_scope("Fetch comments from PostgreSQL"):
+            app_logger.info("[get_postgres_comments][Enter] Fetching comments from PostgreSQL for %s.%s.", table_schema, table_name)
             query = f"""
                 SELECT
                     cols.column_name,
                     CASE
                         WHEN pg_catalog.col_description(
                             (SELECT c.oid
                              FROM pg_catalog.pg_class c
                              JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                              WHERE c.relname = cols.table_name
                                AND n.nspname = cols.table_schema),
                             cols.ordinal_position::int
                         ) LIKE '%|%' THEN
                             split_part(
                                 pg_catalog.col_description(
                                     (SELECT c.oid
                                      FROM pg_catalog.pg_class c
                                      JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                                      WHERE c.relname = cols.table_name
                                        AND n.nspname = cols.table_schema),
                                     cols.ordinal_position::int
                                 ),
                                 '|',
                                 1
                             )
                         ELSE
                             pg_catalog.col_description(
                                 (SELECT c.oid
                                  FROM pg_catalog.pg_class c
                                  JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                                  WHERE c.relname = cols.table_name
                                    AND n.nspname = cols.table_schema),
                                 cols.ordinal_position::int
                             )
                     END AS column_comment
                 FROM
                     information_schema.columns cols
                 WHERE cols.table_catalog = '{db_config.get('dbname')}' AND cols.table_name = '{table_name}' AND cols.table_schema = '{table_schema}';
             """
             comments = {}
             try:
                 with psycopg2.connect(**db_config) as conn, conn.cursor() as cursor:
                     cursor.execute(query)
                     for row in cursor.fetchall():
                         if row[1]:
                             comments[row[0]] = row[1]
-                self.logger.info("[get_postgres_comments][Success] Fetched %d comments.", len(comments))
+                app_logger.info("[get_postgres_comments][Success] Fetched %d comments.", len(comments))
             except Exception as e:
-                self.logger.error("[get_postgres_comments][Failure] %s", e, exc_info=True)
+                app_logger.error("[get_postgres_comments][Failure] %s", e, exc_info=True)
                 raise
             return comments
     # [/DEF:get_postgres_comments:Function]

     # [DEF:load_excel_mappings:Function]
     # @PURPOSE: Loads 'column_name' -> 'column_comment' mappings from an XLSX file.
     # @PRE: file_path must point to an existing XLSX file.
     # @POST: Returns a dict with the mappings from the file.
     # @THROW: Exception - On file read or parse errors.
     # @PARAM: file_path (str) - Path to the XLSX file.
     # @RETURN: Dict[str, str] - A dict with the mappings.
     def load_excel_mappings(self, file_path: str) -> Dict[str, str]:
-        with self.logger.belief_scope("Load mappings from Excel"):
-            self.logger.info("[load_excel_mappings][Enter] Loading mappings from %s.", file_path)
+        with belief_scope("Load mappings from Excel"):
+            app_logger.info("[load_excel_mappings][Enter] Loading mappings from %s.", file_path)
             try:
                 df = pd.read_excel(file_path)
                 mappings = df.set_index('column_name')['verbose_name'].to_dict()
-                self.logger.info("[load_excel_mappings][Success] Loaded %d mappings.", len(mappings))
+                app_logger.info("[load_excel_mappings][Success] Loaded %d mappings.", len(mappings))
                 return mappings
             except Exception as e:
-                self.logger.error("[load_excel_mappings][Failure] %s", e, exc_info=True)
+                app_logger.error("[load_excel_mappings][Failure] %s", e, exc_info=True)
                 raise
     # [/DEF:load_excel_mappings:Function]

     # [DEF:run_mapping:Function]
     # @PURPOSE: The main entry point that performs the mapping and updates the dataset's verbose_map in Superset.
     # @PRE: superset_client must be authenticated.
     # @PRE: dataset_id must be an existing ID in Superset.
     # @POST: If changes are found, the dataset in Superset is updated via the API.
     # @RELATION: CALLS -> self.get_postgres_comments
     # @RELATION: CALLS -> self.load_excel_mappings
     # @RELATION: CALLS -> superset_client.get_dataset
     # @RELATION: CALLS -> superset_client.update_dataset
-    # @PARAM: superset_client (SupersetClient) - The Superset client.
+    # @PARAM: superset_client (Any) - The Superset client.
     # @PARAM: dataset_id (int) - The ID of the dataset to update.
     # @PARAM: source (str) - Data source ('postgres', 'excel', 'both').
     # @PARAM: postgres_config (Optional[Dict]) - PostgreSQL connection configuration.
     # @PARAM: excel_path (Optional[str]) - Path to the XLSX file.
     # @PARAM: table_name (Optional[str]) - Table name in PostgreSQL.
     # @PARAM: table_schema (Optional[str]) - Table schema in PostgreSQL.
-    def run_mapping(self, superset_client: SupersetClient, dataset_id: int, source: str, postgres_config: Optional[Dict] = None, excel_path: Optional[str] = None, table_name: Optional[str] = None, table_schema: Optional[str] = None):
-        with self.logger.belief_scope(f"Run dataset mapping for ID {dataset_id}"):
-            self.logger.info("[run_mapping][Enter] Starting dataset mapping for ID %d from source '%s'.", dataset_id, source)
+    def run_mapping(self, superset_client: Any, dataset_id: int, source: str, postgres_config: Optional[Dict] = None, excel_path: Optional[str] = None, table_name: Optional[str] = None, table_schema: Optional[str] = None):
+        with belief_scope(f"Run dataset mapping for ID {dataset_id}"):
+            app_logger.info("[run_mapping][Enter] Starting dataset mapping for ID %d from source '%s'.", dataset_id, source)
             mappings: Dict[str, str] = {}

             try:
                 if source in ['postgres', 'both']:
                     assert postgres_config and table_name and table_schema, "Postgres config is required."
                     mappings.update(self.get_postgres_comments(postgres_config, table_name, table_schema))
                 if source in ['excel', 'both']:
                     assert excel_path, "Excel path is required."
                     mappings.update(self.load_excel_mappings(excel_path))
                 if source not in ['postgres', 'excel', 'both']:
-                    self.logger.error("[run_mapping][Failure] Invalid source: %s.", source)
+                    app_logger.error("[run_mapping][Failure] Invalid source: %s.", source)
                     return

                 dataset_response = superset_client.get_dataset(dataset_id)
                 dataset_data = dataset_response['result']

                 original_columns = dataset_data.get('columns', [])
                 updated_columns = []
                 changes_made = False

                 for column in original_columns:
                     col_name = column.get('column_name')
                     new_column = {
                         "column_name": col_name,
                         "id": column.get("id"),
                         "advanced_data_type": column.get("advanced_data_type"),
                         "description": column.get("description"),
                         "expression": column.get("expression"),
                         "extra": column.get("extra"),
                         "filterable": column.get("filterable"),
                         "groupby": column.get("groupby"),
                         "is_active": column.get("is_active"),
                         "is_dttm": column.get("is_dttm"),
                         "python_date_format": column.get("python_date_format"),
                         "type": column.get("type"),
                         "uuid": column.get("uuid"),
                         "verbose_name": column.get("verbose_name"),
                     }

                     new_column = {k: v for k, v in new_column.items() if v is not None}

                     if col_name in mappings:
                         mapping_value = mappings[col_name]
                         if isinstance(mapping_value, str) and new_column.get('verbose_name') != mapping_value:
                             new_column['verbose_name'] = mapping_value
                             changes_made = True

                     updated_columns.append(new_column)

                 updated_metrics = []
                 for metric in dataset_data.get("metrics", []):
                     new_metric = {
                         "id": metric.get("id"),
                         "metric_name": metric.get("metric_name"),
                         "expression": metric.get("expression"),
                         "verbose_name": metric.get("verbose_name"),
                         "description": metric.get("description"),
                         "d3format": metric.get("d3format"),
                         "currency": metric.get("currency"),
                         "extra": metric.get("extra"),
                         "warning_text": metric.get("warning_text"),
                         "metric_type": metric.get("metric_type"),
                         "uuid": metric.get("uuid"),
                     }
                     updated_metrics.append({k: v for k, v in new_metric.items() if v is not None})

                 if changes_made:
                     payload_for_update = {
                         "database_id": dataset_data.get("database", {}).get("id"),
                         "table_name": dataset_data.get("table_name"),
                         "schema": dataset_data.get("schema"),
                         "columns": updated_columns,
                         "owners": [owner["id"] for owner in dataset_data.get("owners", [])],
                         "metrics": updated_metrics,
                         "extra": dataset_data.get("extra"),
                         "description": dataset_data.get("description"),
                         "sql": dataset_data.get("sql"),
                         "cache_timeout": dataset_data.get("cache_timeout"),
                         "catalog": dataset_data.get("catalog"),
                         "default_endpoint": dataset_data.get("default_endpoint"),
                         "external_url": dataset_data.get("external_url"),
                         "fetch_values_predicate": dataset_data.get("fetch_values_predicate"),
                         "filter_select_enabled": dataset_data.get("filter_select_enabled"),
                         "is_managed_externally": dataset_data.get("is_managed_externally"),
                         "is_sqllab_view": dataset_data.get("is_sqllab_view"),
                         "main_dttm_col": dataset_data.get("main_dttm_col"),
                         "normalize_columns": dataset_data.get("normalize_columns"),
                         "offset": dataset_data.get("offset"),
                         "template_params": dataset_data.get("template_params"),
                     }

                     payload_for_update = {k: v for k, v in payload_for_update.items() if v is not None}

                     superset_client.update_dataset(dataset_id, payload_for_update)
-                    self.logger.info("[run_mapping][Success] Dataset %d columns' verbose_name updated.", dataset_id)
+                    app_logger.info("[run_mapping][Success] Dataset %d columns' verbose_name updated.", dataset_id)
                 else:
-                    self.logger.info("[run_mapping][State] No changes in columns' verbose_name, skipping update.")
+                    app_logger.info("[run_mapping][State] No changes in columns' verbose_name, skipping update.")

             except (AssertionError, FileNotFoundError, Exception) as e:
-                self.logger.error("[run_mapping][Failure] %s", e, exc_info=True)
+                app_logger.error("[run_mapping][Failure] %s", e, exc_info=True)
                 return
     # [/DEF:run_mapping:Function]
 # [/DEF:DatasetMapper:Class]

-# [/DEF:superset_tool.utils.dataset_mapper:Module]
+# [/DEF:backend.core.utils.dataset_mapper:Module]
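One thing the move carries over unchanged is that get_postgres_comments interpolates db_config.get('dbname'), table_name, and table_schema into the SQL text with an f-string; since these values presumably arrive from user-editable configuration, a parameterized variant would be safer. A sketch, simplified to the branch without the split_part handling (not part of the commit):

    import psycopg2

    def get_column_comments(db_config: dict, table_name: str, table_schema: str) -> dict:
        # Bind the table and schema names as query parameters instead of
        # interpolating them, so a hostile value cannot alter the SQL.
        query = """
            SELECT cols.column_name,
                   pg_catalog.col_description(
                       (SELECT c.oid
                        FROM pg_catalog.pg_class c
                        JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                        WHERE c.relname = cols.table_name
                          AND n.nspname = cols.table_schema),
                       cols.ordinal_position::int
                   ) AS column_comment
            FROM information_schema.columns cols
            WHERE cols.table_name = %s AND cols.table_schema = %s;
        """
        with psycopg2.connect(**db_config) as conn, conn.cursor() as cursor:
            cursor.execute(query, (table_name, table_schema))
            return {name: comment for name, comment in cursor.fetchall() if comment}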

View File

@@ -1,507 +1,486 @@
# [DEF:superset_tool.utils.fileio:Module] # [DEF:backend.core.utils.fileio:Module]
# #
# @SEMANTICS: file, io, zip, yaml, temp, archive, utility # @SEMANTICS: file, io, zip, yaml, temp, archive, utility
# @PURPOSE: Предоставляет набор утилит для управления файловыми операциями, включая работу с временными файлами, архивами ZIP, файлами YAML и очистку директорий. # @PURPOSE: Предоставляет набор утилит для управления файловыми операциями, включая работу с временными файлами, архивами ZIP, файлами YAML и очистку директорий.
# @LAYER: Infra # @LAYER: Infra
# @RELATION: DEPENDS_ON -> superset_tool.exceptions # @RELATION: DEPENDS_ON -> backend.src.core.logger
# @RELATION: DEPENDS_ON -> superset_tool.utils.logger # @RELATION: DEPENDS_ON -> pyyaml
# @RELATION: DEPENDS_ON -> pyyaml # @PUBLIC_API: create_temp_file, remove_empty_directories, read_dashboard_from_disk, calculate_crc32, RetentionPolicy, archive_exports, save_and_unpack_dashboard, update_yamls, create_dashboard_export, sanitize_filename, get_filename_from_headers, consolidate_archive_folders
# @PUBLIC_API: create_temp_file, remove_empty_directories, read_dashboard_from_disk, calculate_crc32, RetentionPolicy, archive_exports, save_and_unpack_dashboard, update_yamls, create_dashboard_export, sanitize_filename, get_filename_from_headers, consolidate_archive_folders
# [SECTION: IMPORTS]
# [SECTION: IMPORTS] import os
import os import re
import re import zipfile
import zipfile from pathlib import Path
from pathlib import Path from typing import Any, Optional, Tuple, Dict, List, Union, LiteralString, Generator
from typing import Any, Optional, Tuple, Dict, List, Union, LiteralString, Generator from contextlib import contextmanager
from contextlib import contextmanager import tempfile
import tempfile from datetime import date, datetime
from datetime import date, datetime import shutil
import glob import zlib
import shutil from dataclasses import dataclass
import zlib import yaml
from dataclasses import dataclass from ..logger import logger as app_logger, belief_scope
import yaml # [/SECTION]
from superset_tool.exceptions import InvalidZipFormatError
from superset_tool.utils.logger import SupersetLogger # [DEF:InvalidZipFormatError:Class]
# [/SECTION] class InvalidZipFormatError(Exception):
pass
# [DEF:create_temp_file:Function]
# @PURPOSE: Context manager that creates a temporary file or directory and guarantees its removal.
# @PRE: suffix must be a string that determines the resource type.
# @POST: The temporary resource is created and its path yielded; the resource is removed on context exit.
# @PARAM: content (Optional[bytes]) - Binary content to write into the temporary file.
# @PARAM: suffix (str) - Resource suffix. If it starts with `.dir`, a directory is created.
# @PARAM: mode (str) - File write mode (e.g., 'wb').
# @PARAM: dry_run (bool) - If True, the resource is kept on exit (cleanup is skipped).
# @YIELDS: Path - Path to the temporary resource.
# @THROW: IOError - On resource creation errors.
@contextmanager
def create_temp_file(content: Optional[bytes] = None, suffix: str = ".zip", mode: str = 'wb', dry_run: bool = False) -> Generator[Path, None, None]:
    with belief_scope("Create temporary resource"):
        resource_path = None
        is_dir = suffix.startswith('.dir')
        try:
            if is_dir:
                with tempfile.TemporaryDirectory(suffix=suffix) as temp_dir:
                    resource_path = Path(temp_dir)
                    app_logger.debug("[create_temp_file][State] Created temporary directory: %s", resource_path)
                    yield resource_path
            else:
                fd, temp_path_str = tempfile.mkstemp(suffix=suffix)
                resource_path = Path(temp_path_str)
                os.close(fd)
                if content:
                    resource_path.write_bytes(content)
                app_logger.debug("[create_temp_file][State] Created temporary file: %s", resource_path)
                yield resource_path
        finally:
            if resource_path and resource_path.exists() and not dry_run:
                try:
                    if resource_path.is_dir():
                        shutil.rmtree(resource_path)
                        app_logger.debug("[create_temp_file][Cleanup] Removed temporary directory: %s", resource_path)
                    else:
                        resource_path.unlink()
                        app_logger.debug("[create_temp_file][Cleanup] Removed temporary file: %s", resource_path)
                except OSError as e:
                    app_logger.error("[create_temp_file][Failure] Error during cleanup of %s: %s", resource_path, e)
# [/DEF:create_temp_file:Function]
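# Example (sketch; the payload bytes and consumer are illustrative):
#
#     with create_temp_file(content=b"PK\x03\x04...", suffix=".zip") as tmp_zip:
#         process_archive(tmp_zip)   # hypothetical consumer; the file exists here
#     # tmp_zip has been deleted once the with-block exits (unless dry_run=True)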
# [DEF:remove_empty_directories:Function]
# @PURPOSE: Recursively removes all empty subdirectories, starting from the given path.
# @PRE: root_dir must be a path to an existing directory.
# @POST: All empty subdirectories are removed and their count is returned.
# @PARAM: root_dir (str) - Root directory to clean up.
# @RETURN: int - Number of removed directories.
def remove_empty_directories(root_dir: str) -> int:
    with belief_scope(f"Remove empty directories in {root_dir}"):
        app_logger.info("[remove_empty_directories][Enter] Starting cleanup of empty directories in %s", root_dir)
        removed_count = 0
        if not os.path.isdir(root_dir):
            app_logger.error("[remove_empty_directories][Failure] Directory not found: %s", root_dir)
            return 0
        # Walk bottom-up so directories emptied by a previous removal are caught too
        for current_dir, _, _ in os.walk(root_dir, topdown=False):
            if not os.listdir(current_dir):
                try:
                    os.rmdir(current_dir)
                    removed_count += 1
                    app_logger.info("[remove_empty_directories][State] Removed empty directory: %s", current_dir)
                except OSError as e:
                    app_logger.error("[remove_empty_directories][Failure] Failed to remove %s: %s", current_dir, e)
        app_logger.info("[remove_empty_directories][Exit] Removed %d empty directories.", removed_count)
        return removed_count
# [/DEF:remove_empty_directories:Function]
# [DEF:read_dashboard_from_disk:Function]
# @PURPOSE: Reads the binary content of a file from disk.
# @PRE: file_path must point to an existing file.
# @POST: Returns the content bytes and the file name.
# @PARAM: file_path (str) - Path to the file.
# @RETURN: Tuple[bytes, str] - (content, file name) tuple.
# @THROW: FileNotFoundError - If the file is not found.
def read_dashboard_from_disk(file_path: str) -> Tuple[bytes, str]:
    with belief_scope(f"Read dashboard from {file_path}"):
        path = Path(file_path)
        assert path.is_file(), f"Dashboard file not found: {file_path}"
        app_logger.info("[read_dashboard_from_disk][Enter] Reading file: %s", file_path)
        content = path.read_bytes()
        if not content:
            app_logger.warning("[read_dashboard_from_disk][Warning] File is empty: %s", file_path)
        return content, path.name
# [/DEF:read_dashboard_from_disk:Function]
# [DEF:calculate_crc32:Function]
# @PURPOSE: Computes the CRC32 checksum of a file.
# @PRE: file_path must be a Path object pointing to an existing file.
# @POST: Returns the CRC32 as an 8-character hex string.
# @PARAM: file_path (Path) - Path to the file.
# @RETURN: str - 8-character hexadecimal representation of the CRC32.
# @THROW: IOError - On file read errors.
def calculate_crc32(file_path: Path) -> str:
    with belief_scope(f"Calculate CRC32 for {file_path}"):
        with open(file_path, 'rb') as f:
            crc32_value = zlib.crc32(f.read())
        return f"{crc32_value:08x}"
# [/DEF:calculate_crc32:Function]
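# Example (sketch; the path is illustrative). Two exports with identical bytes yield
# the same checksum, which is what the deduplication pass in archive_exports relies on:
#
#     checksum = calculate_crc32(Path("exports/prod/sales_20250101_120000.zip"))
#     assert len(checksum) == 8   # zero-padded lowercase hex, e.g. "3610a686"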
# [SECTION: DATA_CLASSES]
# [DEF:RetentionPolicy:DataClass]
# @PURPOSE: Defines the retention policy for archives (how many daily, weekly, and monthly archives to keep).
@dataclass
class RetentionPolicy:
    daily: int = 7
    weekly: int = 4
    monthly: int = 12
# [/DEF:RetentionPolicy:DataClass]
# [/SECTION]
# [DEF:archive_exports:Function]
# @PURPOSE: Manages the archive of exported files, applying the retention policy and optional deduplication.
# @PRE: output_dir must be a path to an existing directory.
# @POST: Old or duplicate archives are removed according to the policy.
# @RELATION: CALLS -> apply_retention_policy
# @RELATION: CALLS -> calculate_crc32
# @PARAM: output_dir (str) - Directory containing the archives.
# @PARAM: policy (RetentionPolicy) - Retention policy.
# @PARAM: deduplicate (bool) - Enables CRC32-based removal of duplicates.
def archive_exports(output_dir: str, policy: RetentionPolicy, deduplicate: bool = False) -> None:
    with belief_scope(f"Archive exports in {output_dir}"):
        output_path = Path(output_dir)
        if not output_path.is_dir():
            app_logger.warning("[archive_exports][Skip] Archive directory not found: %s", output_dir)
            return

        app_logger.info("[archive_exports][Enter] Managing archive in %s", output_dir)

        # 1. Collect all zip files
        zip_files = list(output_path.glob("*.zip"))
        if not zip_files:
            app_logger.info("[archive_exports][State] No zip files found in %s", output_dir)
            return

        # 2. Deduplication
        if deduplicate:
            app_logger.info("[archive_exports][State] Starting deduplication...")
            checksums = {}
            files_to_remove = []

            # Sort by modification time (newest first) to keep the latest version
            zip_files.sort(key=lambda f: f.stat().st_mtime, reverse=True)

            for file_path in zip_files:
                try:
                    crc = calculate_crc32(file_path)
                    if crc in checksums:
                        files_to_remove.append(file_path)
                        app_logger.debug("[archive_exports][State] Duplicate found: %s (same as %s)", file_path.name, checksums[crc].name)
                    else:
                        checksums[crc] = file_path
                except Exception as e:
                    app_logger.error("[archive_exports][Failure] Failed to calculate CRC32 for %s: %s", file_path, e)

            for f in files_to_remove:
                try:
                    f.unlink()
                    zip_files.remove(f)
                    app_logger.info("[archive_exports][State] Removed duplicate: %s", f.name)
                except OSError as e:
                    app_logger.error("[archive_exports][Failure] Failed to remove duplicate %s: %s", f, e)

        # 3. Retention policy
        files_with_dates = []
        for file_path in zip_files:
            # Try to extract the date from the filename.
            # Pattern: ..._YYYYMMDD_HHMMSS.zip or ..._YYYYMMDD.zip
            match = re.search(r'_(\d{8})_', file_path.name)
            file_date = None
            if match:
                try:
                    date_str = match.group(1)
                    file_date = datetime.strptime(date_str, "%Y%m%d").date()
                except ValueError:
                    pass

            if not file_date:
                # Fall back to the modification time
                file_date = datetime.fromtimestamp(file_path.stat().st_mtime).date()

            files_with_dates.append((file_path, file_date))

        files_to_keep = apply_retention_policy(files_with_dates, policy)

        for file_path, _ in files_with_dates:
            if file_path not in files_to_keep:
                try:
                    file_path.unlink()
                    app_logger.info("[archive_exports][State] Removed by retention policy: %s", file_path.name)
                except OSError as e:
                    app_logger.error("[archive_exports][Failure] Failed to remove %s: %s", file_path, e)
# [/DEF:archive_exports:Function]

# [DEF:apply_retention_policy:Function]
# @PURPOSE: (Helper) Applies the retention policy to a list of files and returns the ones to keep.
# @PRE: files_with_dates is a list of (Path, date) tuples.
# @POST: Returns a set of files to keep.
# @PARAM: files_with_dates (List[Tuple[Path, date]]) - Files with their dates.
# @PARAM: policy (RetentionPolicy) - Retention policy.
# @RETURN: set - Set of file paths that must be kept.
def apply_retention_policy(files_with_dates: List[Tuple[Path, date]], policy: RetentionPolicy) -> set:
    with belief_scope("Apply retention policy"):
        # Sort by date, newest first
        sorted_files = sorted(files_with_dates, key=lambda x: x[1], reverse=True)
        # Buckets per retention category
        daily_files = []
        weekly_files = []
        monthly_files = []
        today = date.today()
        for file_path, file_date in sorted_files:
            # Daily window
            if (today - file_date).days < policy.daily:
                daily_files.append(file_path)
            # Weekly window
            elif (today - file_date).days < policy.weekly * 7:
                weekly_files.append(file_path)
            # Monthly window
            elif (today - file_date).days < policy.monthly * 30:
                monthly_files.append(file_path)
        # Build the set of files to keep
        files_to_keep = set()
        files_to_keep.update(daily_files)
        files_to_keep.update(weekly_files[:policy.weekly])
        files_to_keep.update(monthly_files[:policy.monthly])
        app_logger.debug("[apply_retention_policy][State] Keeping %d files according to retention policy", len(files_to_keep))
        return files_to_keep
# [/DEF:apply_retention_policy:Function]
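# Worked example (sketch; file names and dates are illustrative). With the default
# policy (daily=7, weekly=4, monthly=12) and today = 2025-06-30:
#
#     backup_20250628_0100.zip  -> 2 days old  -> daily bucket (kept)
#     backup_20250615_0100.zip  -> 15 days old -> weekly bucket (< 28 days)
#     backup_20250410_0100.zip  -> 81 days old -> monthly bucket (< 360 days)
#
#     archive_exports("exports/prod", RetentionPolicy(), deduplicate=True)
#
# Note the asymmetry: every file in the daily window is kept, while the weekly and
# monthly buckets are truncated to the `weekly`/`monthly` newest entries.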
# [DEF:save_and_unpack_dashboard:Function]
# @PURPOSE: Saves binary ZIP archive content to disk and optionally unpacks it.
# @PRE: zip_content must be the bytes of a valid ZIP archive.
# @POST: The ZIP file is saved and, if unpack=True, extracted into output_dir.
# @PARAM: zip_content (bytes) - ZIP archive content.
# @PARAM: output_dir (Union[str, Path]) - Target directory.
# @PARAM: unpack (bool) - Whether to unpack the archive.
# @PARAM: original_filename (Optional[str]) - Original file name to save under.
# @RETURN: Tuple[Path, Optional[Path]] - Path to the ZIP file and, if unpacked, the extraction directory.
# @THROW: InvalidZipFormatError - On ZIP format errors.
def save_and_unpack_dashboard(zip_content: bytes, output_dir: Union[str, Path], unpack: bool = False, original_filename: Optional[str] = None) -> Tuple[Path, Optional[Path]]:
    with belief_scope("Save and unpack dashboard"):
        app_logger.info("[save_and_unpack_dashboard][Enter] Processing dashboard. Unpack: %s", unpack)
        try:
            output_path = Path(output_dir)
            output_path.mkdir(parents=True, exist_ok=True)
            zip_name = sanitize_filename(original_filename) if original_filename else f"dashboard_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
            zip_path = output_path / zip_name
            zip_path.write_bytes(zip_content)
            app_logger.info("[save_and_unpack_dashboard][State] Dashboard saved to: %s", zip_path)
            if unpack:
                with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                    zip_ref.extractall(output_path)
                app_logger.info("[save_and_unpack_dashboard][State] Dashboard unpacked to: %s", output_path)
                return zip_path, output_path
            return zip_path, None
        except zipfile.BadZipFile as e:
            app_logger.error("[save_and_unpack_dashboard][Failure] Invalid ZIP archive: %s", e)
            raise InvalidZipFormatError(f"Invalid ZIP file: {e}") from e
# [/DEF:save_and_unpack_dashboard:Function]
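# Example (sketch; the byte source and paths are assumptions):
#
#     zip_path, unpacked_dir = save_and_unpack_dashboard(
#         response_bytes,                      # e.g. the body of an export download
#         "exports/prod",
#         unpack=True,
#         original_filename="sales_dashboard.zip",
#     )
#     # zip_path     -> exports/prod/sales_dashboard.zip
#     # unpacked_dir -> exports/prod (None when unpack=False)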
# [DEF:update_yamls:Function]
# @PURPOSE: Updates configuration values in YAML files, replacing values or applying a regex.
# @PRE: path must be an existing directory.
# @POST: All YAML files under the directory are updated according to the given parameters.
# @RELATION: CALLS -> _update_yaml_file
# @THROW: FileNotFoundError - If `path` does not exist.
# @PARAM: db_configs (Optional[List[Dict]]) - List of configurations to replace.
# @PARAM: path (str) - Directory containing the YAML files.
# @PARAM: regexp_pattern (Optional[str]) - Search pattern.
# @PARAM: replace_string (Optional[str]) - Replacement string.
def update_yamls(db_configs: Optional[List[Dict[str, Any]]] = None, path: str = "dashboards", regexp_pattern: Optional[str] = None, replace_string: Optional[str] = None) -> None:
    with belief_scope("Update YAML configurations"):
        app_logger.info("[update_yamls][Enter] Starting YAML configuration update.")
        dir_path = Path(path)
        assert dir_path.is_dir(), f"Path {path} does not exist or is not a directory"

        configs: List[Dict[str, Any]] = db_configs or []

        for file_path in dir_path.rglob("*.yaml"):
            _update_yaml_file(file_path, configs, regexp_pattern, replace_string)
# [/DEF:update_yamls:Function]
with open(file_path, 'r', encoding='utf-8') as f:
# [DEF:_update_yaml_file:Function] content = f.read()
# @PURPOSE: (Helper) Обновляет один YAML файл. except Exception as e:
# @PRE: file_path должен быть объектом Path к существующему YAML файлу. app_logger.error("[_update_yaml_file][Failure] Failed to read %s: %s", file_path, e)
# @POST: Файл обновлен согласно переданным конфигурациям или регулярному выражению. return
# @PARAM: file_path (Path) - Путь к файлу. # Если задан pattern и replace_string, применяем замену по регулярному выражению
# @PARAM: db_configs (List[Dict]) - Конфигурации. if regexp_pattern and replace_string:
# @PARAM: regexp_pattern (Optional[str]) - Паттерн. try:
# @PARAM: replace_string (Optional[str]) - Замена. new_content = re.sub(regexp_pattern, replace_string, content)
# @PARAM: logger (SupersetLogger) - Логгер. if new_content != content:
def _update_yaml_file(file_path: Path, db_configs: List[Dict[str, Any]], regexp_pattern: Optional[str], replace_string: Optional[str], logger: SupersetLogger) -> None: with open(file_path, 'w', encoding='utf-8') as f:
with logger.belief_scope(f"Update YAML file: {file_path}"): f.write(new_content)
# Читаем содержимое файла app_logger.info("[_update_yaml_file][State] Updated %s using regex pattern", file_path)
try: except Exception as e:
with open(file_path, 'r', encoding='utf-8') as f: app_logger.error("[_update_yaml_file][Failure] Error applying regex to %s: %s", file_path, e)
content = f.read() # Если заданы конфигурации, заменяем значения (поддержка old/new)
except Exception as e: if db_configs:
logger.error("[_update_yaml_file][Failure] Failed to read %s: %s", file_path, e) try:
return # Прямой текстовый заменитель для старых/новых значений, чтобы сохранить структуру файла
# Если задан pattern и replace_string, применяем замену по регулярному выражению modified_content = content
if regexp_pattern and replace_string: for cfg in db_configs:
try: # Ожидаем структуру: {'old': {...}, 'new': {...}}
new_content = re.sub(regexp_pattern, replace_string, content) old_cfg = cfg.get('old', {})
if new_content != content: new_cfg = cfg.get('new', {})
with open(file_path, 'w', encoding='utf-8') as f: for key, old_val in old_cfg.items():
f.write(new_content) if key in new_cfg:
logger.info("[_update_yaml_file][State] Updated %s using regex pattern", file_path) new_val = new_cfg[key]
except Exception as e: # Заменяем только точные совпадения старого значения в тексте YAML, используя ключ для контекста
logger.error("[_update_yaml_file][Failure] Error applying regex to %s: %s", file_path, e) if isinstance(old_val, str):
# Если заданы конфигурации, заменяем значения (поддержка old/new) # Ищем паттерн: key: "value" или key: value
if db_configs: key_pattern = re.escape(key)
try: val_pattern = re.escape(old_val)
# Прямой текстовый заменитель для старых/новых значений, чтобы сохранить структуру файла # Группы: 1=ключ+разделитель, 2=открывающая кавычка (опц), 3=значение, 4=закрывающая кавычка (опц)
modified_content = content pattern = rf'({key_pattern}\s*:\s*)(["\']?)({val_pattern})(["\']?)'
for cfg in db_configs:
# Ожидаем структуру: {'old': {...}, 'new': {...}} # [DEF:replacer:Function]
old_cfg = cfg.get('old', {}) # @PURPOSE: Функция замены, сохраняющая кавычки если они были.
new_cfg = cfg.get('new', {}) # @PRE: match должен быть объектом совпадения регулярного выражения.
for key, old_val in old_cfg.items(): # @POST: Возвращает строку с новым значением, сохраняя префикс и кавычки.
if key in new_cfg: def replacer(match):
new_val = new_cfg[key] prefix = match.group(1)
# Заменяем только точные совпадения старого значения в тексте YAML, используя ключ для контекста quote_open = match.group(2)
if isinstance(old_val, str): quote_close = match.group(4)
# Ищем паттерн: key: "value" или key: value return f"{prefix}{quote_open}{new_val}{quote_close}"
key_pattern = re.escape(key) # [/DEF:replacer:Function]
val_pattern = re.escape(old_val)
# Группы: 1=ключ+разделитель, 2=открывающая кавычка (опц), 3=значение, 4=закрывающая кавычка (опц) modified_content = re.sub(pattern, replacer, modified_content)
pattern = rf'({key_pattern}\s*:\s*)(["\']?)({val_pattern})(["\']?)' app_logger.info("[_update_yaml_file][State] Replaced '%s' with '%s' for key %s in %s", old_val, new_val, key, file_path)
# Записываем обратно изменённый контент без парсинга YAML, сохраняем оригинальное форматирование
# [DEF:replacer:Function] with open(file_path, 'w', encoding='utf-8') as f:
# @PURPOSE: Функция замены, сохраняющая кавычки если они были. f.write(modified_content)
# @PRE: match должен быть объектом совпадения регулярного выражения. except Exception as e:
# @POST: Возвращает строку с новым значением, сохраняя префикс и кавычки. app_logger.error("[_update_yaml_file][Failure] Error performing raw replacement in %s: %s", file_path, e)
def replacer(match): # [/DEF:_update_yaml_file:Function]
with logger.belief_scope("replacer"):
prefix = match.group(1) # [DEF:create_dashboard_export:Function]
quote_open = match.group(2) # @PURPOSE: Создает ZIP-архив из указанных исходных путей.
quote_close = match.group(4) # @PRE: source_paths должен содержать существующие пути.
return f"{prefix}{quote_open}{new_val}{quote_close}" # @POST: ZIP-архив создан по пути zip_path.
# [/DEF:replacer:Function] # @PARAM: zip_path (Union[str, Path]) - Путь для сохранения ZIP архива.
# @PARAM: source_paths (List[Union[str, Path]]) - Список исходных путей для архивации.
modified_content = re.sub(pattern, replacer, modified_content) # @PARAM: exclude_extensions (Optional[List[str]]) - Список расширений для исключения.
logger.info("[_update_yaml_file][State] Replaced '%s' with '%s' for key %s in %s", old_val, new_val, key, file_path) # @RETURN: bool - `True` при успехе, `False` при ошибке.
# Записываем обратно изменённый контент без парсинга YAML, сохраняем оригинальное форматирование def create_dashboard_export(zip_path: Union[str, Path], source_paths: List[Union[str, Path]], exclude_extensions: Optional[List[str]] = None) -> bool:
with open(file_path, 'w', encoding='utf-8') as f: with belief_scope(f"Create dashboard export: {zip_path}"):
f.write(modified_content) app_logger.info("[create_dashboard_export][Enter] Packing dashboard: %s -> %s", source_paths, zip_path)
except Exception as e: try:
logger.error("[_update_yaml_file][Failure] Error performing raw replacement in %s: %s", file_path, e) exclude_ext = [ext.lower() for ext in exclude_extensions or []]
# [/DEF:_update_yaml_file:Function] with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
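# Example (sketch; keys and URIs are illustrative). Each entry pairs the values to
# find ('old') with their replacements ('new'); only keys present in both are touched:
#
#     update_yamls(
#         db_configs=[{
#             "old": {"sqlalchemy_uri": "postgresql://dev-db:5432/superset"},
#             "new": {"sqlalchemy_uri": "postgresql://prod-db:5432/superset"},
#         }],
#         path="exports/prod/unpacked",
#     )
#
# The replacement is textual (`key: "value"` or `key: value`), so quoting style and
# the rest of the file layout are preserved.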
# [DEF:create_dashboard_export:Function]
# @PURPOSE: Creates a ZIP archive from the given source paths.
# @PRE: source_paths must contain existing paths.
# @POST: A ZIP archive is created at zip_path.
# @PARAM: zip_path (Union[str, Path]) - Destination path of the ZIP archive.
# @PARAM: source_paths (List[Union[str, Path]]) - Source paths to archive.
# @PARAM: exclude_extensions (Optional[List[str]]) - File extensions to exclude.
# @RETURN: bool - `True` on success, `False` on error.
def create_dashboard_export(zip_path: Union[str, Path], source_paths: List[Union[str, Path]], exclude_extensions: Optional[List[str]] = None) -> bool:
    with belief_scope(f"Create dashboard export: {zip_path}"):
        app_logger.info("[create_dashboard_export][Enter] Packing dashboard: %s -> %s", source_paths, zip_path)
        try:
            exclude_ext = [ext.lower() for ext in exclude_extensions or []]
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for src_path_str in source_paths:
                    src_path = Path(src_path_str)
                    assert src_path.exists(), f"Path not found: {src_path}"
                    for item in src_path.rglob('*'):
                        if item.is_file() and item.suffix.lower() not in exclude_ext:
                            arcname = item.relative_to(src_path.parent)
                            zipf.write(item, arcname)
            app_logger.info("[create_dashboard_export][Exit] Archive created: %s", zip_path)
            return True
        except (IOError, zipfile.BadZipFile, AssertionError) as e:
            app_logger.error("[create_dashboard_export][Failure] Error: %s", e, exc_info=True)
            return False
# [/DEF:create_dashboard_export:Function]
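# Example (sketch; paths are illustrative):
#
#     ok = create_dashboard_export(
#         "exports/prod/sales_dashboard.zip",
#         ["exports/prod/unpacked/sales_dashboard"],
#         exclude_extensions=[".log", ".tmp"],
#     )
#     # Returns True on success; archive entries are stored relative to the parent
#     # of each source path, so the top-level folder name is kept inside the ZIP.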
# [DEF:sanitize_filename:Function]
# @PURPOSE: Strips characters that are not allowed in file names.
# @PRE: filename must be a string.
# @POST: Returns the string with forbidden characters replaced.
# @PARAM: filename (str) - Original file name.
# @RETURN: str - Sanitized string.
def sanitize_filename(filename: str) -> str:
    with belief_scope(f"Sanitize filename: {filename}"):
        # Replace \ / * ? : " < > | with underscores and trim surrounding whitespace
        return re.sub(r'[\\/*?:"<>|]', "_", filename).strip()
# [/DEF:sanitize_filename:Function]
# [DEF:get_filename_from_headers:Function]
# @PURPOSE: Extracts the file name from the 'Content-Disposition' HTTP header.
# @PRE: headers must be a dict of HTTP headers.
# @POST: Returns the file name, or None if the header is absent.
# @PARAM: headers (dict) - HTTP headers.
# @RETURN: Optional[str] - File name or `None`.
def get_filename_from_headers(headers: dict) -> Optional[str]:
    with belief_scope("Get filename from headers"):
        content_disposition = headers.get("Content-Disposition", "")
        if match := re.search(r'filename="?([^"]+)"?', content_disposition):
            return match.group(1).strip()
        return None
# [/DEF:get_filename_from_headers:Function]
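# Example (sketch; the header value is illustrative):
#
#     headers = {"Content-Disposition": 'attachment; filename="sales_dashboard.zip"'}
#     get_filename_from_headers(headers)   # -> "sales_dashboard.zip"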
# [DEF:consolidate_archive_folders:Function]
# @PURPOSE: Consolidates archive directories that share a common slug in their names.
# @PRE: root_directory must be a Path object pointing to an existing directory.
# @POST: Directories with the same prefix are merged into a single directory.
# @THROW: TypeError, ValueError - If `root_directory` is invalid.
# @PARAM: root_directory (Path) - Root directory to consolidate.
def consolidate_archive_folders(root_directory: Path) -> None:
    with belief_scope(f"Consolidate archives in {root_directory}"):
        assert isinstance(root_directory, Path), "root_directory must be a Path object."
        assert root_directory.is_dir(), "root_directory must be an existing directory."

        app_logger.info("[consolidate_archive_folders][Enter] Consolidating archives in %s", root_directory)
        # Collect every directory that contains archives
        archive_dirs = []
        for item in root_directory.iterdir():
            if item.is_dir():
                # Check whether the directory holds any ZIP archives
                if any(item.glob("*.zip")):
                    archive_dirs.append(item)
        # Group by slug (the part of the name before the first '_')
        slug_groups = {}
        for dir_path in archive_dirs:
            dir_name = dir_path.name
            slug = dir_name.split('_')[0] if '_' in dir_name else dir_name
            if slug not in slug_groups:
                slug_groups[slug] = []
            slug_groups[slug].append(dir_path)
        # Consolidate each group
        for slug, dirs in slug_groups.items():
            if len(dirs) <= 1:
                continue
            # Create the target directory
            target_dir = root_directory / slug
            target_dir.mkdir(exist_ok=True)
            app_logger.info("[consolidate_archive_folders][State] Consolidating %d directories under %s", len(dirs), target_dir)
            # Move the contents (files and subdirectories alike)
            for source_dir in dirs:
                if source_dir == target_dir:
                    continue
                for item in source_dir.iterdir():
                    dest_item = target_dir / item.name
                    try:
                        shutil.move(str(item), str(dest_item))
                    except Exception as e:
                        app_logger.error("[consolidate_archive_folders][Failure] Failed to move %s to %s: %s", item, dest_item, e)
                # Remove the now-empty source directory
                try:
                    source_dir.rmdir()
                    app_logger.info("[consolidate_archive_folders][State] Removed source directory: %s", source_dir)
                except Exception as e:
                    app_logger.error("[consolidate_archive_folders][Failure] Failed to remove source directory %s: %s", source_dir, e)
# [/DEF:consolidate_archive_folders:Function]
# [/DEF:backend.core.utils.fileio:Module]

View File

@@ -1,265 +1,286 @@
# [DEF:backend.core.utils.network:Module]
#
# @SEMANTICS: network, http, client, api, requests, session, authentication
# @PURPOSE: Encapsulates the low-level HTTP logic for talking to the Superset API, including authentication, session management, retry logic, and error handling.
# @LAYER: Infra
# @RELATION: DEPENDS_ON -> backend.src.core.logger
# @RELATION: DEPENDS_ON -> requests
# @PUBLIC_API: APIClient

# [SECTION: IMPORTS]
from typing import Optional, Dict, Any, List, Union, cast
import json
import io
from pathlib import Path
import requests
from requests.adapters import HTTPAdapter
import urllib3
from urllib3.util.retry import Retry
from ..logger import logger as app_logger, belief_scope
# [/SECTION]

# [DEF:SupersetAPIError:Class]
# @PURPOSE: Base error for Superset API failures; carries arbitrary context for diagnostics.
class SupersetAPIError(Exception):
    def __init__(self, message: str = "Superset API error", **context: Any):
        self.context = context
        super().__init__(f"[API_FAILURE] {message} | Context: {self.context}")
# [/DEF:SupersetAPIError:Class]

# [DEF:AuthenticationError:Class]
class AuthenticationError(SupersetAPIError):
    def __init__(self, message: str = "Authentication failed", **context: Any):
        super().__init__(message, type="authentication", **context)
# [/DEF:AuthenticationError:Class]

# [DEF:PermissionDeniedError:Class]
class PermissionDeniedError(AuthenticationError):
    def __init__(self, message: str = "Permission denied", **context: Any):
        super().__init__(message, **context)
# [/DEF:PermissionDeniedError:Class]

# [DEF:DashboardNotFoundError:Class]
class DashboardNotFoundError(SupersetAPIError):
    def __init__(self, resource_id: Union[int, str], message: str = "Dashboard not found", **context: Any):
        super().__init__(f"Dashboard '{resource_id}' {message}", subtype="not_found", resource_id=resource_id, **context)
# [/DEF:DashboardNotFoundError:Class]

# [DEF:NetworkError:Class]
class NetworkError(Exception):
    def __init__(self, message: str = "Network connection failed", **context: Any):
        self.context = context
        super().__init__(f"[NETWORK_FAILURE] {message} | Context: {self.context}")
# [/DEF:NetworkError:Class]
# [DEF:APIClient:Class]
# @PURPOSE: Encapsulates the HTTP logic for working with the API, including sessions, authentication, and request handling.
class APIClient:
    DEFAULT_TIMEOUT = 30

    # [DEF:__init__:Function]
    # @PURPOSE: Initializes the API client with its configuration and a session.
    # @PARAM: config (Dict[str, Any]) - Configuration.
    # @PARAM: verify_ssl (bool) - Whether to verify SSL certificates.
    # @PARAM: timeout (int) - Request timeout in seconds.
    # @PRE: config must contain 'base_url' and 'auth'.
    # @POST: APIClient instance is initialized with a session.
    def __init__(self, config: Dict[str, Any], verify_ssl: bool = True, timeout: int = DEFAULT_TIMEOUT):
        with belief_scope("__init__"):
            app_logger.info("[APIClient.__init__][Entry] Initializing APIClient.")
            self.base_url: str = config.get("base_url", "")
            self.auth = config.get("auth")
            self.request_settings = {"verify_ssl": verify_ssl, "timeout": timeout}
            self.session = self._init_session()
            self._tokens: Dict[str, str] = {}
            self._authenticated = False
            app_logger.info("[APIClient.__init__][Exit] APIClient initialized.")
    # [/DEF:__init__:Function]
    # [DEF:_init_session:Function]
    # @PURPOSE: Creates and configures a `requests.Session` with retry logic.
    # @PRE: self.request_settings must be initialized.
    # @POST: Returns a configured requests.Session instance.
    # @RETURN: requests.Session - The configured session.
    def _init_session(self) -> requests.Session:
        with belief_scope("_init_session"):
            session = requests.Session()
            # Retry transient server errors with exponential backoff
            retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
            adapter = HTTPAdapter(max_retries=retries)
            session.mount('http://', adapter)
            session.mount('https://', adapter)
            if not self.request_settings["verify_ssl"]:
                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
                app_logger.warning("[_init_session][State] SSL verification disabled.")
            session.verify = self.request_settings["verify_ssl"]
            return session
    # [/DEF:_init_session:Function]
    # [DEF:authenticate:Function]
    # @PURPOSE: Authenticates against the Superset API and obtains the access and CSRF tokens.
    # @PRE: self.auth and self.base_url must be valid.
    # @POST: `self._tokens` is populated and `self._authenticated` is set to `True`.
    # @RETURN: Dict[str, str] - Token dictionary.
    # @THROW: AuthenticationError, NetworkError - on failure.
    def authenticate(self) -> Dict[str, str]:
        with belief_scope("authenticate"):
            app_logger.info("[authenticate][Enter] Authenticating to %s", self.base_url)
            try:
                login_url = f"{self.base_url}/security/login"
                response = self.session.post(login_url, json=self.auth, timeout=self.request_settings["timeout"])
                response.raise_for_status()
                access_token = response.json()["access_token"]

                csrf_url = f"{self.base_url}/security/csrf_token/"
                csrf_response = self.session.get(csrf_url, headers={"Authorization": f"Bearer {access_token}"}, timeout=self.request_settings["timeout"])
                csrf_response.raise_for_status()

                self._tokens = {"access_token": access_token, "csrf_token": csrf_response.json()["result"]}
                self._authenticated = True
                app_logger.info("[authenticate][Exit] Authenticated successfully.")
                return self._tokens
            except requests.exceptions.HTTPError as e:
                raise AuthenticationError(f"Authentication failed: {e}") from e
            except (requests.exceptions.RequestException, KeyError) as e:
                raise NetworkError(f"Network or parsing error during authentication: {e}") from e
    # [/DEF:authenticate:Function]
    @property
    # [DEF:headers:Function]
    # @PURPOSE: Returns the HTTP headers for authenticated requests.
    # @PRE: APIClient is initialized and authenticated, or can authenticate on demand.
    # @POST: Returns headers including the auth tokens.
    def headers(self) -> Dict[str, str]:
        with belief_scope("headers"):
            if not self._authenticated: self.authenticate()
            return {
                "Authorization": f"Bearer {self._tokens['access_token']}",
                "X-CSRFToken": self._tokens.get("csrf_token", ""),
                "Referer": self.base_url,
                "Content-Type": "application/json"
            }
    # [/DEF:headers:Function]
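    # Accessing `client.headers` triggers authenticate() on first use and reuses the
    # cached tokens afterwards; expired tokens are not refreshed automatically here.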
    # [DEF:request:Function]
    # @PURPOSE: Performs a generic HTTP request against the API.
    # @PARAM: method (str) - HTTP method.
    # @PARAM: endpoint (str) - API endpoint.
    # @PARAM: headers (Optional[Dict]) - Extra headers.
    # @PARAM: raw_response (bool) - Whether to return the raw response.
    # @PRE: method and endpoint must be strings.
    # @POST: Returns response content or the raw Response object.
    # @RETURN: `requests.Response` if `raw_response=True`, otherwise `dict`.
    # @THROW: SupersetAPIError, NetworkError and their subclasses.
    def request(self, method: str, endpoint: str, headers: Optional[Dict] = None, raw_response: bool = False, **kwargs) -> Union[requests.Response, Dict[str, Any]]:
        with belief_scope("request"):
            full_url = f"{self.base_url}{endpoint}"
            _headers = self.headers.copy()
            if headers: _headers.update(headers)

            try:
                response = self.session.request(method, full_url, headers=_headers, **kwargs)
                response.raise_for_status()
                return response if raw_response else response.json()
            except requests.exceptions.HTTPError as e:
                self._handle_http_error(e, endpoint)
            except requests.exceptions.RequestException as e:
                self._handle_network_error(e, full_url)
    # [/DEF:request:Function]
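    # Example (sketch; the endpoint and Rison query are illustrative — extra kwargs
    # such as `params` are forwarded to requests.Session.request):
    #
    #     dashboards = client.request("GET", "/dashboard/", params={"q": "(page_size:100)"})
    #     raw = client.request("GET", "/dashboard/export/", raw_response=True)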
    # [DEF:_handle_http_error:Function]
    # @PURPOSE: (Helper) Maps HTTP errors onto the custom exception hierarchy.
    # @PARAM: e (requests.exceptions.HTTPError) - The error.
    # @PARAM: endpoint (str) - The endpoint.
    # @PRE: e must be a valid HTTPError with a response.
    # @POST: Raises a specific SupersetAPIError or subclass.
    def _handle_http_error(self, e: requests.exceptions.HTTPError, endpoint: str):
        with belief_scope("_handle_http_error"):
            status_code = e.response.status_code
            if status_code == 404: raise DashboardNotFoundError(endpoint) from e
            if status_code == 403: raise PermissionDeniedError() from e
            if status_code == 401: raise AuthenticationError() from e
            raise SupersetAPIError(f"API Error {status_code}: {e.response.text}") from e
    # [/DEF:_handle_http_error:Function]
    # [DEF:_handle_network_error:Function]
    # @PURPOSE: (Helper) Maps network-level errors onto `NetworkError`.
    # @PARAM: e (requests.exceptions.RequestException) - The error.
    # @PARAM: url (str) - The URL.
    # @PRE: e must be a RequestException.
    # @POST: Raises a NetworkError.
    def _handle_network_error(self, e: requests.exceptions.RequestException, url: str):
        with belief_scope("_handle_network_error"):
            if isinstance(e, requests.exceptions.Timeout): msg = "Request timeout"
            elif isinstance(e, requests.exceptions.ConnectionError): msg = "Connection error"
            else: msg = f"Unknown network error: {e}"
            raise NetworkError(msg, url=url) from e
    # [/DEF:_handle_network_error:Function]
elif isinstance(file_obj, io.BytesIO):
files_payload = {form_field: (file_name, file_obj.getvalue(), 'application/x-zip-compressed')} # [DEF:upload_file:Function]
else: # @PURPOSE: Загружает файл на сервер через multipart/form-data.
raise TypeError(f"Unsupported file_obj type: {type(file_obj)}") # @PARAM: endpoint (str) - Эндпоинт.
# @PARAM: file_info (Dict[str, Any]) - Информация о файле.
return self._perform_upload(full_url, files_payload, extra_data, _headers, timeout) # @PARAM: extra_data (Optional[Dict]) - Дополнительные данные.
# [/DEF:upload_file:Function] # @PARAM: timeout (Optional[int]) - Таймаут.
# @PRE: file_info must contain 'file_obj' and 'file_name'.
# [DEF:_perform_upload:Function] # @POST: File is uploaded and response returned.
# @PURPOSE: (Helper) Выполняет POST запрос с файлом. # @RETURN: Ответ API в виде словаря.
# @PARAM: url (str) - URL. # @THROW: SupersetAPIError, NetworkError, TypeError.
# @PARAM: files (Dict) - Файлы. def upload_file(self, endpoint: str, file_info: Dict[str, Any], extra_data: Optional[Dict] = None, timeout: Optional[int] = None) -> Dict:
# @PARAM: data (Optional[Dict]) - Данные. with belief_scope("upload_file"):
# @PARAM: headers (Dict) - Заголовки. full_url = f"{self.base_url}{endpoint}"
# @PARAM: timeout (Optional[int]) - Таймаут. _headers = self.headers.copy(); _headers.pop('Content-Type', None)
# @PRE: url, files, and headers must be provided.
# @POST: POST request is performed and JSON response returned. file_obj, file_name, form_field = file_info.get("file_obj"), file_info.get("file_name"), file_info.get("form_field", "file")
# @RETURN: Dict - Ответ.
def _perform_upload(self, url: str, files: Dict, data: Optional[Dict], headers: Dict, timeout: Optional[int]) -> Dict: files_payload = {}
with belief_scope("_perform_upload"): if isinstance(file_obj, (str, Path)):
try: with open(file_obj, 'rb') as f:
response = self.session.post(url, files=files, data=data or {}, headers=headers, timeout=timeout or self.request_settings["timeout"]) files_payload = {form_field: (file_name, f.read(), 'application/x-zip-compressed')}
response.raise_for_status() elif isinstance(file_obj, io.BytesIO):
# Добавляем логирование для отладки files_payload = {form_field: (file_name, file_obj.getvalue(), 'application/x-zip-compressed')}
if response.status_code == 200: else:
try: raise TypeError(f"Unsupported file_obj type: {type(file_obj)}")
return response.json()
except Exception as json_e: return self._perform_upload(full_url, files_payload, extra_data, _headers, timeout)
self.logger.debug(f"[_perform_upload][Debug] Response is not valid JSON: {response.text[:200]}...") # [/DEF:upload_file:Function]
raise SupersetAPIError(f"API error during upload: Response is not valid JSON: {json_e}") from json_e
return response.json() # [DEF:_perform_upload:Function]
except requests.exceptions.HTTPError as e: # @PURPOSE: (Helper) Выполняет POST запрос с файлом.
raise SupersetAPIError(f"API error during upload: {e.response.text}") from e # @PARAM: url (str) - URL.
except requests.exceptions.RequestException as e: # @PARAM: files (Dict) - Файлы.
raise NetworkError(f"Network error during upload: {e}", url=url) from e # @PARAM: data (Optional[Dict]) - Данные.
# [/DEF:_perform_upload:Function] # @PARAM: headers (Dict) - Заголовки.
# @PARAM: timeout (Optional[int]) - Таймаут.
# [DEF:fetch_paginated_count:Function] # @PRE: url, files, and headers must be provided.
# @PURPOSE: Получает общее количество элементов для пагинации. # @POST: POST request is performed and JSON response returned.
# @PARAM: endpoint (str) - Эндпоинт. # @RETURN: Dict - Ответ.
# @PARAM: query_params (Dict) - Параметры запроса. def _perform_upload(self, url: str, files: Dict, data: Optional[Dict], headers: Dict, timeout: Optional[int]) -> Dict:
# @PARAM: count_field (str) - Поле с количеством. with belief_scope("_perform_upload"):
# @PRE: query_params must be a dictionary. try:
# @POST: Returns total count of items. response = self.session.post(url, files=files, data=data or {}, headers=headers, timeout=timeout or self.request_settings["timeout"])
# @RETURN: int - Количество. response.raise_for_status()
def fetch_paginated_count(self, endpoint: str, query_params: Dict, count_field: str = "count") -> int: if response.status_code == 200:
with belief_scope("fetch_paginated_count"): try:
response_json = cast(Dict[str, Any], self.request("GET", endpoint, params={"q": json.dumps(query_params)})) return response.json()
return response_json.get(count_field, 0) except Exception as json_e:
# [/DEF:fetch_paginated_count:Function] app_logger.debug(f"[_perform_upload][Debug] Response is not valid JSON: {response.text[:200]}...")
raise SupersetAPIError(f"API error during upload: Response is not valid JSON: {json_e}") from json_e
# [DEF:fetch_paginated_data:Function] return response.json()
# @PURPOSE: Автоматически собирает данные со всех страниц пагинированного эндпоинта. except requests.exceptions.HTTPError as e:
# @PARAM: endpoint (str) - Эндпоинт. raise SupersetAPIError(f"API error during upload: {e.response.text}") from e
# @PARAM: pagination_options (Dict[str, Any]) - Опции пагинации. except requests.exceptions.RequestException as e:
# @PRE: pagination_options must contain 'base_query', 'total_count', 'results_field'. raise NetworkError(f"Network error during upload: {e}", url=url) from e
# @POST: Returns all items across all pages. # [/DEF:_perform_upload:Function]
# @RETURN: List[Any] - Список данных.
def fetch_paginated_data(self, endpoint: str, pagination_options: Dict[str, Any]) -> List[Any]: # [DEF:fetch_paginated_count:Function]
with belief_scope("fetch_paginated_data"): # @PURPOSE: Получает общее количество элементов для пагинации.
base_query, total_count = pagination_options["base_query"], pagination_options["total_count"] # @PARAM: endpoint (str) - Эндпоинт.
results_field, page_size = pagination_options["results_field"], base_query.get('page_size') # @PARAM: query_params (Dict) - Параметры запроса.
assert page_size and page_size > 0, "'page_size' must be a positive number." # @PARAM: count_field (str) - Поле с количеством.
# @PRE: query_params must be a dictionary.
results = [] # @POST: Returns total count of items.
for page in range((total_count + page_size - 1) // page_size): # @RETURN: int - Количество.
query = {**base_query, 'page': page} def fetch_paginated_count(self, endpoint: str, query_params: Dict, count_field: str = "count") -> int:
response_json = cast(Dict[str, Any], self.request("GET", endpoint, params={"q": json.dumps(query)})) with belief_scope("fetch_paginated_count"):
results.extend(response_json.get(results_field, [])) response_json = cast(Dict[str, Any], self.request("GET", endpoint, params={"q": json.dumps(query_params)}))
return results return response_json.get(count_field, 0)
# [/DEF:fetch_paginated_data:Function] # [/DEF:fetch_paginated_count:Function]
# [/DEF:APIClient:Class] # [DEF:fetch_paginated_data:Function]
# @PURPOSE: Автоматически собирает данные со всех страниц пагинированного эндпоинта.
# [/DEF:superset_tool.utils.network:Module] # @PARAM: endpoint (str) - Эндпоинт.
# @PARAM: pagination_options (Dict[str, Any]) - Опции пагинации.
# @PRE: pagination_options must contain 'base_query', 'total_count', 'results_field'.
# @POST: Returns all items across all pages.
# @RETURN: List[Any] - Список данных.
def fetch_paginated_data(self, endpoint: str, pagination_options: Dict[str, Any]) -> List[Any]:
with belief_scope("fetch_paginated_data"):
base_query, total_count = pagination_options["base_query"], pagination_options["total_count"]
results_field, page_size = pagination_options["results_field"], base_query.get('page_size')
assert page_size and page_size > 0, "'page_size' must be a positive number."
results = []
for page in range((total_count + page_size - 1) // page_size):
query = {**base_query, 'page': page}
response_json = cast(Dict[str, Any], self.request("GET", endpoint, params={"q": json.dumps(query)}))
results.extend(response_json.get(results_field, []))
return results
# [/DEF:fetch_paginated_data:Function]
# [/DEF:APIClient:Class]
# [/DEF:backend.core.utils.network:Module]

View File

@@ -12,10 +12,9 @@ from requests.exceptions import RequestException
 from ..core.plugin_base import PluginBase
 from ..core.logger import belief_scope
-from superset_tool.client import SupersetClient
-from superset_tool.exceptions import SupersetAPIError
-from superset_tool.utils.logger import SupersetLogger
-from superset_tool.utils.fileio import (
+from ..core.superset_client import SupersetClient
+from ..core.utils.network import SupersetAPIError
+from ..core.utils.fileio import (
     save_and_unpack_dashboard,
     archive_exports,
     sanitize_filename,
@@ -23,7 +22,6 @@ from superset_tool.utils.fileio import (
     remove_empty_directories,
     RetentionPolicy
 )
-from superset_tool.utils.init_clients import setup_clients
 from ..dependencies import get_config_manager
 # [DEF:BackupPlugin:Class]
@@ -131,25 +129,25 @@ class BackupPlugin(PluginBase):
         backup_path_str = params.get("backup_path") or config_manager.get_config().settings.backup_path
         backup_path = Path(backup_path_str)
-        logger = SupersetLogger(log_dir=backup_path / "Logs", console=True)
-        logger.info(f"[BackupPlugin][Entry] Starting backup for {env}.")
+        from ..core.logger import logger as app_logger
+        app_logger.info(f"[BackupPlugin][Entry] Starting backup for {env}.")
         try:
             config_manager = get_config_manager()
             if not config_manager.has_environments():
                 raise ValueError("No Superset environments configured. Please add an environment in Settings.")
-            clients = setup_clients(logger, custom_envs=config_manager.get_environments())
-            client = clients.get(env)
-            if not client:
+            env_config = config_manager.get_environment(env)
+            if not env_config:
                 raise ValueError(f"Environment '{env}' not found in configuration.")
+            client = SupersetClient(env_config)
             dashboard_count, dashboard_meta = client.get_dashboards()
-            logger.info(f"[BackupPlugin][Progress] Found {dashboard_count} dashboards to export in {env}.")
+            app_logger.info(f"[BackupPlugin][Progress] Found {dashboard_count} dashboards to export in {env}.")
             if dashboard_count == 0:
-                logger.info("[BackupPlugin][Exit] No dashboards to back up.")
+                app_logger.info("[BackupPlugin][Exit] No dashboards to back up.")
                 return
             for db in dashboard_meta:
@@ -169,23 +167,22 @@ class BackupPlugin(PluginBase):
                         zip_content=zip_content,
                         original_filename=filename,
                         output_dir=dashboard_dir,
-                        unpack=False,
-                        logger=logger
+                        unpack=False
                     )
-                    archive_exports(str(dashboard_dir), policy=RetentionPolicy(), logger=logger)
+                    archive_exports(str(dashboard_dir), policy=RetentionPolicy())
                 except (SupersetAPIError, RequestException, IOError, OSError) as db_error:
-                    logger.error(f"[BackupPlugin][Failure] Failed to export dashboard {dashboard_title} (ID: {dashboard_id}): {db_error}", exc_info=True)
+                    app_logger.error(f"[BackupPlugin][Failure] Failed to export dashboard {dashboard_title} (ID: {dashboard_id}): {db_error}", exc_info=True)
                     continue
-            consolidate_archive_folders(backup_path / env.upper(), logger=logger)
-            remove_empty_directories(str(backup_path / env.upper()), logger=logger)
-            logger.info(f"[BackupPlugin][CoherenceCheck:Passed] Backup logic completed for {env}.")
+            consolidate_archive_folders(backup_path / env.upper())
+            remove_empty_directories(str(backup_path / env.upper()))
+            app_logger.info(f"[BackupPlugin][CoherenceCheck:Passed] Backup logic completed for {env}.")
         except (RequestException, IOError, KeyError) as e:
-            logger.critical(f"[BackupPlugin][Failure] Fatal error during backup for {env}: {e}", exc_info=True)
+            app_logger.critical(f"[BackupPlugin][Failure] Fatal error during backup for {env}: {e}", exc_info=True)
             raise e
 # [/DEF:execute:Function]
 # [/DEF:BackupPlugin:Class]

View File

@@ -145,19 +145,7 @@ class DebugPlugin(PluginBase):
         if not env_config:
             raise ValueError(f"Environment '{name}' not found.")
-        # Map Environment model to SupersetConfig
-        from superset_tool.models import SupersetConfig
-        superset_config = SupersetConfig(
-            env=env_config.name,
-            base_url=env_config.url,
-            auth={
-                "provider": "db",  # Defaulting to db provider
-                "username": env_config.username,
-                "password": env_config.password,
-                "refresh": "false"
-            }
-        )
-        client = SupersetClient(superset_config)
+        client = SupersetClient(env_config)
         client.authenticate()
         count, dbs = client.get_databases()
         results[name] = {
@@ -188,19 +176,7 @@ class DebugPlugin(PluginBase):
         if not env_config:
             raise ValueError(f"Environment '{env_name}' not found.")
-        # Map Environment model to SupersetConfig
-        from superset_tool.models import SupersetConfig
-        superset_config = SupersetConfig(
-            env=env_config.name,
-            base_url=env_config.url,
-            auth={
-                "provider": "db",  # Defaulting to db provider
-                "username": env_config.username,
-                "password": env_config.password,
-                "refresh": "false"
-            }
-        )
-        client = SupersetClient(superset_config)
+        client = SupersetClient(env_config)
         client.authenticate()
         dataset_response = client.get_dataset(dataset_id)

View File

@@ -12,8 +12,7 @@ from ..core.superset_client import SupersetClient
 from ..core.logger import logger, belief_scope
 from ..core.database import SessionLocal
 from ..models.connection import ConnectionConfig
-from superset_tool.utils.dataset_mapper import DatasetMapper
-from superset_tool.utils.logger import SupersetLogger
+from ..core.utils.dataset_mapper import DatasetMapper
 # [/SECTION]
 # [DEF:MapperPlugin:Class]
@@ -137,25 +136,13 @@ class MapperPlugin(PluginBase):
         # Get config and initialize client
         from ..dependencies import get_config_manager
-        from superset_tool.models import SupersetConfig
         config_manager = get_config_manager()
         env_config = config_manager.get_environment(env_name)
         if not env_config:
             logger.error(f"[MapperPlugin.execute][State] Environment '{env_name}' not found.")
             raise ValueError(f"Environment '{env_name}' not found in configuration.")
-        # Map Environment model to SupersetConfig
-        superset_config = SupersetConfig(
-            env=env_config.name,
-            base_url=env_config.url,
-            auth={
-                "provider": "db",  # Defaulting to db provider
-                "username": env_config.username,
-                "password": env_config.password,
-                "refresh": "false"
-            }
-        )
-        client = SupersetClient(superset_config)
+        client = SupersetClient(env_config)
         client.authenticate()
         postgres_config = None
@@ -185,9 +172,7 @@ class MapperPlugin(PluginBase):
         logger.info(f"[MapperPlugin.execute][Action] Starting mapping for dataset {dataset_id} in {env_name}")
-        # Use internal SupersetLogger for DatasetMapper
-        s_logger = SupersetLogger(name="dataset_mapper_plugin")
-        mapper = DatasetMapper(s_logger)
+        mapper = DatasetMapper()
         try:
             mapper.run_mapping(

View File

@@ -13,11 +13,9 @@ import re
 from ..core.plugin_base import PluginBase
 from ..core.logger import belief_scope
-from superset_tool.client import SupersetClient
-from superset_tool.utils.init_clients import setup_clients
-from superset_tool.utils.fileio import create_temp_file, update_yamls, create_dashboard_export
+from ..core.superset_client import SupersetClient
+from ..core.utils.fileio import create_temp_file, update_yamls, create_dashboard_export
 from ..dependencies import get_config_manager
-from superset_tool.utils.logger import SupersetLogger
 from ..core.migration_engine import MigrationEngine
 from ..core.database import SessionLocal
 from ..models.mapping import DatabaseMapping, Environment
@@ -150,7 +148,7 @@ class MigrationPlugin(PluginBase):
         from ..dependencies import get_task_manager
         tm = get_task_manager()
-        class TaskLoggerProxy(SupersetLogger):
+        class TaskLoggerProxy:
             # [DEF:__init__:Function]
             # @PURPOSE: Initializes the proxy logger.
             # @PRE: None.
@@ -158,7 +156,7 @@ class MigrationPlugin(PluginBase):
             def __init__(self):
                 with belief_scope("__init__"):
                     # Initialize parent with dummy values since we override methods
-                    super().__init__(console=False)
+                    pass
             # [/DEF:__init__:Function]
             # [DEF:debug:Function]
@@ -246,9 +244,8 @@ class MigrationPlugin(PluginBase):
         logger.info(f"[MigrationPlugin][State] Resolved environments: {from_env_name} -> {to_env_name}")
-        all_clients = setup_clients(logger, custom_envs=environments)
-        from_c = all_clients.get(from_env_name)
-        to_c = all_clients.get(to_env_name)
+        from_c = SupersetClient(src_env)
+        to_c = SupersetClient(tgt_env)
         if not from_c or not to_c:
             raise ValueError(f"Clients not initialized for environments: {from_env_name}, {to_env_name}")

View File

@@ -106,25 +106,13 @@ class SearchPlugin(PluginBase):
         # Get config and initialize client
         from ..dependencies import get_config_manager
-        from superset_tool.models import SupersetConfig
         config_manager = get_config_manager()
         env_config = config_manager.get_environment(env_name)
         if not env_config:
             logger.error(f"[SearchPlugin.execute][State] Environment '{env_name}' not found.")
             raise ValueError(f"Environment '{env_name}' not found in configuration.")
-        # Map Environment model to SupersetConfig
-        superset_config = SupersetConfig(
-            env=env_config.name,
-            base_url=env_config.url,
-            auth={
-                "provider": "db",  # Defaulting to db provider
-                "username": env_config.username,
-                "password": env_config.password,
-                "refresh": "false"
-            }
-        )
-        client = SupersetClient(superset_config)
+        client = SupersetClient(env_config)
         client.authenticate()
         logger.info(f"[SearchPlugin.execute][Action] Searching for pattern: '{search_query}' in environment: {env_name}")

View File

@@ -13,7 +13,6 @@ from typing import List, Dict
 from backend.src.core.logger import belief_scope
 from backend.src.core.superset_client import SupersetClient
 from backend.src.core.utils.matching import suggest_mappings
-from superset_tool.models import SupersetConfig
 # [/SECTION]
 # [DEF:MappingService:Class]
@@ -43,17 +42,7 @@ class MappingService:
         if not env:
             raise ValueError(f"Environment {env_id} not found")
-        superset_config = SupersetConfig(
-            env=env.name,
-            base_url=env.url,
-            auth={
-                "provider": "db",
-                "username": env.username,
-                "password": env.password,
-                "refresh": "false"
-            }
-        )
-        return SupersetClient(superset_config)
+        return SupersetClient(env)
 # [/DEF:_get_client:Function]
 # [DEF:get_suggestions:Function]

Binary file not shown.

View File

@@ -1,99 +0,0 @@
#!/usr/bin/env python3
"""Test script to verify the fixes for SupersetClient initialization."""
import sys
sys.path.insert(0, '.')
from src.core.config_manager import ConfigManager
from src.core.config_models import Environment
from src.plugins.search import SearchPlugin
from src.plugins.mapper import MapperPlugin
from src.plugins.debug import DebugPlugin
def test_config_manager():
"""Test ConfigManager methods."""
print("Testing ConfigManager...")
try:
config_manager = ConfigManager()
print(f" ConfigManager initialized")
# Test get_environment method
if hasattr(config_manager, 'get_environment'):
print(f" get_environment method exists")
# Add a test environment if none exists
if not config_manager.has_environments():
test_env = Environment(
id="test-env",
name="Test Environment",
url="http://localhost:8088",
username="admin",
password="admin"
)
config_manager.add_environment(test_env)
print(f" Added test environment: {test_env.name}")
# Test retrieving environment
envs = config_manager.get_environments()
if envs:
test_env_id = envs[0].id
env_config = config_manager.get_environment(test_env_id)
print(f" Successfully retrieved environment: {env_config.name}")
return True
else:
print(f" No environments available (add one in settings)")
return False
except Exception as e:
print(f" Error: {e}")
return False
def test_plugins():
"""Test plugin initialization."""
print("\nTesting plugins...")
plugins = [
("Search Plugin", SearchPlugin()),
("Mapper Plugin", MapperPlugin()),
("Debug Plugin", DebugPlugin())
]
all_ok = True
for name, plugin in plugins:
print(f"\nTesting {name}...")
try:
plugin_id = plugin.id
plugin_name = plugin.name
plugin_version = plugin.version
schema = plugin.get_schema()
print(f" ✓ ID: {plugin_id}")
print(f" ✓ Name: {plugin_name}")
print(f" ✓ Version: {plugin_version}")
print(f" ✓ Schema: {schema}")
except Exception as e:
print(f" ✗ Error: {e}")
all_ok = False
return all_ok
def main():
"""Main test function."""
print("=" * 50)
print("Superset Tools Fix Verification")
print("=" * 50)
config_ok = test_config_manager()
plugins_ok = test_plugins()
print("\n" + "=" * 50)
if config_ok and plugins_ok:
print("✅ All fixes verified successfully!")
else:
print("❌ Some tests failed")
sys.exit(1)
if __name__ == "__main__":
main()

View File

@@ -1,5 +1,5 @@
 import pytest
-from backend.src.core.logger import belief_scope, logger
+from src.core.logger import belief_scope, logger
 # [DEF:test_belief_scope_logs_entry_action_exit:Function]

View File

@@ -1,62 +1,23 @@
 import pytest
-from superset_tool.models import SupersetConfig
-from superset_tool.utils.logger import belief_scope
-# [DEF:test_superset_config_url_normalization:Function]
-# @PURPOSE: Tests that SupersetConfig correctly normalizes the base URL.
-# @PRE: SupersetConfig class is available.
-# @POST: URL normalization is verified.
-def test_superset_config_url_normalization():
-    with belief_scope("test_superset_config_url_normalization"):
-        auth = {
-            "provider": "db",
-            "username": "admin",
-            "password": "password",
-            "refresh": "token"
-        }
-        # Test with /api/v1 already present
-        config = SupersetConfig(
-            env="dev",
-            base_url="http://localhost:8088/api/v1",
-            auth=auth
-        )
-        assert config.base_url == "http://localhost:8088/api/v1"
-        # Test without /api/v1
-        config = SupersetConfig(
-            env="dev",
-            base_url="http://localhost:8088",
-            auth=auth
-        )
-        assert config.base_url == "http://localhost:8088/api/v1"
-        # Test with trailing slash
-        config = SupersetConfig(
-            env="dev",
-            base_url="http://localhost:8088/",
-            auth=auth
-        )
-        assert config.base_url == "http://localhost:8088/api/v1"
+from src.core.config_models import Environment
+from src.core.logger import belief_scope
+# [DEF:test_environment_model:Function]
+# @PURPOSE: Tests that Environment model correctly stores values.
+# @PRE: Environment class is available.
+# @POST: Values are verified.
+def test_environment_model():
+    with belief_scope("test_environment_model"):
+        env = Environment(
+            id="test-id",
+            name="test-env",
+            url="http://localhost:8088/api/v1",
+            username="admin",
+            password="password"
+        )
+        assert env.id == "test-id"
+        assert env.name == "test-env"
+        assert env.url == "http://localhost:8088/api/v1"
 # [/DEF:test_superset_config_url_normalization:Function]
-# [DEF:test_superset_config_invalid_url:Function]
-# @PURPOSE: Tests that SupersetConfig raises ValueError for invalid URLs.
-# @PRE: SupersetConfig class is available.
-# @POST: ValueError is raised for invalid URLs.
-def test_superset_config_invalid_url():
-    with belief_scope("test_superset_config_invalid_url"):
-        auth = {
-            "provider": "db",
-            "username": "admin",
-            "password": "password",
-            "refresh": "token"
-        }
-        with pytest.raises(ValueError, match="Must start with http:// or https://"):
-            SupersetConfig(
-                env="dev",
-                base_url="localhost:8088",
-                auth=auth
-            )
 # [/DEF:test_superset_config_invalid_url:Function]

View File

@@ -1,413 +0,0 @@
# [DEF:migration_script:Module]
#
# @SEMANTICS: migration, cli, superset, ui, logging, error-recovery, batch-delete
# @PURPOSE: Provides an interactive CLI for migrating Superset dashboards between environments, with recovery after failures.
# @LAYER: App
# @RELATION: DEPENDS_ON -> superset_tool.client
# @RELATION: DEPENDS_ON -> superset_tool.utils
# @PUBLIC_API: Migration
# [SECTION: IMPORTS]
import json
import logging
import sys
import zipfile
import re
from pathlib import Path
from typing import List, Optional, Tuple, Dict
from superset_tool.client import SupersetClient
from superset_tool.utils.init_clients import setup_clients
from superset_tool.utils.fileio import create_temp_file, update_yamls, create_dashboard_export
from superset_tool.utils.whiptail_fallback import menu, checklist, yesno, msgbox, inputbox, gauge
from superset_tool.utils.logger import SupersetLogger
# [/SECTION]
# [DEF:Migration:Class]
# @PURPOSE: Encapsulates the interactive dashboard migration logic, with a "delete-and-overwrite" option on import failure.
# @RELATION: CREATES_INSTANCE_OF -> SupersetLogger
# @RELATION: USES -> SupersetClient
class Migration:
"""
Interactive dashboard migration workflow.
"""
# [DEF:__init__:Function]
# @PURPOSE: Initializes the migration service, configuring the logger and the initial state.
# @PRE: None.
# @POST: `self.logger` is ready for use; `enable_delete_on_failure` = `False`.
def __init__(self) -> None:
default_log_dir = Path.cwd() / "logs"
self.logger = SupersetLogger(
name="migration_script",
log_dir=default_log_dir,
level=logging.INFO,
console=True,
)
self.enable_delete_on_failure = False
self.from_c: Optional[SupersetClient] = None
self.to_c: Optional[SupersetClient] = None
self.dashboards_to_migrate: List[dict] = []
self.db_config_replacement: Optional[dict] = None
self._failed_imports: List[dict] = []
# [/DEF:__init__:Function]
# [DEF:run:Function]
# @PURPOSE: Entry point - runs all migration steps in sequence.
# @PRE: The logger is ready.
# @POST: The script has finished; a message is shown to the user.
# @RELATION: CALLS -> self.ask_delete_on_failure
# @RELATION: CALLS -> self.select_environments
# @RELATION: CALLS -> self.select_dashboards
# @RELATION: CALLS -> self.confirm_db_config_replacement
# @RELATION: CALLS -> self.execute_migration
def run(self) -> None:
with self.logger.belief_scope("Migration.run"):
self.logger.info("[run][Entry] Запуск скрипта миграции.")
self.ask_delete_on_failure()
self.select_environments()
self.select_dashboards()
self.confirm_db_config_replacement()
self.execute_migration()
self.logger.info("[run][Exit] Скрипт миграции завершён.")
# [/DEF:run:Function]
# [DEF:ask_delete_on_failure:Function]
# @PURPOSE: Asks the user whether the existing dashboard should be deleted when an import fails.
# @PRE: None.
# @POST: `self.enable_delete_on_failure` is set.
# @RELATION: CALLS -> yesno
def ask_delete_on_failure(self) -> None:
with self.logger.belief_scope("Migration.ask_delete_on_failure"):
self.enable_delete_on_failure = yesno(
"Поведение при ошибке импорта",
"Если импорт завершится ошибкой, удалить существующий дашборд и попытаться импортировать заново?",
)
self.logger.info(
"[ask_delete_on_failure][State] Delete-on-failure = %s",
self.enable_delete_on_failure,
)
# [/DEF:ask_delete_on_failure:Function]
# [DEF:select_environments:Function]
# @PURPOSE: Lets the user pick the source and target Superset environments.
# @PRE: `setup_clients` successfully initializes all clients.
# @POST: `self.from_c` and `self.to_c` are set.
# @RELATION: CALLS -> setup_clients
# @RELATION: CALLS -> menu
def select_environments(self) -> None:
with self.logger.belief_scope("Migration.select_environments"):
self.logger.info("[select_environments][Entry] Шаг 1/5: Выбор окружений.")
try:
all_clients = setup_clients(self.logger)
available_envs = list(all_clients.keys())
except Exception as e:
self.logger.error("[select_environments][Failure] %s", e, exc_info=True)
msgbox("Ошибка", "Не удалось инициализировать клиенты.")
return
rc, from_env_name = menu(
title="Выбор окружения",
prompt="Исходное окружение:",
choices=available_envs,
)
if rc != 0 or from_env_name is None:
self.logger.info("[select_environments][State] Source environment selection cancelled.")
return
self.from_c = all_clients[from_env_name]
self.logger.info("[select_environments][State] from = %s", from_env_name)
available_envs.remove(from_env_name)
rc, to_env_name = menu(
title="Выбор окружения",
prompt="Целевое окружение:",
choices=available_envs,
)
if rc != 0 or to_env_name is None:
self.logger.info("[select_environments][State] Target environment selection cancelled.")
return
self.to_c = all_clients[to_env_name]
self.logger.info("[select_environments][State] to = %s", to_env_name)
self.logger.info("[select_environments][Exit] Шаг 1 завершён.")
# [/DEF:select_environments:Function]
# [DEF:select_dashboards:Function]
# @PURPOSE: Lets the user pick the set of dashboards to migrate.
# @PRE: `self.from_c` is initialized.
# @POST: `self.dashboards_to_migrate` is populated.
# @RELATION: CALLS -> self.from_c.get_dashboards
# @RELATION: CALLS -> checklist
def select_dashboards(self) -> None:
with self.logger.belief_scope("Migration.select_dashboards"):
self.logger.info("[select_dashboards][Entry] Шаг 2/5: Выбор дашбордов.")
if self.from_c is None:
self.logger.error("[select_dashboards][Failure] Source client not initialized.")
msgbox("Ошибка", "Исходное окружение не выбрано.")
return
try:
_, all_dashboards = self.from_c.get_dashboards()
if not all_dashboards:
self.logger.warning("[select_dashboards][State] No dashboards.")
msgbox("Информация", "В исходном окружении нет дашбордов.")
return
rc, regex = inputbox("Поиск", "Введите регулярное выражение для поиска дашбордов:")
if rc != 0:
return
# Ensure regex is a string and perform a case-insensitive search
regex_str = str(regex)
filtered_dashboards = [
d for d in all_dashboards if re.search(regex_str, d["dashboard_title"], re.IGNORECASE)
]
options = [("ALL", "Все дашборды")] + [
(str(d["id"]), d["dashboard_title"]) for d in filtered_dashboards
]
rc, selected = checklist(
title="Выбор дашбордов",
prompt="Отметьте нужные дашборды (введите номера):",
options=options,
)
if rc != 0:
return
if "ALL" in selected:
self.dashboards_to_migrate = filtered_dashboards
else:
self.dashboards_to_migrate = [
d for d in filtered_dashboards if str(d["id"]) in selected
]
self.logger.info(
"[select_dashboards][State] Выбрано %d дашбордов.",
len(self.dashboards_to_migrate),
)
except Exception as e:
self.logger.error("[select_dashboards][Failure] %s", e, exc_info=True)
msgbox("Ошибка", "Не удалось получить список дашбордов.")
self.logger.info("[select_dashboards][Exit] Шаг 2 завершён.")
# [/DEF:select_dashboards:Function]
# [DEF:confirm_db_config_replacement:Function]
# @PURPOSE: Asks the user whether database names in the YAML files need to be replaced.
# @PRE: None.
# @POST: `self.db_config_replacement` is either `None` or populated.
# @RELATION: CALLS -> yesno
# @RELATION: CALLS -> self._select_databases
def confirm_db_config_replacement(self) -> None:
with self.logger.belief_scope("Migration.confirm_db_config_replacement"):
if yesno("Замена БД", "Заменить конфигурацию БД в YAMLфайлах?"):
old_db, new_db = self._select_databases()
if not old_db or not new_db:
self.logger.info("[confirm_db_config_replacement][State] Selection cancelled.")
return
print(f"old_db: {old_db}")
old_result = old_db.get("result", {})
new_result = new_db.get("result", {})
self.db_config_replacement = {
"old": {
"database_name": old_result.get("database_name"),
"uuid": old_result.get("uuid"),
"database_uuid": old_result.get("uuid"),
"id": str(old_db.get("id"))
},
"new": {
"database_name": new_result.get("database_name"),
"uuid": new_result.get("uuid"),
"database_uuid": new_result.get("uuid"),
"id": str(new_db.get("id"))
}
}
self.logger.info("[confirm_db_config_replacement][State] Replacement set: %s", self.db_config_replacement)
else:
self.logger.info("[confirm_db_config_replacement][State] Skipped.")
# [/DEF:confirm_db_config_replacement:Function]
# [DEF:_select_databases:Function]
# @PURPOSE: Lets the user pick the source and target databases via the API.
# @PRE: Clients are initialized.
# @POST: Returns a tuple (old DB, new DB), or (None, None) if cancelled.
# @RELATION: CALLS -> self.from_c.get_databases
# @RELATION: CALLS -> self.to_c.get_databases
# @RELATION: CALLS -> self.from_c.get_database
# @RELATION: CALLS -> self.to_c.get_database
# @RELATION: CALLS -> menu
def _select_databases(self) -> Tuple[Optional[Dict], Optional[Dict]]:
with self.logger.belief_scope("Migration._select_databases"):
self.logger.info("[_select_databases][Entry] Selecting databases from both environments.")
if self.from_c is None or self.to_c is None:
self.logger.error("[_select_databases][Failure] Source or target client not initialized.")
msgbox("Ошибка", "Исходное или целевое окружение не выбрано.")
return None, None
# Fetch the database lists from both environments
try:
_, from_dbs = self.from_c.get_databases()
_, to_dbs = self.to_c.get_databases()
except Exception as e:
self.logger.error("[_select_databases][Failure] Failed to fetch databases: %s", e)
msgbox("Ошибка", "Не удалось получить список баз данных.")
return None, None
# Build the list of choices
# Per the Swagger documentation, the field in the API response is named "database_name"
from_choices = []
for db in from_dbs:
db_name = db.get("database_name", "Без имени")
from_choices.append((str(db["id"]), f"{db_name} (ID: {db['id']})"))
to_choices = []
for db in to_dbs:
db_name = db.get("database_name", "Без имени")
to_choices.append((str(db["id"]), f"{db_name} (ID: {db['id']})"))
# Show the database list for the source environment
rc, from_sel = menu(
title="Выбор исходной БД",
prompt="Выберите исходную БД:",
choices=[f"{name}" for id, name in from_choices]
)
if rc != 0:
return None, None
# Resolve the selected database
from_db_id = from_choices[[choice[1] for choice in from_choices].index(from_sel)][0]
# Fetch full details of the selected database from the source environment
try:
from_db = self.from_c.get_database(int(from_db_id))
except Exception as e:
self.logger.error("[_select_databases][Failure] Failed to fetch database details: %s", e)
msgbox("Ошибка", "Не удалось получить информацию о выбранной базе данных.")
return None, None
# Show the database list for the target environment
rc, to_sel = menu(
title="Выбор целевой БД",
prompt="Выберите целевую БД:",
choices=[f"{name}" for id, name in to_choices]
)
if rc != 0:
return None, None
# Resolve the selected database
to_db_id = to_choices[[choice[1] for choice in to_choices].index(to_sel)][0]
# Fetch full details of the selected database from the target environment
try:
to_db = self.to_c.get_database(int(to_db_id))
except Exception as e:
self.logger.error("[_select_databases][Failure] Failed to fetch database details: %s", e)
msgbox("Ошибка", "Не удалось получить информацию о выбранной базе данных.")
return None, None
self.logger.info("[_select_databases][Exit] Selected databases: %s -> %s", from_db.get("database_name", "Без имени"), to_db.get("database_name", "Без имени"))
return from_db, to_db
# [/DEF:_select_databases:Function]
# [DEF:_batch_delete_by_ids:Function]
# @PURPOSE: Deletes a set of dashboards by their IDs in a single request.
# @PRE: `ids` is a non-empty list of integers.
# @POST: All listed dashboards are deleted (if they existed).
# @RELATION: CALLS -> self.to_c.network.request
# @PARAM: ids (List[int]) - IDs of the dashboards to delete.
def _batch_delete_by_ids(self, ids: List[int]) -> None:
with self.logger.belief_scope("Migration._batch_delete_by_ids", f"ids={ids}"):
if not ids:
self.logger.debug("[_batch_delete_by_ids][Skip] Empty ID list nothing to delete.")
return
if self.to_c is None:
self.logger.error("[_batch_delete_by_ids][Failure] Target client not initialized.")
msgbox("Ошибка", "Целевое окружение не выбрано.")
return
self.logger.info("[_batch_delete_by_ids][Entry] Deleting dashboards IDs: %s", ids)
q_param = json.dumps(ids)
response = self.to_c.network.request(method="DELETE", endpoint="/dashboard/", params={"q": q_param})
if isinstance(response, dict) and response.get("result", True) is False:
self.logger.warning("[_batch_delete_by_ids][Warning] Unexpected delete response: %s", response)
else:
self.logger.info("[_batch_delete_by_ids][Success] Delete request completed.")
# [/DEF:_batch_delete_by_ids:Function]
# [DEF:execute_migration:Function]
# @PURPOSE: Runs the dashboard export/import, handles errors, and performs the recovery procedure when required.
# @PRE: `self.dashboards_to_migrate` is non-empty; `self.from_c` and `self.to_c` are initialized.
# @POST: Successfully migrated dashboards are imported; failed ones are recovered or logged.
# @RELATION: CALLS -> self.from_c.export_dashboard
# @RELATION: CALLS -> create_temp_file
# @RELATION: CALLS -> update_yamls
# @RELATION: CALLS -> create_dashboard_export
# @RELATION: CALLS -> self.to_c.import_dashboard
# @RELATION: CALLS -> self._batch_delete_by_ids
def execute_migration(self) -> None:
with self.logger.belief_scope("Migration.execute_migration"):
if not self.dashboards_to_migrate:
self.logger.warning("[execute_migration][Skip] No dashboards to migrate.")
msgbox("Информация", "Нет дашбордов для миграции.")
return
if self.from_c is None or self.to_c is None:
self.logger.error("[execute_migration][Failure] Source or target client not initialized.")
msgbox("Ошибка", "Исходное или целевое окружение не выбрано.")
return
total = len(self.dashboards_to_migrate)
self.logger.info("[execute_migration][Entry] Starting migration of %d dashboards.", total)
self.to_c.delete_before_reimport = self.enable_delete_on_failure
with gauge("Миграция...", width=60, height=10) as g:
for i, dash in enumerate(self.dashboards_to_migrate):
dash_id, dash_slug, title = dash["id"], dash.get("slug"), dash["dashboard_title"]
g.set_text(f"Миграция: {title} ({i + 1}/{total})")
g.set_percent(int((i / total) * 100))
exported_content = None # Initialize exported_content
try:
exported_content, _ = self.from_c.export_dashboard(dash_id)
with create_temp_file(content=exported_content, dry_run=True, suffix=".zip", logger=self.logger) as tmp_zip_path, \
create_temp_file(suffix=".dir", logger=self.logger) as tmp_unpack_dir:
if not self.db_config_replacement:
self.to_c.import_dashboard(file_name=tmp_zip_path, dash_id=dash_id, dash_slug=dash_slug)
else:
with zipfile.ZipFile(tmp_zip_path, "r") as zip_ref:
zip_ref.extractall(tmp_unpack_dir)
if self.db_config_replacement:
update_yamls(db_configs=[self.db_config_replacement], path=str(tmp_unpack_dir))
with create_temp_file(suffix=".zip", dry_run=True, logger=self.logger) as tmp_new_zip:
create_dashboard_export(zip_path=tmp_new_zip, source_paths=[str(p) for p in Path(tmp_unpack_dir).glob("**/*")])
self.to_c.import_dashboard(file_name=tmp_new_zip, dash_id=dash_id, dash_slug=dash_slug)
self.logger.info("[execute_migration][Success] Dashboard %s imported.", title)
except Exception as exc:
self.logger.error("[execute_migration][Failure] %s", exc, exc_info=True)
self._failed_imports.append({"slug": dash_slug, "dash_id": dash_id, "zip_content": exported_content})
msgbox("Ошибка", f"Не удалось мигрировать дашборд {title}.\n\n{exc}")
g.set_percent(100)
if self.enable_delete_on_failure and self._failed_imports:
self.logger.info("[execute_migration][Recovery] %d dashboards failed. Starting recovery.", len(self._failed_imports))
_, target_dashboards = self.to_c.get_dashboards()
slug_to_id = {d["slug"]: d["id"] for d in target_dashboards if "slug" in d and "id" in d}
ids_to_delete = [slug_to_id[f["slug"]] for f in self._failed_imports if f["slug"] in slug_to_id]
self._batch_delete_by_ids(ids_to_delete)
for fail in self._failed_imports:
with create_temp_file(content=fail["zip_content"], suffix=".zip", logger=self.logger) as retry_zip:
self.to_c.import_dashboard(file_name=retry_zip, dash_id=fail["dash_id"], dash_slug=fail["slug"])
self.logger.info("[execute_migration][Recovered] Dashboard slug '%s' re-imported.", fail["slug"])
self.logger.info("[execute_migration][Exit] Migration finished.")
msgbox("Ошибка" if self._failed_imports else "Информация", "Миграция завершена!")
# [/DEF:execute_migration:Function]
# [/DEF:Migration:Class]
if __name__ == "__main__":
Migration().run()
# [/DEF:migration_script:Module]

View File

@@ -0,0 +1,34 @@
# Specification Quality Checklist: Backend Refactoring - Remove superset_tool
**Purpose**: Validate specification completeness and quality before proceeding to planning
**Created**: 2026-01-22
**Feature**: [Link to spec.md](../spec.md)
## Content Quality
- [x] No implementation details (languages, frameworks, APIs) - *Refactoring is technical by nature, but focused on structural goals.*
- [x] Focused on user value and business needs
- [x] Written for non-technical stakeholders
- [x] All mandatory sections completed
## Requirement Completeness
- [x] No [NEEDS CLARIFICATION] markers remain
- [x] Requirements are testable and unambiguous
- [x] Success criteria are measurable
- [x] Success criteria are technology-agnostic (no implementation details)
- [x] All acceptance scenarios are defined
- [x] Edge cases are identified
- [x] Scope is clearly bounded
- [x] Dependencies and assumptions identified
## Feature Readiness
- [x] All functional requirements have clear acceptance criteria
- [x] User scenarios cover primary flows
- [x] Feature meets measurable outcomes defined in Success Criteria
- [x] No implementation details leak into specification
## Notes
- Items marked incomplete require spec updates before `/speckit.clarify` or `/speckit.plan`

View File

@@ -0,0 +1,39 @@
# API Contracts: 012-remove-superset-tool
## SupersetClient Internal API
The `SupersetClient` will provide the following methods for internal backend use:
### `get_dashboards(query: Optional[Dict] = None) -> Tuple[int, List[Dict]]`
- **Purpose**: Fetches paginated dashboards.
- **Contract**:
- `query`: Optional filters and pagination parameters.
- Returns: `(total_count, dashboard_list)`.
### `export_dashboard(dashboard_id: int) -> Tuple[bytes, str]`
- **Purpose**: Exports a dashboard as a ZIP file.
- **Contract**:
- `dashboard_id`: ID of the dashboard.
- Returns: `(zip_content, filename)`.
### `import_dashboard(file_name: Union[str, Path], dash_id: Optional[int] = None, dash_slug: Optional[str] = None) -> Dict`
- **Purpose**: Imports a dashboard from a ZIP file.
- **Contract**:
- `file_name`: Path to ZIP.
- `dash_id`/`dash_slug`: Optional identifiers for delete-retry logic.
- Returns: API response dictionary.
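A minimal usage sketch tying these methods together (client construction from an `Environment` mirrors the migrated plugin code in this commit; the loop and output path are illustrative):
```python
from pathlib import Path

from backend.src.core.superset_client import SupersetClient

def export_all_dashboards(env_config, out_dir: Path) -> None:
    """Sketch: export every dashboard of one environment to out_dir."""
    client = SupersetClient(env_config)          # env_config is an Environment model
    client.authenticate()
    total, dashboards = client.get_dashboards()  # (total_count, dashboard_list)
    for dash in dashboards:
        zip_content, filename = client.export_dashboard(dash["id"])
        (out_dir / filename).write_bytes(zip_content)
```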
## Configuration Models (Pydantic)
### `Environment`
```python
class Environment(BaseModel):
id: str
name: str
url: str
username: str
password: str
verify_ssl: bool = True
timeout: int = 30
is_default: bool = False
    backup_schedule: Schedule = Field(default_factory=Schedule)
```

View File

@@ -0,0 +1,38 @@
# Data Model: 012-remove-superset-tool
## Entities
### 1. Environment (Updated)
- **Source**: `backend/src/core/config_models.py`
- **Purpose**: Represents a Superset environment configuration, now incorporating all fields required for the `SupersetClient`.
- **Fields**:
- `id`: `str` (Unique identifier)
- `name`: `str` (Display name)
- `url`: `str` (Base URL for API)
- `username`: `str`
- `password`: `str`
- `verify_ssl`: `bool` (Default: `True`)
- `timeout`: `int` (Default: `30`)
- `is_default`: `bool` (Default: `False`)
- `backup_schedule`: `Schedule`
- **Validation Rules**:
- `url` must start with `http://` or `https://`.
- `timeout` must be positive.
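A sketch of how these rules might be expressed on the model (Pydantic v2 `field_validator` syntax is an assumption; the URL error message follows the one asserted by the legacy tests):
```python
from pydantic import BaseModel, field_validator

class Environment(BaseModel):
    # Other fields from the list above are omitted for brevity.
    url: str
    timeout: int = 30

    @field_validator("url")
    @classmethod
    def _url_scheme(cls, v: str) -> str:
        if not v.startswith(("http://", "https://")):
            raise ValueError("Must start with http:// or https://")
        return v

    @field_validator("timeout")
    @classmethod
    def _timeout_positive(cls, v: int) -> int:
        if v <= 0:
            raise ValueError("timeout must be positive")
        return v
```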
### 2. API Contract (SupersetClient)
- **Source**: `backend/src/core/superset_client.py`
- **Purpose**: Self-contained client for Superset API interactions.
- **Key Methods**:
- `authenticate()`
- `get_dashboards(query)`
- `get_datasets(query)`
- `get_databases(query)`
- `export_dashboard(dashboard_id)`
- `import_dashboard(file_name, dash_id, dash_slug)`
- `delete_dashboard(dashboard_id)`
- `get_databases_summary()`
- `get_dashboards_summary()`
## State Transitions
- **Authentication**: Unauthenticated -> Authenticated (token stored in memory/client session).
- **Import/Export**: File System <-> Superset API.

View File

@@ -0,0 +1,69 @@
# Implementation Plan: 012-remove-superset-tool
**Branch**: `012-remove-superset-tool` | **Date**: 2026-01-22 | **Spec**: [spec.md](specs/012-remove-superset-tool/spec.md)
## Summary
The primary requirement is to refactor the backend by removing the redundant `superset_tool` module and merging its essential logic into the `backend` package. This involves migrating the `SupersetClient` logic, utility modules, and configuration models while deprecating CLI-specific interactive logic.
## Technical Context
**Language/Version**: Python 3.9+
**Primary Dependencies**: FastAPI, Pydantic, requests, pyyaml (migrated from superset_tool)
**Storage**: SQLite (tasks.db, migrations.db), Filesystem
**Testing**: pytest
**Target Platform**: Linux server
**Project Type**: Web application (FastAPI backend + SvelteKit frontend)
**Performance Goals**: N/A (Cleanup task)
**Constraints**: Zero references to `superset_tool` in `backend/` source code.
**Scale/Scope**: Refactoring core backend infrastructure.
## Constitution Check
*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.*
1. **Semantic Protocol Compliance**: All migrated code must use `[DEF]` anchors and `@RELATION` tags as per `semantic_protocol.md`.
2. **Causal Validity**: Contracts (Pydantic models and API surfaces) must be defined before implementation.
3. **Everything is a Plugin**: Ensure that logic previously in `superset_tool` that acts as a tool or extension is integrated within the backend's plugin architecture if applicable.
4. **Fractal Complexity Limit**: Migrated modules must adhere to complexity limits.
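For reference, a minimal illustration of the anchor and relation tags as they appear throughout the migrated sources (the names here are placeholders):
```python
# [DEF:fetch_items:Function]
# @PURPOSE: Placeholder function showing the semantic-protocol anchors.
# @RELATION: DEPENDS_ON -> backend.core.utils.network
def fetch_items():
    pass
# [/DEF:fetch_items:Function]
```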
## Project Structure
### Documentation (this feature)
```text
specs/012-remove-superset-tool/
├── plan.md # This file
├── research.md # Phase 0 output
├── data-model.md # Phase 1 output
├── quickstart.md # Phase 1 output
├── contracts/ # Phase 1 output
└── tasks.md # Phase 2 output
```
### Source Code (repository root)
```text
backend/
├── src/
│ ├── api/ # API routes
│ ├── core/ # Core logic (Target for migration)
│ │ ├── utils/ # Shared utilities
│ │ ├── config_models.py
│ │ ├── logger.py
│ │ └── superset_client.py
│ ├── models/ # Database models
│ ├── plugins/ # Plugin system
│ └── app.py
└── tests/ # Backend tests
```
**Structure Decision**: Web application structure. The `superset_tool` logic will be absorbed into `backend/src/core/` and its subdirectories.
## Complexity Tracking
> **Fill ONLY if Constitution Check has violations that must be justified**
| Violation | Why Needed | Simpler Alternative Rejected Because |
|-----------|------------|-------------------------------------|
| None | | |

View File

@@ -0,0 +1,20 @@
# Quickstart: 012-remove-superset-tool
## Overview
This feature refactors the backend to remove the `superset_tool` dependency. All Superset-related logic is now centralized in `backend/src/core/superset_client.py`.
## Key Changes
1. **Models**: `Environment` model in `backend/src/core/config_models.py` now includes `verify_ssl` and `timeout`.
2. **Client**: `SupersetClient` in `backend/src/core/superset_client.py` is now self-contained and does not inherit from any external module.
3. **Utilities**: Network and file I/O utilities previously in `superset_tool` are now available in `backend/src/core/utils/`.
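In practice, the changes above map old import paths to backend-local ones roughly as follows (the absolute forms are inferred from the relative imports in this commit's diffs):
```python
# Before: imports resolved against the standalone package.
# from superset_tool.client import SupersetClient
# from superset_tool.utils.fileio import create_temp_file
# from superset_tool.models import SupersetConfig

# After: backend-local equivalents, as used by the migrated plugins.
from backend.src.core.superset_client import SupersetClient
from backend.src.core.utils.fileio import create_temp_file
from backend.src.core.config_models import Environment
```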
## Verification Steps
1. **Run Backend**: Start the FastAPI server and ensure no import errors occur.
```bash
cd backend && .venv/bin/python3 -m uvicorn src.app:app --reload
```
2. **Check Connections**: Verify that existing Superset connections load correctly in the UI/API.
3. **Test Migration**: Trigger a dashboard list refresh or a migration task to confirm the consolidated client works as expected.
## Cleanup
Once verified, the `superset_tool/` directory and root-level CLI scripts (`migration_script.py`, `run_mapper.py`) can be deleted.

View File

@@ -0,0 +1,32 @@
# Research: 012-remove-superset-tool
## Unknowns & Clarifications
### 1. Merging `SupersetConfig` into `Environment`
- **Decision**: Fields from `SupersetConfig` (auth, verify_ssl, timeout) will be merged into the `Environment` model in `backend/src/core/config_models.py`.
- **Rationale**: Centralizes configuration and removes dependency on `superset_tool.models`.
- **Alternatives considered**: Keeping them separate, but that defeats the purpose of the refactoring.
### 2. Utility Migration
- **Decision**: `superset_tool/utils/` will be flattened into `backend/src/core/utils/`.
- **Rationale**: Simplifies the directory structure and aligns with the backend's existing utility pattern.
- **Alternatives considered**: Creating a `backend/src/core/utils/superset/` subdirectory, but flattening is preferred for simplicity.
### 3. CLI Deprecation
- **Decision**: `migration_script.py`, `run_mapper.py`, and `whiptail_fallback.py` will be deprecated and not migrated.
- **Rationale**: The web UI now handles these operations, and maintaining interactive CLI logic in the backend is out of scope.
### 4. Logging Consolidation
- **Decision**: Remove `SupersetLogger` and `belief_scope` from `superset_tool` and use `backend/src/core/logger.py`.
- **Rationale**: Ensures consistent logging across the entire backend application.
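A minimal sketch of the consolidated pattern (`logger` and `belief_scope` are the names the updated call sites import):
```python
from backend.src.core.logger import logger, belief_scope

def refresh_dashboards(client):
    # Scoped logging replaces the per-module SupersetLogger instances.
    with belief_scope("refresh_dashboards"):
        count, _ = client.get_dashboards()
        logger.info(f"[refresh_dashboards][State] Found {count} dashboards.")
```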
## Dependency Analysis
- `backend/requirements.txt` already contains `requests`, `PyYAML`, `pydantic`.
- `superset_tool/requirements.txt` is empty, indicating it likely relied on the same environment or was partially integrated.
- **Action**: Ensure all `superset_tool` imports are replaced with backend-local equivalents.
## Integration Patterns
- `backend/src/core/superset_client.py` currently inherits from `superset_tool.client.SupersetClient`.
- **Action**: Move all methods from `superset_tool/client.py` into `backend/src/core/superset_client.py` and remove the inheritance. Refactor `APIClient` (from `superset_tool/utils/network.py`) to be a utility or private helper within the backend.
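Schematically, inheritance gives way to composition; a rough before/after sketch (the `APIClient` constructor signature is assumed for illustration):
```python
# Before: the backend client extended the external package.
#   class SupersetClient(superset_tool.client.BaseSupersetClient): ...

# After: a self-contained client that owns its network helper.
from backend.src.core.utils.network import APIClient  # moved from superset_tool

class SupersetClient:
    def __init__(self, env):
        self.config = env                            # env: Environment model
        self.network = APIClient(base_url=env.url)   # hypothetical signature
```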

View File

@@ -0,0 +1,78 @@
# Feature Specification: Backend Refactoring - Remove superset_tool
**Feature Branch**: `012-remove-superset-tool`
**Created**: 2026-01-22
**Status**: Draft
**Input**: User description: "The backend needs refactoring - I want to remove the superset_tool module so that only the backend remains"
## Clarifications
### Session 2026-01-22
- Q: How should we reconcile the `SupersetConfig` model from `superset_tool` with the `Environment` model in the backend? → A: Merge all `SupersetConfig` fields (e.g., `verify_ssl`, `timeout`) into the `Environment` model in `backend/src/core/config_models.py`.
- Q: Where should the utility modules from `superset_tool/utils/` be moved within the `backend/src/core/` directory? → A: Flatten them directly into `backend/src/core/utils/` alongside existing utilities.
- Q: Should interactive CLI utilities (whiptail_fallback.py, migration_script.py) be migrated? → A: No, deprecate them and do not migrate CLI-specific interactive UI logic to the backend.
- Q: How should we handle `SupersetLogger` and `belief_scope`? → A: Consolidate all logging into `backend/src/core/logger.py` and remove the redundant `SupersetLogger` class.
## User Scenarios & Testing *(mandatory)*
### User Story 1 - Maintain Core Functionality (Priority: P1)
As a developer, I want to remove the redundant `superset_tool` module and merge its essential logic into the `backend` package so that the codebase is easier to maintain and deploy.
**Why this priority**: This is a critical architectural cleanup. The existence of two overlapping modules (`superset_tool` and `backend/src/core/superset_client.py`) creates confusion and potential bugs.
**Independent Test**: The application (FastAPI backend) should start and perform all Superset-related operations (fetching dashboards, datasets, etc.) without the `superset_tool` directory present in the root.
**Acceptance Scenarios**:
1. **Given** the `superset_tool` directory is deleted, **When** I run the backend server, **Then** it should start without `ModuleNotFoundError`.
2. **Given** the backend is running, **When** I trigger a dashboard list refresh in the UI, **Then** the backend should successfully fetch data from Superset using the migrated client logic.
3. **Given** a migration task, **When** I execute it, **Then** it should successfully use the consolidated `SupersetClient` to interact with the API.
---
### User Story 2 - Unified Configuration and Models (Priority: P2)
As a developer, I want to use a single set of Pydantic models and configuration logic within the `backend` package instead of relying on external `superset_tool.models`.
**Why this priority**: Eliminates duplicate definitions and ensures consistency across the backend.
**Independent Test**: Verify that `backend/src/core/config_models.py` (or equivalent) contains all necessary fields previously defined in `superset_tool/models.py`.
**Acceptance Scenarios**:
1. **Given** a connection configuration in the database, **When** the backend loads it, **Then** it should correctly instantiate the consolidated configuration models.
---
### Edge Cases
- **Deprecated Scripts**: CLI-only scripts like `migration_script.py` and `run_mapper.py` in the root will be deprecated and removed as their logic is now in the web UI.
- **Dependency Conflicts**: How does the system handle potential version conflicts between `backend/requirements.txt` and `superset_tool/requirements.txt`?
- **Environment Variables**: Ensure that any environment variables previously used by `superset_tool` are still correctly loaded by the consolidated client.
## Requirements *(mandatory)*
### Functional Requirements
- **FR-001**: The system MUST NOT depend on the `superset_tool` top-level module.
- **FR-002**: All essential logic from `superset_tool/client.py` MUST be migrated to `backend/src/core/superset_client.py`. Utility modules from `superset_tool/utils/` (network, fileio, dataset_mapper) MUST be migrated to `backend/src/core/utils/`.
- **FR-003**: `backend/src/core/superset_client.py` MUST be refactored to be a self-contained client that doesn't inherit from `superset_tool.client.BaseSupersetClient`.
- **FR-004**: Redundant logging logic (`SupersetLogger`) MUST be removed in favor of `backend/src/core/logger.py`.
- **FR-005**: The backend's `requirements.txt` MUST be updated to include any dependencies previously required by `superset_tool` (e.g., `requests`, `pyyaml`).
- **FR-006**: All imports in `backend/src/` that reference `superset_tool` MUST be updated to reference the new locations within the backend.
### Key Entities *(include if feature involves data)*
- **SupersetClient**: The consolidated class responsible for all interactions with the Superset REST API.
- **Environment**: The unified configuration model for Superset connections, incorporating legacy `SupersetConfig` fields.
## Success Criteria *(mandatory)*
### Measurable Outcomes
- **SC-001**: Zero references to `superset_tool` in the `backend/` source code (see the check sketched after this list).
- **SC-002**: 100% of existing Superset integration tests pass after the refactoring.
- **SC-003**: The `superset_tool/` directory can be safely deleted from the repository.
- **SC-004**: Backend startup time remains unaffected or improves slightly due to simplified import structure.
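A hedged sketch of an automated SC-001 check; it assumes the `backend/` tree from the spec and is illustrative rather than part of the feature:

```python
# Illustrative SC-001 check: fail if any backend source file still
# references superset_tool. The backend/ path comes from the spec.
from pathlib import Path

def test_no_superset_tool_references() -> None:
    offenders = [
        str(path)
        for path in Path("backend").rglob("*.py")
        if "superset_tool" in path.read_text(encoding="utf-8")
    ]
    assert not offenders, f"superset_tool still referenced in: {offenders}"
```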

View File

@@ -0,0 +1,34 @@
# Tasks: 012-remove-superset-tool
## Phase 1: Setup
- [x] T001 Initialize branch `012-remove-superset-tool`
- [x] T002 Update `backend/requirements.txt` with dependencies from `superset_tool` (requests, pyyaml)
## Phase 2: Foundational (Infrastructure Migration)
- [x] T003 [P] Migrate utility modules from `superset_tool/utils/` to `backend/src/core/utils/`
- [x] T004 [P] Refactor `backend/src/core/logger.py` to incorporate essential logging logic from `superset_tool/utils/logging.py`
- [x] T005 Update `Environment` model in `backend/src/core/config_models.py` with `verify_ssl` and `timeout` fields
## Phase 3: User Story 1 - Maintain Core Functionality [US1]
- [x] T006 [US1] Refactor `backend/src/core/superset_client.py` to be self-contained (remove inheritance from `superset_tool.client.BaseSupersetClient`)
- [x] T007 [US1] Migrate all remaining methods from `superset_tool/client.py` to `backend/src/core/superset_client.py`
- [x] T008 [P] [US1] Update all imports in `backend/src/` to remove references to `superset_tool`
- [x] T009 [US1] Verify backend starts without `superset_tool` directory present (see the smoke-test sketch after this phase)
- [x] T010 [US1] Verify dashboard and dataset fetching works via API/UI
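A minimal smoke-test sketch for T009; the `backend.src.main` module path and `app` attribute are assumptions about the FastAPI entry point, not confirmed by this plan:

```python
# Hypothetical T009 smoke test: the backend package must import cleanly
# once the superset_tool directory is gone. Module path is assumed.
import importlib

def test_backend_imports_without_superset_tool() -> None:
    module = importlib.import_module("backend.src.main")
    assert hasattr(module, "app"), "FastAPI app should be importable"
```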
## Phase 4: User Story 2 - Unified Configuration and Models [US2]
- [x] T011 [US2] Ensure all Pydantic models in `backend/src/core/config_models.py` cover legacy `superset_tool/models.py` requirements
- [x] T012 [US2] Update database loading logic to correctly populate new `Environment` fields
## Phase 5: Polish & Cleanup
- [x] T013 Remove `superset_tool/` directory
- [x] T014 Remove deprecated root scripts: `migration_script.py`, `run_mapper.py`, `whiptail_fallback.py`
- [x] T015 Run full backend test suite to ensure no regressions
## Dependencies
- Phase 2 must be completed before Phase 3.
- T006 and T007 are prerequisites for T008 and T009.
## Implementation Strategy
- **MVP First**: Focus on T006-T008 to ensure the backend can function independently of the `superset_tool` module.
- **Incremental Delivery**: Migrate utilities and models first to provide the necessary foundation for the client refactoring.

View File

@@ -1,14 +0,0 @@
# [DEF:superset_tool:Module]
# @SEMANTICS: package, root
# @PURPOSE: Root package for superset_tool.
# @LAYER: Domain
# @PUBLIC_API: SupersetClient, SupersetConfig
# [SECTION: IMPORTS]
from .client import SupersetClient
from .models import SupersetConfig
# [/SECTION]
__all__ = ["SupersetClient", "SupersetConfig"]
# [/DEF:superset_tool:Module]

View File

@@ -1,516 +0,0 @@
# [DEF:superset_tool.client:Module]
#
# @SEMANTICS: superset, api, client, rest, http, dashboard, dataset, import, export
# @PURPOSE: Provides a high-level client for interacting with the Superset REST API, encapsulating request logic, error handling, and pagination.
# @LAYER: Domain
# @RELATION: DEPENDS_ON -> superset_tool.models
# @RELATION: DEPENDS_ON -> superset_tool.exceptions
# @RELATION: DEPENDS_ON -> superset_tool.utils
#
# @INVARIANT: All network operations must use the internal APIClient instance.
# @CONSTRAINT: No direct use of 'requests' library outside of APIClient.
# @PUBLIC_API: SupersetClient
# [SECTION: IMPORTS]
import json
import zipfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from requests import Response
from superset_tool.models import SupersetConfig
from superset_tool.exceptions import ExportError, InvalidZipFormatError
from superset_tool.utils.fileio import get_filename_from_headers
from superset_tool.utils.logger import SupersetLogger, belief_scope
from superset_tool.utils.network import APIClient
# [/SECTION]
# [DEF:SupersetClient:Class]
# @PURPOSE: Wrapper class over the Superset REST API, providing methods for working with dashboards and datasets.
# @RELATION: CREATES_INSTANCE_OF -> APIClient
# @RELATION: USES -> SupersetConfig
class SupersetClient:
# [DEF:authenticate:Function]
# @PURPOSE: Authenticates the client using the configured credentials.
# @PRE: self.network must be initialized with valid auth configuration.
# @POST: Client is authenticated and tokens are stored.
# @RETURN: Dict[str, str] - Authentication tokens.
def authenticate(self):
with belief_scope("SupersetClient.authenticate"):
return self.network.authenticate()
# [DEF:__init__:Function]
# @PURPOSE: Initializes the client, validates the configuration, and creates the network client.
# @PRE: `config` must be a valid SupersetConfig object.
# @POST: The `logger`, `config`, and `network` attributes are created and ready for use.
# @PARAM: config (SupersetConfig) - Connection configuration.
# @PARAM: logger (Optional[SupersetLogger]) - Logger instance.
def __init__(self, config: SupersetConfig, logger: Optional[SupersetLogger] = None):
with belief_scope("__init__"):
self.logger = logger or SupersetLogger(name="SupersetClient")
self.logger.info("[SupersetClient.__init__][Enter] Initializing SupersetClient.")
self._validate_config(config)
self.config = config
self.network = APIClient(
config=config.dict(),
verify_ssl=config.verify_ssl,
timeout=config.timeout,
logger=self.logger,
)
self.delete_before_reimport: bool = False
self.logger.info("[SupersetClient.__init__][Exit] SupersetClient initialized.")
# [/DEF:__init__:Function]
# [DEF:_validate_config:Function]
# @PURPOSE: Verifies that the given configuration object has the correct type.
# @PRE: `config` must be provided.
# @POST: Execution continues if the check passes.
# @THROW: TypeError - If `config` is not a `SupersetConfig` instance.
# @PARAM: config (SupersetConfig) - Object to validate.
def _validate_config(self, config: SupersetConfig) -> None:
with belief_scope("_validate_config"):
self.logger.debug("[_validate_config][Enter] Validating SupersetConfig.")
assert isinstance(config, SupersetConfig), "Configuration must be an instance of SupersetConfig"
self.logger.debug("[_validate_config][Exit] Config is valid.")
# [/DEF:_validate_config:Function]
@property
# [DEF:headers:Function]
# @PURPOSE: Returns the base HTTP headers used by the network client.
# @PRE: self.network must be initialized.
# @POST: The returned dict contains current headers, including the authorization token.
def headers(self) -> dict:
with belief_scope("headers"):
return self.network.headers
# [/DEF:headers:Function]
# [DEF:get_dashboards:Function]
# @PURPOSE: Fetches the full list of dashboards, handling pagination automatically.
# @RELATION: CALLS -> self._fetch_total_object_count
# @RELATION: CALLS -> self._fetch_all_pages
# @PRE: self.network must be initialized.
# @POST: The returned list contains all dashboards available via the API.
# @THROW: APIError - On network request failure.
# @PARAM: query (Optional[Dict]) - Additional query parameters for the API.
# @RETURN: Tuple[int, List[Dict]] - Tuple of (total count, list of dashboards).
def get_dashboards(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_dashboards"):
assert self.network, "[get_dashboards][PRE] Network client must be initialized."
self.logger.info("[get_dashboards][Enter] Fetching dashboards.")
validated_query = self._validate_query_params(query or {})
if 'columns' not in validated_query:
validated_query['columns'] = ["slug", "id", "changed_on_utc", "dashboard_title", "published"]
total_count = self._fetch_total_object_count(endpoint="/dashboard/")
paginated_data = self._fetch_all_pages(
endpoint="/dashboard/",
pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
)
self.logger.info("[get_dashboards][Exit] Found %d dashboards.", total_count)
return total_count, paginated_data
# [/DEF:get_dashboards:Function]
# [DEF:export_dashboard:Function]
# @PURPOSE: Exports a dashboard as a ZIP archive.
# @RELATION: CALLS -> self.network.request
# @PRE: dashboard_id must be a positive integer.
# @POST: Returns the binary contents of the ZIP archive and the file name.
# @THROW: ExportError - If the export fails.
# @PARAM: dashboard_id (int) - ID of the dashboard to export.
# @RETURN: Tuple[bytes, str] - Binary ZIP contents and file name.
def export_dashboard(self, dashboard_id: int) -> Tuple[bytes, str]:
with belief_scope("export_dashboard"):
assert isinstance(dashboard_id, int) and dashboard_id > 0, "[export_dashboard][PRE] dashboard_id must be a positive integer."
self.logger.info("[export_dashboard][Enter] Exporting dashboard %s.", dashboard_id)
response = self.network.request(
method="GET",
endpoint="/dashboard/export/",
params={"q": json.dumps([dashboard_id])},
stream=True,
raw_response=True,
)
response = cast(Response, response)
self._validate_export_response(response, dashboard_id)
filename = self._resolve_export_filename(response, dashboard_id)
self.logger.info("[export_dashboard][Exit] Exported dashboard %s to %s.", dashboard_id, filename)
return response.content, filename
# [/DEF:export_dashboard:Function]
# [DEF:import_dashboard:Function]
# @PURPOSE: Imports a dashboard from a ZIP file, optionally deleting the existing dashboard and retrying on failure.
# @RELATION: CALLS -> self._do_import
# @RELATION: CALLS -> self.delete_dashboard
# @RELATION: CALLS -> self.get_dashboards
# @PRE: The file referenced by `file_name` must exist and be a valid Superset ZIP archive.
# @POST: The dashboard is imported successfully and the API response is returned.
# @THROW: FileNotFoundError - If the file is not found.
# @THROW: InvalidZipFormatError - If the file is not a valid Superset ZIP archive.
# @PARAM: file_name (Union[str, Path]) - Path to the ZIP archive.
# @PARAM: dash_id (Optional[int]) - Dashboard ID to delete on failure.
# @PARAM: dash_slug (Optional[str]) - Dashboard slug used to look up the ID if none is provided.
# @RETURN: Dict - API response on success.
def import_dashboard(self, file_name: Union[str, Path], dash_id: Optional[int] = None, dash_slug: Optional[str] = None) -> Dict:
with belief_scope("import_dashboard"):
assert file_name, "[import_dashboard][PRE] file_name must be provided."
file_path = str(file_name)
self._validate_import_file(file_path)
try:
return self._do_import(file_path)
except Exception as exc:
self.logger.error("[import_dashboard][Failure] First import attempt failed: %s", exc, exc_info=True)
if not self.delete_before_reimport:
raise
target_id = self._resolve_target_id_for_delete(dash_id, dash_slug)
if target_id is None:
self.logger.error("[import_dashboard][Failure] No ID available for delete-retry.")
raise
self.delete_dashboard(target_id)
self.logger.info("[import_dashboard][State] Deleted dashboard ID %s, retrying import.", target_id)
return self._do_import(file_path)
# [/DEF:import_dashboard:Function]
# [DEF:_resolve_target_id_for_delete:Function]
# @PURPOSE: Determines the dashboard ID to delete, using the ID or slug.
# @PARAM: dash_id (Optional[int]) - Dashboard ID.
# @PARAM: dash_slug (Optional[str]) - Dashboard slug.
# @PRE: At least one of dash_id or dash_slug must be provided.
# @POST: Returns the dashboard ID if found, otherwise None.
# @THROW: APIError - On network request failure while resolving by slug.
# @RETURN: Optional[int] - The resolved ID or None.
def _resolve_target_id_for_delete(self, dash_id: Optional[int], dash_slug: Optional[str]) -> Optional[int]:
with belief_scope("_resolve_target_id_for_delete"):
assert dash_id is not None or dash_slug is not None, "[_resolve_target_id_for_delete][PRE] At least one of ID or slug must be provided."
if dash_id is not None:
return dash_id
if dash_slug is not None:
self.logger.debug("[_resolve_target_id_for_delete][State] Resolving ID by slug '%s'.", dash_slug)
try:
_, candidates = self.get_dashboards(query={"filters": [{"col": "slug", "op": "eq", "value": dash_slug}]})
if candidates:
target_id = candidates[0]["id"]
self.logger.debug("[_resolve_target_id_for_delete][Success] Resolved slug to ID %s.", target_id)
return target_id
except Exception as e:
self.logger.warning("[_resolve_target_id_for_delete][Warning] Could not resolve slug '%s' to ID: %s", dash_slug, e)
return None
self.logger.debug("[_resolve_target_id_for_delete][State] Resolving ID by slug '%s'.", dash_slug)
try:
_, candidates = self.get_dashboards(query={"filters": [{"col": "slug", "op": "eq", "value": dash_slug}]})
if candidates:
target_id = candidates[0]["id"]
self.logger.debug("[_resolve_target_id_for_delete][Success] Resolved slug to ID %s.", target_id)
return target_id
except Exception as e:
self.logger.warning("[_resolve_target_id_for_delete][Warning] Could not resolve slug '%s' to ID: %s", dash_slug, e)
return None
# [/DEF:_resolve_target_id_for_delete:Function]
# [DEF:_do_import:Function]
# @PURPOSE: Performs a single import request without exception handling.
# @PRE: The file must exist.
# @POST: The file is uploaded successfully and the API response is returned.
# @THROW: FileNotFoundError - If the file does not exist.
# @PARAM: file_name (Union[str, Path]) - Path to the file.
# @RETURN: Dict - API response.
def _do_import(self, file_name: Union[str, Path]) -> Dict:
with belief_scope("_do_import"):
self.logger.debug(f"[_do_import][State] Uploading file: {file_name}")
file_path = Path(file_name)
if file_path.exists():
self.logger.debug(f"[_do_import][State] File size: {file_path.stat().st_size} bytes")
else:
self.logger.error(f"[_do_import][Failure] File does not exist: {file_name}")
raise FileNotFoundError(f"File does not exist: {file_name}")
return self.network.upload_file(
endpoint="/dashboard/import/",
file_info={"file_obj": file_path, "file_name": file_path.name, "form_field": "formData"},
extra_data={"overwrite": "true"},
timeout=self.config.timeout * 2,
)
# [/DEF:_do_import:Function]
# [DEF:delete_dashboard:Function]
# @PURPOSE: Deletes a dashboard by its ID or slug.
# @RELATION: CALLS -> self.network.request
# @PRE: dashboard_id must be provided.
# @POST: The dashboard is deleted, or a warning is logged.
# @THROW: APIError - On network request failure.
# @PARAM: dashboard_id (Union[int, str]) - Dashboard ID or slug.
def delete_dashboard(self, dashboard_id: Union[int, str]) -> None:
with belief_scope("delete_dashboard"):
assert dashboard_id, "[delete_dashboard][PRE] dashboard_id must be provided."
self.logger.info("[delete_dashboard][Enter] Deleting dashboard %s.", dashboard_id)
response = self.network.request(method="DELETE", endpoint=f"/dashboard/{dashboard_id}")
response = cast(Dict, response)
if response.get("result", True) is not False:
self.logger.info("[delete_dashboard][Success] Dashboard %s deleted.", dashboard_id)
else:
self.logger.warning("[delete_dashboard][Warning] Unexpected response while deleting %s: %s", dashboard_id, response)
# [/DEF:delete_dashboard:Function]
# [DEF:_extract_dashboard_id_from_zip:Function]
# @PURPOSE: Extracts the dashboard ID from `metadata.yaml` inside the ZIP archive.
# @PARAM: file_name (Union[str, Path]) - Path to the ZIP file.
# @PRE: The file referenced by `file_name` must be a valid ZIP archive.
# @POST: Returns the dashboard ID if found in metadata.yaml, otherwise None.
# @THROW: ImportError - If `yaml` is not installed.
# @RETURN: Optional[int] - Dashboard ID or None.
def _extract_dashboard_id_from_zip(self, file_name: Union[str, Path]) -> Optional[int]:
with belief_scope("_extract_dashboard_id_from_zip"):
assert zipfile.is_zipfile(file_name), "[_extract_dashboard_id_from_zip][PRE] file_name must be a valid zip file."
try:
import yaml
with zipfile.ZipFile(file_name, "r") as zf:
for name in zf.namelist():
if name.endswith("metadata.yaml"):
with zf.open(name) as meta_file:
meta = yaml.safe_load(meta_file)
dash_id = meta.get("dashboard_uuid") or meta.get("dashboard_id")
if dash_id: return int(dash_id)
except Exception as exc:
self.logger.error("[_extract_dashboard_id_from_zip][Failure] %s", exc, exc_info=True)
return None
# [/DEF:_extract_dashboard_id_from_zip:Function]
# [DEF:_extract_dashboard_slug_from_zip:Function]
# @PURPOSE: Extracts the dashboard slug from `metadata.yaml` inside the ZIP archive.
# @PARAM: file_name (Union[str, Path]) - Path to the ZIP file.
# @PRE: The file referenced by `file_name` must be a valid ZIP archive.
# @POST: Returns the dashboard slug if found in metadata.yaml, otherwise None.
# @THROW: ImportError - If `yaml` is not installed.
# @RETURN: Optional[str] - Dashboard slug or None.
def _extract_dashboard_slug_from_zip(self, file_name: Union[str, Path]) -> Optional[str]:
with belief_scope("_extract_dashboard_slug_from_zip"):
assert zipfile.is_zipfile(file_name), "[_extract_dashboard_slug_from_zip][PRE] file_name must be a valid zip file."
try:
import yaml
with zipfile.ZipFile(file_name, "r") as zf:
for name in zf.namelist():
if name.endswith("metadata.yaml"):
with zf.open(name) as meta_file:
meta = yaml.safe_load(meta_file)
if slug := meta.get("slug"):
return str(slug)
except Exception as exc:
self.logger.error("[_extract_dashboard_slug_from_zip][Failure] %s", exc, exc_info=True)
return None
# [/DEF:_extract_dashboard_slug_from_zip:Function]
# [DEF:_validate_export_response:Function]
# @PURPOSE: Verifies that the HTTP export response is a valid ZIP archive.
# @PRE: response must be a requests.Response object.
# @POST: Validation passes if the response is a non-empty ZIP archive.
# @THROW: ExportError - If the response is not a ZIP archive or is empty.
# @PARAM: response (Response) - HTTP response.
# @PARAM: dashboard_id (int) - Dashboard ID.
def _validate_export_response(self, response: Response, dashboard_id: int) -> None:
with belief_scope("_validate_export_response"):
assert isinstance(response, Response), "[_validate_export_response][PRE] response must be a requests.Response object."
content_type = response.headers.get("Content-Type", "")
if "application/zip" not in content_type:
raise ExportError(f"Получен не ZIP-архив (Content-Type: {content_type})")
if not response.content:
raise ExportError("Получены пустые данные при экспорте")
# [/DEF:_validate_export_response:Function]
# [DEF:_resolve_export_filename:Function]
# @PURPOSE: Determines the export file name from the headers, or generates one.
# @PRE: response must be a requests.Response object.
# @POST: Returns a non-empty file name.
# @PARAM: response (Response) - HTTP response.
# @PARAM: dashboard_id (int) - Dashboard ID.
# @RETURN: str - File name.
def _resolve_export_filename(self, response: Response, dashboard_id: int) -> str:
with belief_scope("_resolve_export_filename"):
assert isinstance(response, Response), "[_resolve_export_filename][PRE] response must be a requests.Response object."
filename = get_filename_from_headers(dict(response.headers))
if not filename:
from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
filename = f"dashboard_export_{dashboard_id}_{timestamp}.zip"
self.logger.warning("[_resolve_export_filename][Warning] Generated filename: %s", filename)
return filename
# [/DEF:_resolve_export_filename:Function]
# [DEF:_validate_query_params:Function]
# @PURPOSE: Builds a valid set of query parameters with pagination defaults.
# @PARAM: query (Optional[Dict]) - Original parameters.
# @PRE: query, if provided, must be a dictionary.
# @POST: Returns a dictionary with base pagination parameters merged with `query`.
# @RETURN: Dict - Validated parameters.
def _validate_query_params(self, query: Optional[Dict]) -> Dict:
with belief_scope("_validate_query_params"):
assert query is None or isinstance(query, dict), "[_validate_query_params][PRE] query must be a dictionary or None."
base_query = {"page": 0, "page_size": 1000}
return {**base_query, **(query or {})}
# [/DEF:_validate_query_params:Function]
# [DEF:_fetch_total_object_count:Function]
# @PURPOSE: Fetches the total object count for the given endpoint, for pagination.
# @PARAM: endpoint (str) - API endpoint.
# @PRE: endpoint must be a non-empty string.
# @POST: Returns the total object count (>= 0).
# @THROW: APIError - On network request failure.
# @RETURN: int - Object count.
def _fetch_total_object_count(self, endpoint: str) -> int:
with belief_scope("_fetch_total_object_count"):
assert endpoint and isinstance(endpoint, str), "[_fetch_total_object_count][PRE] endpoint must be a non-empty string."
return self.network.fetch_paginated_count(
endpoint=endpoint,
query_params={"page": 0, "page_size": 1},
count_field="count",
)
# [/DEF:_fetch_total_object_count:Function]
# [DEF:_fetch_all_pages:Function]
# @PURPOSE: Iterates over all pages of a paginated API and collects all data.
# @PARAM: endpoint (str) - API endpoint.
# @PARAM: pagination_options (Dict) - Pagination options.
# @PRE: endpoint must be a non-empty string; pagination_options must be a dictionary.
# @POST: Returns the full list of objects.
# @THROW: APIError - On network request failure.
# @RETURN: List[Dict] - List of all objects.
def _fetch_all_pages(self, endpoint: str, pagination_options: Dict) -> List[Dict]:
with belief_scope("_fetch_all_pages"):
assert endpoint and isinstance(endpoint, str), "[_fetch_all_pages][PRE] endpoint must be a non-empty string."
assert isinstance(pagination_options, dict), "[_fetch_all_pages][PRE] pagination_options must be a dictionary."
return self.network.fetch_paginated_data(endpoint=endpoint, pagination_options=pagination_options)
# [/DEF:_fetch_all_pages:Function]
# [DEF:_validate_import_file:Function]
# @PURPOSE: Verifies that the file exists, is a ZIP archive, and contains `metadata.yaml`.
# @PRE: zip_path must be provided.
# @POST: Validation passes if the file exists, is a ZIP, and contains `metadata.yaml`.
# @THROW: FileNotFoundError - If the file is not found.
# @THROW: InvalidZipFormatError - If the file is not a ZIP or lacks `metadata.yaml`.
# @PARAM: zip_path (Union[str, Path]) - Path to the file.
def _validate_import_file(self, zip_path: Union[str, Path]) -> None:
with belief_scope("_validate_import_file"):
assert zip_path, "[_validate_import_file][PRE] zip_path must be provided."
path = Path(zip_path)
assert path.exists(), f"File {zip_path} does not exist"
assert zipfile.is_zipfile(path), f"File {zip_path} is not a ZIP archive"
with zipfile.ZipFile(path, "r") as zf:
assert any(n.endswith("metadata.yaml") for n in zf.namelist()), f"Archive {zip_path} does not contain 'metadata.yaml'"
# [/DEF:_validate_import_file:Function]
# [DEF:get_datasets:Function]
# @PURPOSE: Fetches the full list of datasets, handling pagination automatically.
# @RELATION: CALLS -> self._fetch_total_object_count
# @RELATION: CALLS -> self._fetch_all_pages
# @PARAM: query (Optional[Dict]) - Additional query parameters.
# @PRE: self.network must be initialized.
# @POST: The returned list contains all datasets available via the API.
# @THROW: APIError - On network request failure.
# @RETURN: Tuple[int, List[Dict]] - Tuple of (total count, list of datasets).
def get_datasets(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_datasets"):
assert self.network, "[get_datasets][PRE] Network client must be initialized."
self.logger.info("[get_datasets][Enter] Fetching datasets.")
validated_query = self._validate_query_params(query)
total_count = self._fetch_total_object_count(endpoint="/dataset/")
paginated_data = self._fetch_all_pages(
endpoint="/dataset/",
pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
)
self.logger.info("[get_datasets][Exit] Found %d datasets.", total_count)
return total_count, paginated_data
# [/DEF:get_datasets:Function]
# [DEF:get_databases:Function]
# @PURPOSE: Fetches the full list of databases, handling pagination automatically.
# @RELATION: CALLS -> self._fetch_total_object_count
# @RELATION: CALLS -> self._fetch_all_pages
# @PARAM: query (Optional[Dict]) - Additional query parameters.
# @PRE: self.network must be initialized.
# @POST: The returned list contains all databases available via the API.
# @THROW: APIError - On network request failure.
# @RETURN: Tuple[int, List[Dict]] - Tuple of (total count, list of databases).
def get_databases(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_databases"):
assert self.network, "[get_databases][PRE] Network client must be initialized."
self.logger.info("[get_databases][Enter] Fetching databases.")
validated_query = self._validate_query_params(query or {})
if 'columns' not in validated_query:
validated_query['columns'] = []
total_count = self._fetch_total_object_count(endpoint="/database/")
paginated_data = self._fetch_all_pages(
endpoint="/database/",
pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
)
self.logger.info("[get_databases][Exit] Found %d databases.", total_count)
return total_count, paginated_data
# [/DEF:get_databases:Function]
# [DEF:get_dataset:Function]
# @PURPOSE: Fetches information about a specific dataset by its ID.
# @RELATION: CALLS -> self.network.request
# @PARAM: dataset_id (int) - Dataset ID.
# @PRE: dataset_id must be a positive integer.
# @POST: Returns a dictionary with dataset information.
# @THROW: APIError - On network request failure or if the dataset is not found.
# @RETURN: Dict - Dataset information.
def get_dataset(self, dataset_id: int) -> Dict:
with belief_scope("get_dataset"):
assert isinstance(dataset_id, int) and dataset_id > 0, "[get_dataset][PRE] dataset_id must be a positive integer."
self.logger.info("[get_dataset][Enter] Fetching dataset %s.", dataset_id)
response = self.network.request(method="GET", endpoint=f"/dataset/{dataset_id}")
response = cast(Dict, response)
self.logger.info("[get_dataset][Exit] Got dataset %s.", dataset_id)
return response
# [/DEF:get_dataset:Function]
# [DEF:get_database:Function]
# @PURPOSE: Fetches information about a specific database by its ID.
# @RELATION: CALLS -> self.network.request
# @PARAM: database_id (int) - Database ID.
# @PRE: database_id must be a positive integer.
# @POST: Returns a dictionary with database information.
# @THROW: APIError - On network request failure or if the database is not found.
# @RETURN: Dict - Database information.
def get_database(self, database_id: int) -> Dict:
with belief_scope("get_database"):
assert isinstance(database_id, int) and database_id > 0, "[get_database][PRE] database_id must be a positive integer."
self.logger.info("[get_database][Enter] Fetching database %s.", database_id)
response = self.network.request(method="GET", endpoint=f"/database/{database_id}")
response = cast(Dict, response)
self.logger.info("[get_database][Exit] Got database %s.", database_id)
return response
# [/DEF:get_database:Function]
# [DEF:update_dataset:Function]
# @PURPOSE: Updates dataset data by its ID.
# @RELATION: CALLS -> self.network.request
# @PARAM: dataset_id (int) - Dataset ID.
# @PARAM: data (Dict) - Data to update.
# @PRE: dataset_id must be a positive integer; data must be a non-empty dictionary.
# @POST: The dataset is updated successfully and the API response is returned.
# @THROW: APIError - On network request failure.
# @RETURN: Dict - API response.
def update_dataset(self, dataset_id: int, data: Dict) -> Dict:
with belief_scope("update_dataset"):
assert isinstance(dataset_id, int) and dataset_id > 0, "[update_dataset][PRE] dataset_id must be a positive integer."
assert isinstance(data, dict) and data, "[update_dataset][PRE] data must be a non-empty dictionary."
self.logger.info("[update_dataset][Enter] Updating dataset %s.", dataset_id)
response = self.network.request(
method="PUT",
endpoint=f"/dataset/{dataset_id}",
data=json.dumps(data),
headers={'Content-Type': 'application/json'}
)
response = cast(Dict, response)
self.logger.info("[update_dataset][Exit] Updated dataset %s.", dataset_id)
return response
# [/DEF:update_dataset:Function]
# [/DEF:SupersetClient:Class]
# [/DEF:superset_tool.client:Module]
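For migration reference, a minimal usage sketch of the client being removed, matching the signatures above; the URL and credentials are placeholders:

```python
# Sketch of typical pre-refactor usage; callers like this must move to
# the consolidated backend client. All values below are placeholders.
from superset_tool import SupersetClient, SupersetConfig

config = SupersetConfig(
    env="dev",
    base_url="https://superset.example.com",  # validator appends /api/v1
    auth={"provider": "db", "username": "user", "password": "secret", "refresh": "true"},
)
client = SupersetClient(config)
client.authenticate()
total, dashboards = client.get_dashboards()
zip_bytes, filename = client.export_dashboard(dashboards[0]["id"])
```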

View File

@@ -1,173 +0,0 @@
# [DEF:superset_tool.exceptions:Module]
# @PURPOSE: Defines the hierarchy of custom exceptions for the whole tool, providing a single point of error handling.
# @SEMANTICS: exception, error, hierarchy
# @LAYER: Infra
# [SECTION: IMPORTS]
from pathlib import Path
from typing import Optional, Dict, Any, Union
# [/SECTION]
# [DEF:SupersetToolError:Class]
# @PURPOSE: Base class for all errors raised by the tool.
# @RELATION: INHERITS_FROM -> Exception
# @PARAM: message (str) - Error message.
# @PARAM: context (Optional[Dict[str, Any]]) - Additional error context.
class SupersetToolError(Exception):
# [DEF:__init__:Function]
# @PURPOSE: Initializes the base tool error.
# @PRE: message is a string, context is optional dict.
# @POST: Error is initialized with combined message and context.
def __init__(self, message: str, context: Optional[Dict[str, Any]] = None):
self.context = context or {}
super().__init__(f"{message} | Context: {self.context}")
# [/DEF:__init__:Function]
# [/DEF:SupersetToolError:Class]
# [DEF:AuthenticationError:Class]
# @PURPOSE: Errors related to authentication or authorization.
# @RELATION: INHERITS_FROM -> SupersetToolError
# @PARAM: message (str) - Error message.
# @PARAM: context (Any) - Additional error context.
class AuthenticationError(SupersetToolError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes an authentication error.
# @PRE: Optional message and context.
# @POST: Error is initialized with authentication context.
def __init__(self, message: str = "Authentication failed", **context: Any):
super().__init__(f"[AUTH_FAILURE] {message}", context={"type": "authentication", **context})
# [/DEF:__init__:Function]
# [/DEF:AuthenticationError:Class]
# [DEF:PermissionDeniedError:Class]
# @PURPOSE: Error raised when access to a resource is denied.
# @RELATION: INHERITS_FROM -> AuthenticationError
# @PARAM: message (str) - Error message.
# @PARAM: required_permission (Optional[str]) - Required permission.
# @PARAM: context (Any) - Additional error context.
class PermissionDeniedError(AuthenticationError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes a permission denied error.
# @PRE: Optional message, permission string, and context.
# @POST: Error is initialized with permission details.
def __init__(self, message: str = "Permission denied", required_permission: Optional[str] = None, **context: Any):
full_message = f"Permission denied: {required_permission}" if required_permission else message
super().__init__(full_message, context={"required_permission": required_permission, **context})
# [/DEF:__init__:Function]
# [/DEF:PermissionDeniedError:Class]
# [DEF:SupersetAPIError:Class]
# @PURPOSE: General errors when interacting with the Superset API.
# @RELATION: INHERITS_FROM -> SupersetToolError
# @PARAM: message (str) - Error message.
# @PARAM: context (Any) - Additional error context.
class SupersetAPIError(SupersetToolError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes a Superset API error.
# @PRE: Optional message and context.
# @POST: Error is initialized with API failure context.
def __init__(self, message: str = "Superset API error", **context: Any):
super().__init__(f"[API_FAILURE] {message}", context={"type": "api_call", **context})
# [/DEF:__init__:Function]
# [/DEF:SupersetAPIError:Class]
# [DEF:ExportError:Class]
# @PURPOSE: Errors specific to export operations.
# @RELATION: INHERITS_FROM -> SupersetAPIError
# @PARAM: message (str) - Error message.
# @PARAM: context (Any) - Additional error context.
class ExportError(SupersetAPIError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes an export error.
# @PRE: Optional message and context.
# @POST: Error is initialized with export failure subtype.
def __init__(self, message: str = "Dashboard export failed", **context: Any):
super().__init__(f"[EXPORT_FAILURE] {message}", context={"subtype": "export", **context})
# [/DEF:__init__:Function]
# [/DEF:ExportError:Class]
# [DEF:DashboardNotFoundError:Class]
# @PURPOSE: Error raised when a requested dashboard or resource is not found (404).
# @RELATION: INHERITS_FROM -> SupersetAPIError
# @PARAM: dashboard_id_or_slug (Union[int, str]) - Dashboard ID or slug.
# @PARAM: message (str) - Error message.
# @PARAM: context (Any) - Additional error context.
class DashboardNotFoundError(SupersetAPIError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes a dashboard not found error.
# @PRE: dashboard_id_or_slug is provided.
# @POST: Error is initialized with resource identification.
def __init__(self, dashboard_id_or_slug: Union[int, str], message: str = "Dashboard not found", **context: Any):
super().__init__(f"[NOT_FOUND] Dashboard '{dashboard_id_or_slug}' {message}", context={"subtype": "not_found", "resource_id": dashboard_id_or_slug, **context})
# [/DEF:__init__:Function]
# [/DEF:DashboardNotFoundError:Class]
# [DEF:DatasetNotFoundError:Class]
# @PURPOSE: Error raised when a requested dataset does not exist (404).
# @RELATION: INHERITS_FROM -> SupersetAPIError
# @PARAM: dataset_id_or_slug (Union[int, str]) - Dataset ID or slug.
# @PARAM: message (str) - Error message.
# @PARAM: context (Any) - Additional error context.
class DatasetNotFoundError(SupersetAPIError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes a dataset not found error.
# @PRE: dataset_id_or_slug is provided.
# @POST: Error is initialized with resource identification.
def __init__(self, dataset_id_or_slug: Union[int, str], message: str = "Dataset not found", **context: Any):
super().__init__(f"[NOT_FOUND] Dataset '{dataset_id_or_slug}' {message}", context={"subtype": "not_found", "resource_id": dataset_id_or_slug, **context})
# [/DEF:__init__:Function]
# [/DEF:DatasetNotFoundError:Class]
# [DEF:InvalidZipFormatError:Class]
# @PURPOSE: Error indicating an invalid ZIP archive format or contents.
# @RELATION: INHERITS_FROM -> SupersetToolError
# @PARAM: message (str) - Error message.
# @PARAM: file_path (Optional[Union[str, Path]]) - Path to the file.
# @PARAM: context (Any) - Additional error context.
class InvalidZipFormatError(SupersetToolError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes an invalid ZIP format error.
# @PRE: Optional message, file path, and context.
# @POST: Error is initialized with file validation context.
def __init__(self, message: str = "Invalid ZIP format or content", file_path: Optional[Union[str, Path]] = None, **context: Any):
super().__init__(f"[FILE_ERROR] {message}", context={"type": "file_validation", "file_path": str(file_path) if file_path else "N/A", **context})
# [/DEF:__init__:Function]
# [/DEF:InvalidZipFormatError:Class]
# [DEF:NetworkError:Class]
# @PURPOSE: Errors related to the network connection.
# @RELATION: INHERITS_FROM -> SupersetToolError
# @PARAM: message (str) - Error message.
# @PARAM: context (Any) - Additional error context.
class NetworkError(SupersetToolError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes a network error.
# @PRE: Optional message and context.
# @POST: Error is initialized with network failure context.
def __init__(self, message: str = "Network connection failed", **context: Any):
super().__init__(f"[NETWORK_FAILURE] {message}", context={"type": "network", **context})
# [/DEF:__init__:Function]
# [/DEF:NetworkError:Class]
# [DEF:FileOperationError:Class]
# @PURPOSE: General file operation (I/O) errors.
# @RELATION: INHERITS_FROM -> SupersetToolError
class FileOperationError(SupersetToolError):
pass
# [/DEF:FileOperationError:Class]
# [DEF:InvalidFileStructureError:Class]
# @PURPOSE: Error indicating an invalid file or directory structure.
# @RELATION: INHERITS_FROM -> FileOperationError
class InvalidFileStructureError(FileOperationError):
pass
# [/DEF:InvalidFileStructureError:Class]
# [DEF:ConfigurationError:Class]
# @PURPOSE: Errors related to invalid tool configuration.
# @RELATION: INHERITS_FROM -> SupersetToolError
class ConfigurationError(SupersetToolError):
pass
# [/DEF:ConfigurationError:Class]
# [/DEF:superset_tool.exceptions:Module]

View File

@@ -1,87 +0,0 @@
# [DEF:superset_tool.models:Module]
#
# @SEMANTICS: pydantic, model, config, validation, data-structure
# @PURPOSE: Defines Pydantic models for tool configuration, providing data validation.
# @LAYER: Infra
# @RELATION: DEPENDS_ON -> pydantic
# @RELATION: DEPENDS_ON -> superset_tool.utils.logger
# @PUBLIC_API: SupersetConfig, DatabaseConfig
# [SECTION: IMPORTS]
import re
from typing import Optional, Dict, Any
from pydantic import BaseModel, validator, Field
from .utils.logger import SupersetLogger
# [/SECTION]
# [DEF:SupersetConfig:Class]
# @PURPOSE: Configuration model for connecting to a single Superset API instance.
# @RELATION: INHERITS_FROM -> pydantic.BaseModel
class SupersetConfig(BaseModel):
env: str = Field(..., description="Название окружения (например, dev, prod).")
base_url: str = Field(..., description="Базовый URL Superset API, включая /api/v1.")
auth: Dict[str, Any] = Field(..., description="Словарь с данными для аутентификации (provider, username, password, refresh).")
verify_ssl: bool = Field(True, description="Флаг для проверки SSL-сертификатов.")
timeout: int = Field(30, description="Таймаут в секундах для HTTP-запросов.")
logger: Optional[SupersetLogger] = Field(None, description="Экземпляр логгера для логирования.")
# [DEF:validate_auth:Function]
# @PURPOSE: Verifies that the `auth` dict contains all fields required for authentication.
# @PRE: `v` must be a dictionary.
# @POST: Returns `v` if all required fields (`provider`, `username`, `password`, `refresh`) are present.
# @THROW: ValueError - If required fields are missing.
# @PARAM: v (Dict[str, str]) - Value of the auth field.
@validator('auth')
def validate_auth(cls, v: Dict[str, Any]) -> Dict[str, Any]:
required = {'provider', 'username', 'password', 'refresh'}
if not required.issubset(v.keys()):
raise ValueError(f"Словарь 'auth' должен содержать поля: {required}. Отсутствующие: {required - v.keys()}")
return v
# [/DEF:validate_auth:Function]
# [DEF:normalize_base_url:Function]
# @PURPOSE: Normalizes `base_url`, appending `/api/v1` if missing.
# @PRE: `v` must be a string.
# @POST: Returns the normalized `v`.
# @THROW: ValueError - If the URL format is invalid.
# @PARAM: v (str) - Value of the base_url field.
@validator('base_url')
def normalize_base_url(cls, v: str) -> str:
v = v.strip()
if not v.startswith(('http://', 'https://')):
raise ValueError(f"Invalid URL scheme: {v}. Must start with http:// or https://")
if '/api/v1' not in v:
v = f"{v.rstrip('/')}/api/v1"
return v
# [/DEF:normalize_base_url:Function]
class Config:
arbitrary_types_allowed = True
# [/DEF:SupersetConfig:Class]
# [DEF:DatabaseConfig:Class]
# @PURPOSE: Model for database transformation parameters used during dashboard migration.
# @RELATION: INHERITS_FROM -> pydantic.BaseModel
class DatabaseConfig(BaseModel):
database_config: Dict[str, Dict[str, Any]] = Field(..., description="Dictionary containing the 'old' and 'new' database configurations.")
logger: Optional[SupersetLogger] = Field(None, description="Logger instance for logging.")
# [DEF:validate_config:Function]
# @PURPOSE: Verifies that the `database_config` dict contains the 'old' and 'new' keys.
# @PRE: `v` must be a dictionary.
# @POST: Returns `v` if the 'old' and 'new' keys are present.
# @THROW: ValueError - If required keys are missing.
# @PARAM: v (Dict[str, Dict[str, Any]]) - Value of the database_config field.
@validator('database_config')
def validate_config(cls, v: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
if not {'old', 'new'}.issubset(v.keys()):
raise ValueError("'database_config' должен содержать ключи 'old' и 'new'.")
return v
# [/DEF:validate_config:Function]
class Config:
arbitrary_types_allowed = True
# [/DEF:DatabaseConfig:Class]
# [/DEF:superset_tool.models:Module]
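A small illustration of the validators above; the URL is a placeholder:

```python
# normalize_base_url appends /api/v1 when absent; validate_auth requires
# provider/username/password/refresh. Placeholder values only.
from superset_tool.models import SupersetConfig

cfg = SupersetConfig(
    env="dev",
    base_url="https://superset.example.com/",
    auth={"provider": "db", "username": "u", "password": "p", "refresh": "true"},
)
assert cfg.base_url == "https://superset.example.com/api/v1"
```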

View File

@@ -1,5 +0,0 @@
# [DEF:superset_tool.utils:Module]
# @SEMANTICS: package, utils
# @PURPOSE: Utility package for superset_tool.
# @LAYER: Infra
# [/DEF:superset_tool.utils:Module]

View File

@@ -1,110 +0,0 @@
# [DEF:superset_tool.utils.init_clients:Module]
#
# @SEMANTICS: utility, factory, client, initialization, configuration
# @PURPOSE: Centrally initializes Superset clients for the various environments (DEV, PROD, SBX, PREPROD), using `keyring` for secure password access.
# @LAYER: Infra
# @RELATION: DEPENDS_ON -> superset_tool.models
# @RELATION: DEPENDS_ON -> superset_tool.client
# @RELATION: DEPENDS_ON -> keyring
# @PUBLIC_API: setup_clients
# [SECTION: IMPORTS]
import keyring
import os
from typing import Dict, List, Optional, Any
from superset_tool.models import SupersetConfig
from superset_tool.client import SupersetClient
from superset_tool.utils.logger import SupersetLogger
# [/SECTION]
# [DEF:setup_clients:Function]
# @PURPOSE: Initializes and returns a dictionary of `SupersetClient` instances.
# @PRE: `logger` must be a valid `SupersetLogger` instance.
# @POST: Returns a dictionary of initialized clients.
# @THROW: Exception - On any other initialization error.
# @RELATION: CREATES_INSTANCE_OF -> SupersetConfig
# @RELATION: CREATES_INSTANCE_OF -> SupersetClient
# @PARAM: logger (SupersetLogger) - Logger instance for recording the process.
# @PARAM: custom_envs (List[Dict[str, Any]]) - List of custom environment settings.
# @RETURN: Dict[str, SupersetClient] - Dictionary keyed by environment name with `SupersetClient` values.
def setup_clients(logger: SupersetLogger, custom_envs: Optional[List[Any]] = None) -> Dict[str, SupersetClient]:
logger.info("[setup_clients][Enter] Starting Superset clients initialization.")
clients = {}
try:
# Try to load from ConfigManager if available
try:
from backend.src.dependencies import get_config_manager
config_manager = get_config_manager()
envs = config_manager.get_environments()
if envs:
logger.info("[setup_clients][Action] Loading environments from ConfigManager")
for env in envs:
logger.debug("[setup_clients][State] Creating config for environment: %s", env.name)
config = SupersetConfig(
env=env.name,
base_url=env.url,
auth={"provider": "db", "username": env.username, "password": env.password, "refresh": "true"},
verify_ssl=False,
timeout=30,
logger=logger
)
clients[env.name] = SupersetClient(config, logger)
return clients
except Exception as e:  # Exception already covers ImportError
logger.debug(f"[setup_clients][State] ConfigManager not available or failed: {e}")
if custom_envs:
for env in custom_envs:
# Handle both dict and object (like Pydantic model)
env_name = str(getattr(env, 'name', env.get('name') if isinstance(env, dict) else "unknown"))
base_url = str(getattr(env, 'url', env.get('url') if isinstance(env, dict) else ""))
username = str(getattr(env, 'username', env.get('username') if isinstance(env, dict) else ""))
password = str(getattr(env, 'password', env.get('password') if isinstance(env, dict) else ""))
logger.debug("[setup_clients][State] Creating config for custom environment: %s", env_name)
config = SupersetConfig(
env=env_name,
base_url=base_url,
auth={"provider": "db", "username": username, "password": password, "refresh": "true"},
verify_ssl=False,
timeout=30,
logger=logger
)
clients[env_name] = SupersetClient(config, logger)
else:
# Fallback to hardcoded environments with keyring
environments = {
"dev": "https://devta.bi.dwh.rusal.com/api/v1",
"prod": "https://prodta.bi.dwh.rusal.com/api/v1",
"sbx": "https://sandboxta.bi.dwh.rusal.com/api/v1",
"preprod": "https://preprodta.bi.dwh.rusal.com/api/v1",
"uatta": "https://uatta.bi.dwh.rusal.com/api/v1",
"dev5":"https://dev.bi.dwh.rusal.com/api/v1"
}
for env_name, base_url in environments.items():
logger.debug("[setup_clients][State] Creating config for environment: %s", env_name.upper())
password = keyring.get_password("system", f"{env_name} migrate")
if not password:
logger.warning(f"Пароль для '{env_name} migrate' не найден в keyring. Пропускаем.")
continue
config = SupersetConfig(
env=env_name,
base_url=base_url,
auth={"provider": "db", "username": "migrate_user", "password": password, "refresh": "true"},
verify_ssl=False,
timeout=30,
logger=logger
)
clients[env_name] = SupersetClient(config, logger)
logger.info("[setup_clients][Exit] All clients (%s) initialized successfully.", ', '.join(clients.keys()))
return clients
except Exception as e:
logger.critical("[setup_clients][Failure] Critical error during client initialization: %s", e, exc_info=True)
raise
# [/DEF:setup_clients:Function]
# [/DEF:superset_tool.utils.init_clients:Module]

View File

@@ -1,156 +0,0 @@
# [DEF:superset_tool.utils.logger:Module]
#
# @SEMANTICS: logging, utility, infrastructure, wrapper
# @PURPOSE: Provides a universal wrapper around the standard `logging.Logger` for unified creation and management of loggers with console and/or file output.
# @LAYER: Infra
# @RELATION: WRAPS -> logging.Logger
#
# @INVARIANT: The logger must always have a name.
# @PUBLIC_API: SupersetLogger
# [SECTION: IMPORTS]
import logging
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional, Any, Mapping, Generator
from contextlib import contextmanager
# [/SECTION]
# [DEF:belief_scope:Function]
# @PURPOSE: Context manager for belief state logging to maintain execution coherence.
# @PRE: scope_id must be a string.
# @POST: Entry and exit actions are logged.
# @PARAM: scope_id (str) - Identifier for the logical scope.
@contextmanager
def belief_scope(scope_id: str) -> Generator[None, None, None]:
"""Context manager for belief state logging."""
logger = logging.getLogger("superset_tool")
logger.debug(f"[BELIEF_ENTRY] {scope_id}")
try:
yield
finally:
logger.debug(f"[BELIEF_EXIT] {scope_id}")
# [/DEF:belief_scope:Function]
# [DEF:SupersetLogger:Class]
# @PURPOSE: Wrapper around `logging.Logger` that simplifies logger configuration and use.
# @RELATION: WRAPS -> logging.Logger
class SupersetLogger:
# [DEF:__init__:Function]
# @PURPOSE: Configures and initializes the logger, adding handlers for file and/or console output.
# @PRE: If log_dir is given, the path must be valid (or creatable).
# @POST: `self.logger` is ready for use with configured handlers.
# @PARAM: name (str) - Logger identifier.
# @PARAM: log_dir (Optional[Path]) - Directory for saving log files.
# @PARAM: level (int) - Logging level (e.g., `logging.INFO`).
# @PARAM: console (bool) - Flag enabling console output.
def __init__(self, name: str = "superset_tool", log_dir: Optional[Path] = None, level: int = logging.INFO, console: bool = True, logger: Optional[logging.Logger] = None) -> None:
with belief_scope("__init__"):
if logger:
self.logger = logger
return
self.logger = logging.getLogger(name)
self.logger.setLevel(level)
self.logger.propagate = False
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
if self.logger.hasHandlers():
self.logger.handlers.clear()
if log_dir:
log_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d")
file_handler = logging.FileHandler(log_dir / f"{name}_{timestamp}.log", encoding="utf-8")
file_handler.setFormatter(formatter)
self.logger.addHandler(file_handler)
if console:
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(formatter)
self.logger.addHandler(console_handler)
# [/DEF:__init__:Function]
# [DEF:_log:Function]
# @PURPOSE: (Helper) Universal method dispatching to the appropriate logging level.
# @PRE: level_method must be a callable logger method. msg must be a string.
# @POST: The message is written to the log.
# @PARAM: level_method (Any) - Logger method (info, debug, etc).
# @PARAM: msg (str) - Message.
# @PARAM: args (Any) - Formatting arguments.
# @PARAM: extra (Optional[Mapping[str, Any]]) - Additional data.
# @PARAM: exc_info (bool) - Whether to attach exception info.
def _log(self, level_method: Any, msg: str, *args: Any, extra: Optional[Mapping[str, Any]] = None, exc_info: bool = False) -> None:
with belief_scope("_log"):
level_method(msg, *args, extra=extra, exc_info=exc_info)
# [/DEF:_log:Function]
# [DEF:info:Function]
# @PURPOSE: Logs an INFO-level message.
# @PRE: msg must be a string.
# @POST: The INFO-level message is logged.
def info(self, msg: str, *args: Any, extra: Optional[Mapping[str, Any]] = None, exc_info: bool = False) -> None:
with belief_scope("info"):
self._log(self.logger.info, msg, *args, extra=extra, exc_info=exc_info)
# [/DEF:info:Function]
# [DEF:debug:Function]
# @PURPOSE: Logs a DEBUG-level message.
# @PRE: msg must be a string.
# @POST: The DEBUG-level message is logged.
def debug(self, msg: str, *args: Any, extra: Optional[Mapping[str, Any]] = None, exc_info: bool = False) -> None:
with belief_scope("debug"):
self._log(self.logger.debug, msg, *args, extra=extra, exc_info=exc_info)
# [/DEF:debug:Function]
# [DEF:warning:Function]
# @PURPOSE: Logs a WARNING-level message.
# @PRE: msg must be a string.
# @POST: The WARNING-level message is logged.
def warning(self, msg: str, *args: Any, extra: Optional[Mapping[str, Any]] = None, exc_info: bool = False) -> None:
with belief_scope("warning"):
self._log(self.logger.warning, msg, *args, extra=extra, exc_info=exc_info)
# [/DEF:warning:Function]
# [DEF:error:Function]
# @PURPOSE: Logs an ERROR-level message.
# @PRE: msg must be a string.
# @POST: The ERROR-level message is logged.
def error(self, msg: str, *args: Any, extra: Optional[Mapping[str, Any]] = None, exc_info: bool = False) -> None:
with belief_scope("error"):
self._log(self.logger.error, msg, *args, extra=extra, exc_info=exc_info)
# [/DEF:error:Function]
# [DEF:critical:Function]
# @PURPOSE: Logs a CRITICAL-level message.
# @PRE: msg must be a string.
# @POST: The CRITICAL-level message is logged.
def critical(self, msg: str, *args: Any, extra: Optional[Mapping[str, Any]] = None, exc_info: bool = False) -> None:
with belief_scope("critical"):
self._log(self.logger.critical, msg, *args, extra=extra, exc_info=exc_info)
# [/DEF:critical:Function]
# [DEF:exception:Function]
# @PURPOSE: Logs an ERROR-level message along with the current exception's stack trace.
# @PRE: msg must be a string.
# @POST: The error message with traceback is logged.
def exception(self, msg: str, *args: Any, **kwargs: Any) -> None:
with belief_scope("exception"):
self.logger.exception(msg, *args, **kwargs)
# [/DEF:exception:Function]
# [DEF:belief_scope:Method]
# @PURPOSE: Instance method wrapper for belief_scope context manager.
# @PRE: scope_id must be a string.
# @POST: Enters the belief scope.
@contextmanager
def belief_scope(self, scope_id: str) -> Generator[None, None, None]:
with belief_scope(scope_id):
yield
# [/DEF:belief_scope:Method]
# [/DEF:SupersetLogger:Class]
# [/DEF:superset_tool.utils.logger:Module]
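A tiny usage sketch of `belief_scope`, showing the paired entry/exit markers it emits at DEBUG level:

```python
# belief_scope logs [BELIEF_ENTRY]/[BELIEF_EXIT] around the wrapped block
# via the "superset_tool" logger; basicConfig here just makes DEBUG visible.
import logging
from superset_tool.utils.logger import belief_scope

logging.basicConfig(level=logging.DEBUG)

with belief_scope("export_dashboard"):
    pass  # work goes here; the exit marker is emitted even on exceptions
```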

View File

@@ -1,157 +0,0 @@
# [DEF:superset_tool.utils.whiptail_fallback:Module]
#
# @SEMANTICS: ui, fallback, console, utility, interactive
# @PURPOSE: Provides a dense console UI fallback for interactive dialogs, imitating `whiptail` on systems where it is unavailable.
# @LAYER: UI
# @PUBLIC_API: menu, checklist, yesno, msgbox, inputbox, gauge
# [SECTION: IMPORTS]
import sys
from typing import List, Tuple, Optional, Any
from .logger import belief_scope
# [/SECTION]
# [DEF:menu:Function]
# @PURPOSE: Displays a selection menu and returns the chosen item.
# @PARAM: title (str) - Menu title.
# @PARAM: prompt (str) - Input prompt.
# @PARAM: choices (List[str]) - List of options to choose from.
# @PRE: choices must be a non-empty list of strings.
# @POST: Returns a tuple with return code and selected choice.
# @RETURN: Tuple[int, Optional[str]] - Tuple of (return code, selected item). rc=0 means success.
def menu(title: str, prompt: str, choices: List[str], **kwargs) -> Tuple[int, Optional[str]]:
with belief_scope("menu"):
print(f"\n=== {title} ===\n{prompt}")
for idx, item in enumerate(choices, 1):
print(f"{idx}) {item}")
try:
raw = input("\nВведите номер (0 отмена): ").strip()
sel = int(raw)
return (0, choices[sel - 1]) if 0 < sel <= len(choices) else (1, None)
except (ValueError, IndexError):
return 1, None
# [/DEF:menu:Function]
# [DEF:checklist:Function]
# @PURPOSE: Displays a list allowing multiple selections.
# @PARAM: title (str) - Title.
# @PARAM: prompt (str) - Input prompt.
# @PARAM: options (List[Tuple[str, str]]) - List of (value, label) tuples.
# @PRE: options must be a list of (value, label) tuples.
# @POST: Returns a list of selected values.
# @RETURN: Tuple[int, List[str]] - Tuple of (return code, list of selected values).
def checklist(title: str, prompt: str, options: List[Tuple[str, str]], **kwargs) -> Tuple[int, List[str]]:
with belief_scope("checklist"):
print(f"\n=== {title} ===\n{prompt}")
for idx, (val, label) in enumerate(options, 1):
print(f"{idx}) [{val}] {label}")
raw = input("\nВведите номера через запятую (пустой ввод → отказ): ").strip()
if not raw: return 1, []
try:
indices = {int(x.strip()) for x in raw.split(",") if x.strip()}
selected_values = [options[i - 1][0] for i in indices if 0 < i <= len(options)]
return 0, selected_values
except (ValueError, IndexError):
return 1, []
# [/DEF:checklist:Function]
# [DEF:yesno:Function]
# @PURPOSE: Asks a yes/no question.
# @PARAM: title (str) - Title.
# @PARAM: question (str) - Question for the user.
# @PRE: question must be a string.
# @POST: Returns boolean based on user input.
# @RETURN: bool - `True` if the user answered "yes".
def yesno(title: str, question: str, **kwargs) -> bool:
with belief_scope("yesno"):
ans = input(f"\n=== {title} ===\n{question} (y/n): ").strip().lower()
return ans in ("y", "yes", "да", "д")
# [/DEF:yesno:Function]
# [DEF:msgbox:Function]
# @PURPOSE: Displays an informational message.
# @PARAM: title (str) - Title.
# @PARAM: msg (str) - Message text.
# @PRE: msg must be a string.
# @POST: Message is printed to console.
def msgbox(title: str, msg: str, **kwargs) -> None:
with belief_scope("msgbox"):
print(f"\n=== {title} ===\n{msg}\n")
# [/DEF:msgbox:Function]
# [DEF:inputbox:Function]
# @PURPOSE: Requests text input from the user.
# @PARAM: title (str) - Title.
# @PARAM: prompt (str) - Input prompt.
# @PRE: prompt must be a string.
# @POST: Returns user input string.
# @RETURN: Tuple[int, Optional[str]] - Tuple of (return code, entered string).
def inputbox(title: str, prompt: str, **kwargs) -> Tuple[int, Optional[str]]:
with belief_scope("inputbox"):
print(f"\n=== {title} ===")
val = input(f"{prompt}\n")
return (0, val) if val else (1, None)
# [/DEF:inputbox:Function]
# [DEF:_ConsoleGauge:Class]
# @PURPOSE: Context manager imitating a `whiptail` gauge in the console.
class _ConsoleGauge:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the gauge.
# @PRE: title must be a string.
# @POST: Instance initialized.
def __init__(self, title: str, **kwargs):
with belief_scope("__init__"):
self.title = title
# [/DEF:__init__:Function]
# [DEF:__enter__:Function]
# @PURPOSE: Enters the context.
# @PRE: Instance initialized.
# @POST: Header printed, returns self.
def __enter__(self):
with belief_scope("__enter__"):
print(f"\n=== {self.title} ===")
return self
# [/DEF:__enter__:Function]
# [DEF:__exit__:Function]
# @PURPOSE: Exits the context.
# @PRE: Context entered.
# @POST: Newline printed.
def __exit__(self, exc_type, exc_val, exc_tb):
with belief_scope("__exit__"):
sys.stdout.write("\n"); sys.stdout.flush()
# [/DEF:__exit__:Function]
# [DEF:set_text:Function]
# @PURPOSE: Sets the gauge text.
# @PRE: txt must be a string.
# @POST: Text written to stdout.
def set_text(self, txt: str) -> None:
with belief_scope("set_text"):
sys.stdout.write(f"\r{txt} "); sys.stdout.flush()
# [/DEF:set_text:Function]
# [DEF:set_percent:Function]
# @PURPOSE: Sets the gauge percentage.
# @PRE: percent must be an integer.
# @POST: Percentage written to stdout.
def set_percent(self, percent: int) -> None:
with belief_scope("set_percent"):
sys.stdout.write(f"{percent}%"); sys.stdout.flush()
# [/DEF:set_percent:Function]
# [/DEF:_ConsoleGauge:Class]
# [DEF:gauge:Function]
# @PURPOSE: Creates and returns a `_ConsoleGauge` instance.
# @PRE: title must be a string.
# @POST: Returns an instance of _ConsoleGauge.
# @PARAM: title (str) - Title for the progress indicator.
# @RETURN: _ConsoleGauge - Context manager instance.
def gauge(title: str, **kwargs) -> _ConsoleGauge:
with belief_scope("gauge"):
return _ConsoleGauge(title, **kwargs)
# [/DEF:gauge:Function]
# [/DEF:superset_tool.utils.whiptail_fallback:Module]