refactor complete

This commit is contained in:
2026-01-22 17:37:17 +03:00
parent 203ce446f4
commit d99a13d91f
41 changed files with 1789 additions and 3052 deletions

View File

@@ -20,6 +20,8 @@ Auto-generated from all feature plans. Last updated: 2025-12-19
- SQLite (`tasks.db`), JSON (`config.json`) (009-backup-scheduler)
- Python 3.9+ (Backend), Node.js 18+ (Frontend) + FastAPI, SvelteKit, Tailwind CSS, Pydantic, SQLAlchemy, `superset_tool` (internal lib) (010-refactor-cli-to-web)
- SQLite (for job history/results, connection configs), Filesystem (for temporary file uploads) (010-refactor-cli-to-web)
- Python 3.9+ + FastAPI, Pydantic, requests, pyyaml (migrated from superset_tool) (012-remove-superset-tool)
- SQLite (tasks.db, migrations.db), Filesystem (012-remove-superset-tool)
- Python 3.9+ (Backend), Node.js 18+ (Frontend Build) (001-plugin-arch-svelte-ui)
@@ -40,9 +42,9 @@ cd src; pytest; ruff check .
Python 3.9+ (Backend), Node.js 18+ (Frontend Build): Follow standard conventions
## Recent Changes
- 012-remove-superset-tool: Added Python 3.9+ + FastAPI, Pydantic, requests, pyyaml (migrated from superset_tool)
- 010-refactor-cli-to-web: Added Python 3.9+ (Backend), Node.js 18+ (Frontend) + FastAPI, SvelteKit, Tailwind CSS, Pydantic, SQLAlchemy, `superset_tool` (internal lib)
- 009-backup-scheduler: Added Python 3.9+, Node.js 18+ + FastAPI, APScheduler, SQLAlchemy, SvelteKit, Tailwind CSS
- 009-backup-scheduler: Added Python 3.9+, Node.js 18+ + FastAPI, APScheduler, SQLAlchemy, SvelteKit, Tailwind CSS
<!-- MANUAL ADDITIONS START -->

View File

@@ -42,4 +42,5 @@ urllib3==2.6.2
uvicorn==0.38.0
websockets==15.0.1
pandas
psycopg2-binary
psycopg2-binary
openpyxl

View File

@@ -11,12 +11,11 @@
# [SECTION: IMPORTS]
from fastapi import APIRouter, Depends, HTTPException
from typing import List, Dict, Optional
from backend.src.dependencies import get_config_manager, get_scheduler_service
from backend.src.core.superset_client import SupersetClient
from superset_tool.models import SupersetConfig
from ...dependencies import get_config_manager, get_scheduler_service
from ...core.superset_client import SupersetClient
from pydantic import BaseModel, Field
from backend.src.core.config_models import Environment as EnvModel
from backend.src.core.logger import belief_scope
from ...core.config_models import Environment as EnvModel
from ...core.logger import belief_scope
# [/SECTION]
router = APIRouter()
@@ -114,18 +113,7 @@ async def get_environment_databases(id: str, config_manager=Depends(get_config_m
try:
# Initialize SupersetClient from environment config
# Note: We need to map Environment model to SupersetConfig
superset_config = SupersetConfig(
env=env.name,
base_url=env.url,
auth={
"provider": "db", # Defaulting to db provider
"username": env.username,
"password": env.password,
"refresh": "false"
}
)
client = SupersetClient(superset_config)
client = SupersetClient(env)
return client.get_databases_summary()
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to fetch databases: {str(e)}")

View File

@@ -13,9 +13,9 @@
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from typing import List, Optional
from backend.src.dependencies import get_config_manager
from backend.src.core.database import get_db
from backend.src.models.mapping import DatabaseMapping
from ...dependencies import get_config_manager
from ...core.database import get_db
from ...models.mapping import DatabaseMapping
from pydantic import BaseModel
# [/SECTION]

View File

@@ -7,10 +7,9 @@
from fastapi import APIRouter, Depends, HTTPException
from typing import List, Dict
from backend.src.dependencies import get_config_manager, get_task_manager
from backend.src.models.dashboard import DashboardMetadata, DashboardSelection
from backend.src.core.superset_client import SupersetClient
from superset_tool.models import SupersetConfig
from ...dependencies import get_config_manager, get_task_manager
from ...models.dashboard import DashboardMetadata, DashboardSelection
from ...core.superset_client import SupersetClient
router = APIRouter(prefix="/api", tags=["migration"])
@@ -27,14 +26,7 @@ async def get_dashboards(env_id: str, config_manager=Depends(get_config_manager)
if not env:
raise HTTPException(status_code=404, detail="Environment not found")
config = SupersetConfig(
env=env.name,
base_url=env.url,
auth={'provider': 'db', 'username': env.username, 'password': env.password, 'refresh': False},
verify_ssl=True,
timeout=30
)
client = SupersetClient(config)
client = SupersetClient(env)
dashboards = client.get_dashboards_summary()
return dashboards
# [/DEF:get_dashboards:Function]

View File

@@ -17,7 +17,6 @@ from ...dependencies import get_config_manager
from ...core.config_manager import ConfigManager
from ...core.logger import logger, belief_scope
from ...core.superset_client import SupersetClient
from superset_tool.models import SupersetConfig
import os
# [/SECTION]
@@ -28,7 +27,7 @@ router = APIRouter()
# @PRE: Config manager is available.
# @POST: Returns masked AppConfig.
# @RETURN: AppConfig - The current configuration.
@router.get("/", response_model=AppConfig)
@router.get("", response_model=AppConfig)
async def get_settings(config_manager: ConfigManager = Depends(get_config_manager)):
with belief_scope("get_settings"):
logger.info("[get_settings][Entry] Fetching all settings")
@@ -85,17 +84,7 @@ async def add_environment(
# Validate connection before adding
try:
superset_config = SupersetConfig(
env=env.name,
base_url=env.url,
auth={
"provider": "db",
"username": env.username,
"password": env.password,
"refresh": "true"
}
)
client = SupersetClient(config=superset_config)
client = SupersetClient(env)
client.get_dashboards(query={"page_size": 1})
except Exception as e:
logger.error(f"[add_environment][Coherence:Failed] Connection validation failed: {e}")
@@ -130,17 +119,7 @@ async def update_environment(
# Validate connection before updating
try:
superset_config = SupersetConfig(
env=env_to_validate.name,
base_url=env_to_validate.url,
auth={
"provider": "db",
"username": env_to_validate.username,
"password": env_to_validate.password,
"refresh": "true"
}
)
client = SupersetClient(config=superset_config)
client = SupersetClient(env_to_validate)
client.get_dashboards(query={"page_size": 1})
except Exception as e:
logger.error(f"[update_environment][Coherence:Failed] Connection validation failed: {e}")
@@ -187,21 +166,8 @@ async def test_environment_connection(
raise HTTPException(status_code=404, detail=f"Environment {id} not found")
try:
# Create SupersetConfig
# Note: SupersetConfig expects 'auth' dict with specific keys
superset_config = SupersetConfig(
env=env.name,
base_url=env.url,
auth={
"provider": "db", # Defaulting to db for now
"username": env.username,
"password": env.password,
"refresh": "true"
}
)
# Initialize client (this will trigger authentication)
client = SupersetClient(config=superset_config)
client = SupersetClient(env)
# Try a simple request to verify
client.get_dashboards(query={"page_size": 1})

View File

@@ -6,10 +6,8 @@
import sys
from pathlib import Path
# Add project root to sys.path to allow importing superset_tool
# Assuming app.py is in backend/src/
# project_root is used for static files mounting
project_root = Path(__file__).resolve().parent.parent.parent
sys.path.append(str(project_root))
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Depends, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
@@ -86,7 +84,7 @@ async def log_requests(request: Request, call_next):
app.include_router(plugins.router, prefix="/api/plugins", tags=["Plugins"])
app.include_router(tasks.router, prefix="/api/tasks", tags=["Tasks"])
app.include_router(settings.router, prefix="/api/settings", tags=["Settings"])
app.include_router(connections.router, prefix="/api/connections", tags=["Connections"])
app.include_router(connections.router, prefix="/api/settings/connections", tags=["Connections"])
app.include_router(environments.router, prefix="/api/environments", tags=["Environments"])
app.include_router(mappings.router)
app.include_router(migration.router)

View File

@@ -23,6 +23,8 @@ class Environment(BaseModel):
url: str
username: str
password: str # Will be masked in UI
verify_ssl: bool = True
timeout: int = 30
is_default: bool = False
backup_schedule: Schedule = Field(default_factory=Schedule)
# [/DEF:Environment:DataClass]

View File

@@ -29,11 +29,10 @@ class BeliefFormatter(logging.Formatter):
# @PARAM: record (logging.LogRecord) - The log record to format.
# @RETURN: str - The formatted log message.
def format(self, record):
msg = super().format(record)
anchor_id = getattr(_belief_state, 'anchor_id', None)
if anchor_id:
msg = f"[{anchor_id}][Action] {msg}"
return msg
record.msg = f"[{anchor_id}][Action] {record.msg}"
return super().format(record)
# [/DEF:format:Function]
# [/DEF:BeliefFormatter:Class]
@@ -193,6 +192,18 @@ class WebSocketLogHandler(logging.Handler):
# @SEMANTICS: logger, global, instance
# @PURPOSE: The global logger instance for the application, configured with both a console handler and the custom WebSocket handler.
logger = logging.getLogger("superset_tools_app")
# [DEF:believed:Function]
# @PURPOSE: A decorator that wraps a function in a belief scope.
# @PARAM: anchor_id (str) - The identifier for the semantic block.
def believed(anchor_id: str):
def decorator(func):
def wrapper(*args, **kwargs):
with belief_scope(anchor_id):
return func(*args, **kwargs)
return wrapper
return decorator
# [/DEF:believed:Function]
logger.setLevel(logging.INFO)
# Create a formatter

View File

@@ -1,82 +1,102 @@
# [DEF:backend.src.core.superset_client:Module]
#
# @SEMANTICS: superset, api, client, database, metadata
# @PURPOSE: Extends the base SupersetClient with database-specific metadata fetching.
# @SEMANTICS: superset, api, client, rest, http, dashboard, dataset, import, export
# @PURPOSE: Предоставляет высокоуровневый клиент для взаимодействия с Superset REST API, инкапсулируя логику запросов, обработку ошибок и пагинацию.
# @LAYER: Core
# @RELATION: INHERITS_FROM -> superset_tool.client.SupersetClient
# @RELATION: USES -> backend.src.core.utils.network.APIClient
# @RELATION: USES -> backend.src.core.config_models.Environment
#
# @INVARIANT: All database metadata requests must include UUID and name.
# @INVARIANT: All network operations must use the internal APIClient instance.
# @PUBLIC_API: SupersetClient
# [SECTION: IMPORTS]
from typing import List, Dict, Optional, Tuple
from .logger import belief_scope
from superset_tool.client import SupersetClient as BaseSupersetClient
from superset_tool.models import SupersetConfig
import json
import zipfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from requests import Response
from .logger import logger as app_logger, belief_scope
from .utils.network import APIClient, SupersetAPIError, AuthenticationError, DashboardNotFoundError, NetworkError
from .utils.fileio import get_filename_from_headers
from .config_models import Environment
# [/SECTION]
# [DEF:SupersetClient:Class]
# @PURPOSE: Extended SupersetClient for migration-specific operations.
class SupersetClient(BaseSupersetClient):
# @PURPOSE: Класс-обёртка над Superset REST API, предоставляющий методы для работы с дашбордами и датасетами.
class SupersetClient:
# [DEF:__init__:Function]
# @PURPOSE: Инициализирует клиент, проверяет конфигурацию и создает сетевой клиент.
# @PRE: `env` должен быть валидным объектом Environment.
# @POST: Атрибуты `env` и `network` созданы и готовы к работе.
# @PARAM: env (Environment) - Конфигурация окружения.
def __init__(self, env: Environment):
with belief_scope("__init__"):
app_logger.info("[SupersetClient.__init__][Enter] Initializing SupersetClient for env %s.", env.name)
self.env = env
# Construct auth payload expected by Superset API
auth_payload = {
"username": env.username,
"password": env.password,
"provider": "db",
"refresh": "true"
}
self.network = APIClient(
config={
"base_url": env.url,
"auth": auth_payload
},
verify_ssl=env.verify_ssl,
timeout=env.timeout
)
self.delete_before_reimport: bool = False
app_logger.info("[SupersetClient.__init__][Exit] SupersetClient initialized.")
# [/DEF:__init__:Function]
# [DEF:authenticate:Function]
# @PURPOSE: Authenticates the client using the configured credentials.
# @PRE: self.network must be initialized with valid auth configuration.
# @POST: Client is authenticated and tokens are stored.
# @RETURN: Dict[str, str] - Authentication tokens.
def authenticate(self):
def authenticate(self) -> Dict[str, str]:
with belief_scope("SupersetClient.authenticate"):
return self.network.authenticate()
# [DEF:get_databases_summary:Function]
# @PURPOSE: Fetch a summary of databases including uuid, name, and engine.
# @PRE: self.network must be initialized and authenticated.
# @POST: Returns a list of database dictionaries with 'engine' field.
# @RETURN: List[Dict] - Summary of databases.
def get_databases_summary(self) -> List[Dict]:
with belief_scope("SupersetClient.get_databases_summary"):
"""
Fetch a summary of databases including uuid, name, and engine.
"""
query = {
"columns": ["uuid", "database_name", "backend"]
}
_, databases = self.get_databases(query=query)
# Map 'backend' to 'engine' for consistency with contracts
for db in databases:
db['engine'] = db.pop('backend', None)
return databases
# [/DEF:get_databases_summary:Function]
# [/DEF:authenticate:Function]
# [DEF:get_database_by_uuid:Function]
# @PURPOSE: Find a database by its UUID.
# @PRE: db_uuid must be a string.
# @POST: Returns database metadata if found.
# @PARAM: db_uuid (str) - The UUID of the database.
# @RETURN: Optional[Dict] - Database info if found, else None.
def get_database_by_uuid(self, db_uuid: str) -> Optional[Dict]:
with belief_scope("SupersetClient.get_database_by_uuid", f"uuid={db_uuid}"):
"""
Find a database by its UUID.
"""
query = {
"filters": [{"col": "uuid", "op": "eq", "value": db_uuid}]
}
_, databases = self.get_databases(query=query)
return databases[0] if databases else None
# [/DEF:get_database_by_uuid:Function]
@property
# [DEF:headers:Function]
# @PURPOSE: Возвращает базовые HTTP-заголовки, используемые сетевым клиентом.
def headers(self) -> dict:
with belief_scope("headers"):
return self.network.headers
# [/DEF:headers:Function]
# [SECTION: DASHBOARD OPERATIONS]
# [DEF:get_dashboards:Function]
# @PURPOSE: Получает полный список дашбордов, автоматически обрабатывая пагинацию.
# @PARAM: query (Optional[Dict]) - Дополнительные параметры запроса для API.
# @RETURN: Tuple[int, List[Dict]] - Кортеж (общее количество, список дашбордов).
def get_dashboards(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_dashboards"):
app_logger.info("[get_dashboards][Enter] Fetching dashboards.")
validated_query = self._validate_query_params(query or {})
if 'columns' not in validated_query:
validated_query['columns'] = ["slug", "id", "changed_on_utc", "dashboard_title", "published"]
total_count = self._fetch_total_object_count(endpoint="/dashboard/")
paginated_data = self._fetch_all_pages(
endpoint="/dashboard/",
pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
)
app_logger.info("[get_dashboards][Exit] Found %d dashboards.", total_count)
return total_count, paginated_data
# [/DEF:get_dashboards:Function]
# [DEF:get_dashboards_summary:Function]
# @PURPOSE: Fetches dashboard metadata optimized for the grid.
# @PRE: self.network must be authenticated.
# @POST: Returns a list of dashboard dictionaries mapped to the grid schema.
# @RETURN: List[Dict]
def get_dashboards_summary(self) -> List[Dict]:
with belief_scope("SupersetClient.get_dashboards_summary"):
"""
Fetches dashboard metadata optimized for the grid.
Returns a list of dictionaries mapped to DashboardMetadata fields.
"""
query = {
"columns": ["id", "dashboard_title", "changed_on_utc", "published"]
}
@@ -94,34 +114,287 @@ class SupersetClient(BaseSupersetClient):
return result
# [/DEF:get_dashboards_summary:Function]
# [DEF:export_dashboard:Function]
# @PURPOSE: Экспортирует дашборд в виде ZIP-архива.
# @PARAM: dashboard_id (int) - ID дашборда для экспорта.
# @RETURN: Tuple[bytes, str] - Бинарное содержимое ZIP-архива и имя файла.
def export_dashboard(self, dashboard_id: int) -> Tuple[bytes, str]:
with belief_scope("export_dashboard"):
app_logger.info("[export_dashboard][Enter] Exporting dashboard %s.", dashboard_id)
response = self.network.request(
method="GET",
endpoint="/dashboard/export/",
params={"q": json.dumps([dashboard_id])},
stream=True,
raw_response=True,
)
response = cast(Response, response)
self._validate_export_response(response, dashboard_id)
filename = self._resolve_export_filename(response, dashboard_id)
app_logger.info("[export_dashboard][Exit] Exported dashboard %s to %s.", dashboard_id, filename)
return response.content, filename
# [/DEF:export_dashboard:Function]
# [DEF:import_dashboard:Function]
# @PURPOSE: Импортирует дашборд из ZIP-файла.
# @PARAM: file_name (Union[str, Path]) - Путь к ZIP-архиву.
# @PARAM: dash_id (Optional[int]) - ID дашборда для удаления при сбое.
# @PARAM: dash_slug (Optional[str]) - Slug дашборда для поиска ID.
# @RETURN: Dict - Ответ API в случае успеха.
def import_dashboard(self, file_name: Union[str, Path], dash_id: Optional[int] = None, dash_slug: Optional[str] = None) -> Dict:
with belief_scope("import_dashboard"):
file_path = str(file_name)
self._validate_import_file(file_path)
try:
return self._do_import(file_path)
except Exception as exc:
app_logger.error("[import_dashboard][Failure] First import attempt failed: %s", exc, exc_info=True)
if not self.delete_before_reimport:
raise
target_id = self._resolve_target_id_for_delete(dash_id, dash_slug)
if target_id is None:
app_logger.error("[import_dashboard][Failure] No ID available for delete-retry.")
raise
self.delete_dashboard(target_id)
app_logger.info("[import_dashboard][State] Deleted dashboard ID %s, retrying import.", target_id)
return self._do_import(file_path)
# [/DEF:import_dashboard:Function]
# [DEF:delete_dashboard:Function]
# @PURPOSE: Удаляет дашборд по его ID или slug.
# @PARAM: dashboard_id (Union[int, str]) - ID или slug дашборда.
def delete_dashboard(self, dashboard_id: Union[int, str]) -> None:
with belief_scope("delete_dashboard"):
app_logger.info("[delete_dashboard][Enter] Deleting dashboard %s.", dashboard_id)
response = self.network.request(method="DELETE", endpoint=f"/dashboard/{dashboard_id}")
response = cast(Dict, response)
if response.get("result", True) is not False:
app_logger.info("[delete_dashboard][Success] Dashboard %s deleted.", dashboard_id)
else:
app_logger.warning("[delete_dashboard][Warning] Unexpected response while deleting %s: %s", dashboard_id, response)
# [/DEF:delete_dashboard:Function]
# [/SECTION]
# [SECTION: DATASET OPERATIONS]
# [DEF:get_datasets:Function]
# @PURPOSE: Получает полный список датасетов, автоматически обрабатывая пагинацию.
# @PARAM: query (Optional[Dict]) - Дополнительные параметры запроса.
# @RETURN: Tuple[int, List[Dict]] - Кортеж (общее количество, список датасетов).
def get_datasets(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_datasets"):
app_logger.info("[get_datasets][Enter] Fetching datasets.")
validated_query = self._validate_query_params(query)
total_count = self._fetch_total_object_count(endpoint="/dataset/")
paginated_data = self._fetch_all_pages(
endpoint="/dataset/",
pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
)
app_logger.info("[get_datasets][Exit] Found %d datasets.", total_count)
return total_count, paginated_data
# [/DEF:get_datasets:Function]
# [DEF:get_dataset:Function]
# @PURPOSE: Fetch full dataset structure including columns and metrics.
# @PRE: dataset_id must be a valid integer.
# @POST: Returns full dataset metadata from Superset API.
# @PARAM: dataset_id (int) - The ID of the dataset.
# @RETURN: Dict - The dataset metadata.
# @PURPOSE: Получает информацию о конкретном датасете по его ID.
# @PARAM: dataset_id (int) - ID датасета.
# @RETURN: Dict - Информация о датасете.
def get_dataset(self, dataset_id: int) -> Dict:
with belief_scope("SupersetClient.get_dataset", f"id={dataset_id}"):
"""
Fetch full dataset structure.
"""
return self.network.get(f"/api/v1/dataset/{dataset_id}").json()
app_logger.info("[get_dataset][Enter] Fetching dataset %s.", dataset_id)
response = self.network.request(method="GET", endpoint=f"/dataset/{dataset_id}")
response = cast(Dict, response)
app_logger.info("[get_dataset][Exit] Got dataset %s.", dataset_id)
return response
# [/DEF:get_dataset:Function]
# [DEF:update_dataset:Function]
# @PURPOSE: Update dataset metadata.
# @PRE: dataset_id must be valid, data must be a valid Superset dataset payload.
# @POST: Dataset is updated in Superset.
# @PARAM: dataset_id (int) - The ID of the dataset.
# @PARAM: data (Dict) - The payload for update.
def update_dataset(self, dataset_id: int, data: Dict):
# @PURPOSE: Обновляет данные датасета по его ID.
# @PARAM: dataset_id (int) - ID датасета.
# @PARAM: data (Dict) - Данные для обновления.
# @RETURN: Dict - Ответ API.
def update_dataset(self, dataset_id: int, data: Dict) -> Dict:
with belief_scope("SupersetClient.update_dataset", f"id={dataset_id}"):
"""
Update dataset metadata.
"""
self.network.put(f"/api/v1/dataset/{dataset_id}", json=data)
app_logger.info("[update_dataset][Enter] Updating dataset %s.", dataset_id)
response = self.network.request(
method="PUT",
endpoint=f"/dataset/{dataset_id}",
data=json.dumps(data),
headers={'Content-Type': 'application/json'}
)
response = cast(Dict, response)
app_logger.info("[update_dataset][Exit] Updated dataset %s.", dataset_id)
return response
# [/DEF:update_dataset:Function]
# [/SECTION]
# [SECTION: DATABASE OPERATIONS]
# [DEF:get_databases:Function]
# @PURPOSE: Получает полный список баз данных.
# @PARAM: query (Optional[Dict]) - Дополнительные параметры запроса.
# @RETURN: Tuple[int, List[Dict]] - Кортеж (общее количество, список баз данных).
def get_databases(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_databases"):
app_logger.info("[get_databases][Enter] Fetching databases.")
validated_query = self._validate_query_params(query or {})
if 'columns' not in validated_query:
validated_query['columns'] = []
total_count = self._fetch_total_object_count(endpoint="/database/")
paginated_data = self._fetch_all_pages(
endpoint="/database/",
pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
)
app_logger.info("[get_databases][Exit] Found %d databases.", total_count)
return total_count, paginated_data
# [/DEF:get_databases:Function]
# [DEF:get_database:Function]
# @PURPOSE: Получает информацию о конкретной базе данных по её ID.
# @PARAM: database_id (int) - ID базы данных.
# @RETURN: Dict - Информация о базе данных.
def get_database(self, database_id: int) -> Dict:
with belief_scope("get_database"):
app_logger.info("[get_database][Enter] Fetching database %s.", database_id)
response = self.network.request(method="GET", endpoint=f"/database/{database_id}")
response = cast(Dict, response)
app_logger.info("[get_database][Exit] Got database %s.", database_id)
return response
# [/DEF:get_database:Function]
# [DEF:get_databases_summary:Function]
# @PURPOSE: Fetch a summary of databases including uuid, name, and engine.
# @RETURN: List[Dict] - Summary of databases.
def get_databases_summary(self) -> List[Dict]:
with belief_scope("SupersetClient.get_databases_summary"):
query = {
"columns": ["uuid", "database_name", "backend"]
}
_, databases = self.get_databases(query=query)
# Map 'backend' to 'engine' for consistency with contracts
for db in databases:
db['engine'] = db.pop('backend', None)
return databases
# [/DEF:get_databases_summary:Function]
# [DEF:get_database_by_uuid:Function]
# @PURPOSE: Find a database by its UUID.
# @PARAM: db_uuid (str) - The UUID of the database.
# @RETURN: Optional[Dict] - Database info if found, else None.
def get_database_by_uuid(self, db_uuid: str) -> Optional[Dict]:
with belief_scope("SupersetClient.get_database_by_uuid", f"uuid={db_uuid}"):
query = {
"filters": [{"col": "uuid", "op": "eq", "value": db_uuid}]
}
_, databases = self.get_databases(query=query)
return databases[0] if databases else None
# [/DEF:get_database_by_uuid:Function]
# [/SECTION]
# [SECTION: HELPERS]
# [DEF:_resolve_target_id_for_delete:Function]
def _resolve_target_id_for_delete(self, dash_id: Optional[int], dash_slug: Optional[str]) -> Optional[int]:
with belief_scope("_resolve_target_id_for_delete"):
if dash_id is not None:
return dash_id
if dash_slug is not None:
app_logger.debug("[_resolve_target_id_for_delete][State] Resolving ID by slug '%s'.", dash_slug)
try:
_, candidates = self.get_dashboards(query={"filters": [{"col": "slug", "op": "eq", "value": dash_slug}]})
if candidates:
target_id = candidates[0]["id"]
app_logger.debug("[_resolve_target_id_for_delete][Success] Resolved slug to ID %s.", target_id)
return target_id
except Exception as e:
app_logger.warning("[_resolve_target_id_for_delete][Warning] Could not resolve slug '%s' to ID: %s", dash_slug, e)
return None
# [/DEF:_resolve_target_id_for_delete:Function]
# [DEF:_do_import:Function]
def _do_import(self, file_name: Union[str, Path]) -> Dict:
with belief_scope("_do_import"):
app_logger.debug(f"[_do_import][State] Uploading file: {file_name}")
file_path = Path(file_name)
if not file_path.exists():
app_logger.error(f"[_do_import][Failure] File does not exist: {file_name}")
raise FileNotFoundError(f"File does not exist: {file_name}")
return self.network.upload_file(
endpoint="/dashboard/import/",
file_info={"file_obj": file_path, "file_name": file_path.name, "form_field": "formData"},
extra_data={"overwrite": "true"},
timeout=self.env.timeout * 2,
)
# [/DEF:_do_import:Function]
# [DEF:_validate_export_response:Function]
def _validate_export_response(self, response: Response, dashboard_id: int) -> None:
with belief_scope("_validate_export_response"):
content_type = response.headers.get("Content-Type", "")
if "application/zip" not in content_type:
raise SupersetAPIError(f"Получен не ZIP-архив (Content-Type: {content_type})")
if not response.content:
raise SupersetAPIError("Получены пустые данные при экспорте")
# [/DEF:_validate_export_response:Function]
# [DEF:_resolve_export_filename:Function]
def _resolve_export_filename(self, response: Response, dashboard_id: int) -> str:
with belief_scope("_resolve_export_filename"):
filename = get_filename_from_headers(dict(response.headers))
if not filename:
from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
filename = f"dashboard_export_{dashboard_id}_{timestamp}.zip"
app_logger.warning("[_resolve_export_filename][Warning] Generated filename: %s", filename)
return filename
# [/DEF:_resolve_export_filename:Function]
# [DEF:_validate_query_params:Function]
def _validate_query_params(self, query: Optional[Dict]) -> Dict:
with belief_scope("_validate_query_params"):
base_query = {"page": 0, "page_size": 1000}
return {**base_query, **(query or {})}
# [/DEF:_validate_query_params:Function]
# [DEF:_fetch_total_object_count:Function]
def _fetch_total_object_count(self, endpoint: str) -> int:
with belief_scope("_fetch_total_object_count"):
return self.network.fetch_paginated_count(
endpoint=endpoint,
query_params={"page": 0, "page_size": 1},
count_field="count",
)
# [/DEF:_fetch_total_object_count:Function]
# [DEF:_fetch_all_pages:Function]
def _fetch_all_pages(self, endpoint: str, pagination_options: Dict) -> List[Dict]:
with belief_scope("_fetch_all_pages"):
return self.network.fetch_paginated_data(endpoint=endpoint, pagination_options=pagination_options)
# [/DEF:_fetch_all_pages:Function]
# [DEF:_validate_import_file:Function]
def _validate_import_file(self, zip_path: Union[str, Path]) -> None:
with belief_scope("_validate_import_file"):
path = Path(zip_path)
if not path.exists():
raise FileNotFoundError(f"Файл {zip_path} не существует")
if not zipfile.is_zipfile(path):
raise SupersetAPIError(f"Файл {zip_path} не является ZIP-архивом")
with zipfile.ZipFile(path, "r") as zf:
if not any(n.endswith("metadata.yaml") for n in zf.namelist()):
raise SupersetAPIError(f"Архив {zip_path} не содержит 'metadata.yaml'")
# [/DEF:_validate_import_file:Function]
# [/SECTION]
# [/DEF:SupersetClient:Class]
# [/DEF:backend.src.core.superset_client:Module]

View File

@@ -1,240 +1,237 @@
# [DEF:superset_tool.utils.dataset_mapper:Module]
#
# @SEMANTICS: dataset, mapping, postgresql, xlsx, superset
# @PURPOSE: Этот модуль отвечает за обновление метаданных (verbose_map) в датасетах Superset, извлекая их из PostgreSQL или XLSX-файлов.
# @LAYER: Domain
# @RELATION: DEPENDS_ON -> superset_tool.client
# @RELATION: DEPENDS_ON -> pandas
# @RELATION: DEPENDS_ON -> psycopg2
# @PUBLIC_API: DatasetMapper
# [SECTION: IMPORTS]
import pandas as pd # type: ignore
import psycopg2 # type: ignore
from superset_tool.client import SupersetClient
from superset_tool.utils.init_clients import setup_clients
from superset_tool.utils.logger import SupersetLogger
from typing import Dict, List, Optional, Any
# [/SECTION]
# [DEF:DatasetMapper:Class]
# @PURPOSE: Класс для меппинга и обновления verbose_map в датасетах Superset.
class DatasetMapper:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the mapper.
# @PRE: logger должен быть экземпляром SupersetLogger.
# @POST: Объект DatasetMapper инициализирован.
def __init__(self, logger: SupersetLogger):
self.logger = logger
# [/DEF:__init__:Function]
# [DEF:get_postgres_comments:Function]
# @PURPOSE: Извлекает комментарии к колонкам из системного каталога PostgreSQL.
# @PRE: db_config должен содержать валидные параметры подключения (host, port, user, password, dbname).
# @PRE: table_name и table_schema должны быть строками.
# @POST: Возвращается словарь, где ключи - имена колонок, значения - комментарии из БД.
# @THROW: Exception - При ошибках подключения или выполнения запроса к БД.
# @PARAM: db_config (Dict) - Конфигурация для подключения к БД.
# @PARAM: table_name (str) - Имя таблицы.
# @PARAM: table_schema (str) - Схема таблицы.
# @RETURN: Dict[str, str] - Словарь с комментариями к колонкам.
def get_postgres_comments(self, db_config: Dict, table_name: str, table_schema: str) -> Dict[str, str]:
with self.logger.belief_scope("Fetch comments from PostgreSQL"):
self.logger.info("[get_postgres_comments][Enter] Fetching comments from PostgreSQL for %s.%s.", table_schema, table_name)
query = f"""
SELECT
cols.column_name,
CASE
WHEN pg_catalog.col_description(
(SELECT c.oid
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = cols.table_name
AND n.nspname = cols.table_schema),
cols.ordinal_position::int
) LIKE '%|%' THEN
split_part(
pg_catalog.col_description(
(SELECT c.oid
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = cols.table_name
AND n.nspname = cols.table_schema),
cols.ordinal_position::int
),
'|',
1
)
ELSE
pg_catalog.col_description(
(SELECT c.oid
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = cols.table_name
AND n.nspname = cols.table_schema),
cols.ordinal_position::int
)
END AS column_comment
FROM
information_schema.columns cols
WHERE cols.table_catalog = '{db_config.get('dbname')}' AND cols.table_name = '{table_name}' AND cols.table_schema = '{table_schema}';
"""
comments = {}
try:
with psycopg2.connect(**db_config) as conn, conn.cursor() as cursor:
cursor.execute(query)
for row in cursor.fetchall():
if row[1]:
comments[row[0]] = row[1]
self.logger.info("[get_postgres_comments][Success] Fetched %d comments.", len(comments))
except Exception as e:
self.logger.error("[get_postgres_comments][Failure] %s", e, exc_info=True)
raise
return comments
# [/DEF:get_postgres_comments:Function]
# [DEF:load_excel_mappings:Function]
# @PURPOSE: Загружает меппинги 'column_name' -> 'column_comment' из XLSX файла.
# @PRE: file_path должен указывать на существующий XLSX файл.
# @POST: Возвращается словарь с меппингами из файла.
# @THROW: Exception - При ошибках чтения файла или парсинга.
# @PARAM: file_path (str) - Путь к XLSX файлу.
# @RETURN: Dict[str, str] - Словарь с меппингами.
def load_excel_mappings(self, file_path: str) -> Dict[str, str]:
with self.logger.belief_scope("Load mappings from Excel"):
self.logger.info("[load_excel_mappings][Enter] Loading mappings from %s.", file_path)
try:
df = pd.read_excel(file_path)
mappings = df.set_index('column_name')['verbose_name'].to_dict()
self.logger.info("[load_excel_mappings][Success] Loaded %d mappings.", len(mappings))
return mappings
except Exception as e:
self.logger.error("[load_excel_mappings][Failure] %s", e, exc_info=True)
raise
# [/DEF:load_excel_mappings:Function]
# [DEF:run_mapping:Function]
# @PURPOSE: Основная функция для выполнения меппинга и обновления verbose_map датасета в Superset.
# @PRE: superset_client должен быть авторизован.
# @PRE: dataset_id должен быть существующим ID в Superset.
# @POST: Если найдены изменения, датасет в Superset обновлен через API.
# @RELATION: CALLS -> self.get_postgres_comments
# @RELATION: CALLS -> self.load_excel_mappings
# @RELATION: CALLS -> superset_client.get_dataset
# @RELATION: CALLS -> superset_client.update_dataset
# @PARAM: superset_client (SupersetClient) - Клиент Superset.
# @PARAM: dataset_id (int) - ID датасета для обновления.
# @PARAM: source (str) - Источник данных ('postgres', 'excel', 'both').
# @PARAM: postgres_config (Optional[Dict]) - Конфигурация для подключения к PostgreSQL.
# @PARAM: excel_path (Optional[str]) - Путь к XLSX файлу.
# @PARAM: table_name (Optional[str]) - Имя таблицы в PostgreSQL.
# @PARAM: table_schema (Optional[str]) - Схема таблицы в PostgreSQL.
def run_mapping(self, superset_client: SupersetClient, dataset_id: int, source: str, postgres_config: Optional[Dict] = None, excel_path: Optional[str] = None, table_name: Optional[str] = None, table_schema: Optional[str] = None):
with self.logger.belief_scope(f"Run dataset mapping for ID {dataset_id}"):
self.logger.info("[run_mapping][Enter] Starting dataset mapping for ID %d from source '%s'.", dataset_id, source)
mappings: Dict[str, str] = {}
try:
if source in ['postgres', 'both']:
assert postgres_config and table_name and table_schema, "Postgres config is required."
mappings.update(self.get_postgres_comments(postgres_config, table_name, table_schema))
if source in ['excel', 'both']:
assert excel_path, "Excel path is required."
mappings.update(self.load_excel_mappings(excel_path))
if source not in ['postgres', 'excel', 'both']:
self.logger.error("[run_mapping][Failure] Invalid source: %s.", source)
return
dataset_response = superset_client.get_dataset(dataset_id)
dataset_data = dataset_response['result']
original_columns = dataset_data.get('columns', [])
updated_columns = []
changes_made = False
for column in original_columns:
col_name = column.get('column_name')
new_column = {
"column_name": col_name,
"id": column.get("id"),
"advanced_data_type": column.get("advanced_data_type"),
"description": column.get("description"),
"expression": column.get("expression"),
"extra": column.get("extra"),
"filterable": column.get("filterable"),
"groupby": column.get("groupby"),
"is_active": column.get("is_active"),
"is_dttm": column.get("is_dttm"),
"python_date_format": column.get("python_date_format"),
"type": column.get("type"),
"uuid": column.get("uuid"),
"verbose_name": column.get("verbose_name"),
}
new_column = {k: v for k, v in new_column.items() if v is not None}
if col_name in mappings:
mapping_value = mappings[col_name]
if isinstance(mapping_value, str) and new_column.get('verbose_name') != mapping_value:
new_column['verbose_name'] = mapping_value
changes_made = True
updated_columns.append(new_column)
updated_metrics = []
for metric in dataset_data.get("metrics", []):
new_metric = {
"id": metric.get("id"),
"metric_name": metric.get("metric_name"),
"expression": metric.get("expression"),
"verbose_name": metric.get("verbose_name"),
"description": metric.get("description"),
"d3format": metric.get("d3format"),
"currency": metric.get("currency"),
"extra": metric.get("extra"),
"warning_text": metric.get("warning_text"),
"metric_type": metric.get("metric_type"),
"uuid": metric.get("uuid"),
}
updated_metrics.append({k: v for k, v in new_metric.items() if v is not None})
if changes_made:
payload_for_update = {
"database_id": dataset_data.get("database", {}).get("id"),
"table_name": dataset_data.get("table_name"),
"schema": dataset_data.get("schema"),
"columns": updated_columns,
"owners": [owner["id"] for owner in dataset_data.get("owners", [])],
"metrics": updated_metrics,
"extra": dataset_data.get("extra"),
"description": dataset_data.get("description"),
"sql": dataset_data.get("sql"),
"cache_timeout": dataset_data.get("cache_timeout"),
"catalog": dataset_data.get("catalog"),
"default_endpoint": dataset_data.get("default_endpoint"),
"external_url": dataset_data.get("external_url"),
"fetch_values_predicate": dataset_data.get("fetch_values_predicate"),
"filter_select_enabled": dataset_data.get("filter_select_enabled"),
"is_managed_externally": dataset_data.get("is_managed_externally"),
"is_sqllab_view": dataset_data.get("is_sqllab_view"),
"main_dttm_col": dataset_data.get("main_dttm_col"),
"normalize_columns": dataset_data.get("normalize_columns"),
"offset": dataset_data.get("offset"),
"template_params": dataset_data.get("template_params"),
}
payload_for_update = {k: v for k, v in payload_for_update.items() if v is not None}
superset_client.update_dataset(dataset_id, payload_for_update)
self.logger.info("[run_mapping][Success] Dataset %d columns' verbose_name updated.", dataset_id)
else:
self.logger.info("[run_mapping][State] No changes in columns' verbose_name, skipping update.")
except (AssertionError, FileNotFoundError, Exception) as e:
self.logger.error("[run_mapping][Failure] %s", e, exc_info=True)
return
# [/DEF:run_mapping:Function]
# [/DEF:DatasetMapper:Class]
# [/DEF:superset_tool.utils.dataset_mapper:Module]
# [DEF:backend.core.utils.dataset_mapper:Module]
#
# @SEMANTICS: dataset, mapping, postgresql, xlsx, superset
# @PURPOSE: Этот модуль отвечает за обновление метаданных (verbose_map) в датасетах Superset, извлекая их из PostgreSQL или XLSX-файлов.
# @LAYER: Domain
# @RELATION: DEPENDS_ON -> backend.core.superset_client
# @RELATION: DEPENDS_ON -> pandas
# @RELATION: DEPENDS_ON -> psycopg2
# @PUBLIC_API: DatasetMapper
# [SECTION: IMPORTS]
import pandas as pd # type: ignore
import psycopg2 # type: ignore
from typing import Dict, List, Optional, Any
from ..logger import logger as app_logger, belief_scope
# [/SECTION]
# [DEF:DatasetMapper:Class]
# @PURPOSE: Класс для меппинга и обновления verbose_map в датасетах Superset.
class DatasetMapper:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the mapper.
# @POST: Объект DatasetMapper инициализирован.
def __init__(self):
pass
# [/DEF:__init__:Function]
# [DEF:get_postgres_comments:Function]
# @PURPOSE: Извлекает комментарии к колонкам из системного каталога PostgreSQL.
# @PRE: db_config должен содержать валидные параметры подключения (host, port, user, password, dbname).
# @PRE: table_name и table_schema должны быть строками.
# @POST: Возвращается словарь, где ключи - имена колонок, значения - комментарии из БД.
# @THROW: Exception - При ошибках подключения или выполнения запроса к БД.
# @PARAM: db_config (Dict) - Конфигурация для подключения к БД.
# @PARAM: table_name (str) - Имя таблицы.
# @PARAM: table_schema (str) - Схема таблицы.
# @RETURN: Dict[str, str] - Словарь с комментариями к колонкам.
def get_postgres_comments(self, db_config: Dict, table_name: str, table_schema: str) -> Dict[str, str]:
with belief_scope("Fetch comments from PostgreSQL"):
app_logger.info("[get_postgres_comments][Enter] Fetching comments from PostgreSQL for %s.%s.", table_schema, table_name)
query = f"""
SELECT
cols.column_name,
CASE
WHEN pg_catalog.col_description(
(SELECT c.oid
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = cols.table_name
AND n.nspname = cols.table_schema),
cols.ordinal_position::int
) LIKE '%|%' THEN
split_part(
pg_catalog.col_description(
(SELECT c.oid
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = cols.table_name
AND n.nspname = cols.table_schema),
cols.ordinal_position::int
),
'|',
1
)
ELSE
pg_catalog.col_description(
(SELECT c.oid
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = cols.table_name
AND n.nspname = cols.table_schema),
cols.ordinal_position::int
)
END AS column_comment
FROM
information_schema.columns cols
WHERE cols.table_catalog = '{db_config.get('dbname')}' AND cols.table_name = '{table_name}' AND cols.table_schema = '{table_schema}';
"""
comments = {}
try:
with psycopg2.connect(**db_config) as conn, conn.cursor() as cursor:
cursor.execute(query)
for row in cursor.fetchall():
if row[1]:
comments[row[0]] = row[1]
app_logger.info("[get_postgres_comments][Success] Fetched %d comments.", len(comments))
except Exception as e:
app_logger.error("[get_postgres_comments][Failure] %s", e, exc_info=True)
raise
return comments
# [/DEF:get_postgres_comments:Function]
# [DEF:load_excel_mappings:Function]
# @PURPOSE: Загружает меппинги 'column_name' -> 'column_comment' из XLSX файла.
# @PRE: file_path должен указывать на существующий XLSX файл.
# @POST: Возвращается словарь с меппингами из файла.
# @THROW: Exception - При ошибках чтения файла или парсинга.
# @PARAM: file_path (str) - Путь к XLSX файлу.
# @RETURN: Dict[str, str] - Словарь с меппингами.
def load_excel_mappings(self, file_path: str) -> Dict[str, str]:
with belief_scope("Load mappings from Excel"):
app_logger.info("[load_excel_mappings][Enter] Loading mappings from %s.", file_path)
try:
df = pd.read_excel(file_path)
mappings = df.set_index('column_name')['verbose_name'].to_dict()
app_logger.info("[load_excel_mappings][Success] Loaded %d mappings.", len(mappings))
return mappings
except Exception as e:
app_logger.error("[load_excel_mappings][Failure] %s", e, exc_info=True)
raise
# [/DEF:load_excel_mappings:Function]
# [DEF:run_mapping:Function]
# @PURPOSE: Основная функция для выполнения меппинга и обновления verbose_map датасета в Superset.
# @PRE: superset_client должен быть авторизован.
# @PRE: dataset_id должен быть существующим ID в Superset.
# @POST: Если найдены изменения, датасет в Superset обновлен через API.
# @RELATION: CALLS -> self.get_postgres_comments
# @RELATION: CALLS -> self.load_excel_mappings
# @RELATION: CALLS -> superset_client.get_dataset
# @RELATION: CALLS -> superset_client.update_dataset
# @PARAM: superset_client (Any) - Клиент Superset.
# @PARAM: dataset_id (int) - ID датасета для обновления.
# @PARAM: source (str) - Источник данных ('postgres', 'excel', 'both').
# @PARAM: postgres_config (Optional[Dict]) - Конфигурация для подключения к PostgreSQL.
# @PARAM: excel_path (Optional[str]) - Путь к XLSX файлу.
# @PARAM: table_name (Optional[str]) - Имя таблицы в PostgreSQL.
# @PARAM: table_schema (Optional[str]) - Схема таблицы в PostgreSQL.
def run_mapping(self, superset_client: Any, dataset_id: int, source: str, postgres_config: Optional[Dict] = None, excel_path: Optional[str] = None, table_name: Optional[str] = None, table_schema: Optional[str] = None):
with belief_scope(f"Run dataset mapping for ID {dataset_id}"):
app_logger.info("[run_mapping][Enter] Starting dataset mapping for ID %d from source '%s'.", dataset_id, source)
mappings: Dict[str, str] = {}
try:
if source in ['postgres', 'both']:
assert postgres_config and table_name and table_schema, "Postgres config is required."
mappings.update(self.get_postgres_comments(postgres_config, table_name, table_schema))
if source in ['excel', 'both']:
assert excel_path, "Excel path is required."
mappings.update(self.load_excel_mappings(excel_path))
if source not in ['postgres', 'excel', 'both']:
app_logger.error("[run_mapping][Failure] Invalid source: %s.", source)
return
dataset_response = superset_client.get_dataset(dataset_id)
dataset_data = dataset_response['result']
original_columns = dataset_data.get('columns', [])
updated_columns = []
changes_made = False
for column in original_columns:
col_name = column.get('column_name')
new_column = {
"column_name": col_name,
"id": column.get("id"),
"advanced_data_type": column.get("advanced_data_type"),
"description": column.get("description"),
"expression": column.get("expression"),
"extra": column.get("extra"),
"filterable": column.get("filterable"),
"groupby": column.get("groupby"),
"is_active": column.get("is_active"),
"is_dttm": column.get("is_dttm"),
"python_date_format": column.get("python_date_format"),
"type": column.get("type"),
"uuid": column.get("uuid"),
"verbose_name": column.get("verbose_name"),
}
new_column = {k: v for k, v in new_column.items() if v is not None}
if col_name in mappings:
mapping_value = mappings[col_name]
if isinstance(mapping_value, str) and new_column.get('verbose_name') != mapping_value:
new_column['verbose_name'] = mapping_value
changes_made = True
updated_columns.append(new_column)
updated_metrics = []
for metric in dataset_data.get("metrics", []):
new_metric = {
"id": metric.get("id"),
"metric_name": metric.get("metric_name"),
"expression": metric.get("expression"),
"verbose_name": metric.get("verbose_name"),
"description": metric.get("description"),
"d3format": metric.get("d3format"),
"currency": metric.get("currency"),
"extra": metric.get("extra"),
"warning_text": metric.get("warning_text"),
"metric_type": metric.get("metric_type"),
"uuid": metric.get("uuid"),
}
updated_metrics.append({k: v for k, v in new_metric.items() if v is not None})
if changes_made:
payload_for_update = {
"database_id": dataset_data.get("database", {}).get("id"),
"table_name": dataset_data.get("table_name"),
"schema": dataset_data.get("schema"),
"columns": updated_columns,
"owners": [owner["id"] for owner in dataset_data.get("owners", [])],
"metrics": updated_metrics,
"extra": dataset_data.get("extra"),
"description": dataset_data.get("description"),
"sql": dataset_data.get("sql"),
"cache_timeout": dataset_data.get("cache_timeout"),
"catalog": dataset_data.get("catalog"),
"default_endpoint": dataset_data.get("default_endpoint"),
"external_url": dataset_data.get("external_url"),
"fetch_values_predicate": dataset_data.get("fetch_values_predicate"),
"filter_select_enabled": dataset_data.get("filter_select_enabled"),
"is_managed_externally": dataset_data.get("is_managed_externally"),
"is_sqllab_view": dataset_data.get("is_sqllab_view"),
"main_dttm_col": dataset_data.get("main_dttm_col"),
"normalize_columns": dataset_data.get("normalize_columns"),
"offset": dataset_data.get("offset"),
"template_params": dataset_data.get("template_params"),
}
payload_for_update = {k: v for k, v in payload_for_update.items() if v is not None}
superset_client.update_dataset(dataset_id, payload_for_update)
app_logger.info("[run_mapping][Success] Dataset %d columns' verbose_name updated.", dataset_id)
else:
app_logger.info("[run_mapping][State] No changes in columns' verbose_name, skipping update.")
except (AssertionError, FileNotFoundError, Exception) as e:
app_logger.error("[run_mapping][Failure] %s", e, exc_info=True)
return
# [/DEF:run_mapping:Function]
# [/DEF:DatasetMapper:Class]
# [/DEF:backend.core.utils.dataset_mapper:Module]

View File

@@ -1,507 +1,486 @@
# [DEF:superset_tool.utils.fileio:Module]
#
# @SEMANTICS: file, io, zip, yaml, temp, archive, utility
# @PURPOSE: Предоставляет набор утилит для управления файловыми операциями, включая работу с временными файлами, архивами ZIP, файлами YAML и очистку директорий.
# @LAYER: Infra
# @RELATION: DEPENDS_ON -> superset_tool.exceptions
# @RELATION: DEPENDS_ON -> superset_tool.utils.logger
# @RELATION: DEPENDS_ON -> pyyaml
# @PUBLIC_API: create_temp_file, remove_empty_directories, read_dashboard_from_disk, calculate_crc32, RetentionPolicy, archive_exports, save_and_unpack_dashboard, update_yamls, create_dashboard_export, sanitize_filename, get_filename_from_headers, consolidate_archive_folders
# [SECTION: IMPORTS]
import os
import re
import zipfile
from pathlib import Path
from typing import Any, Optional, Tuple, Dict, List, Union, LiteralString, Generator
from contextlib import contextmanager
import tempfile
from datetime import date, datetime
import glob
import shutil
import zlib
from dataclasses import dataclass
import yaml
from superset_tool.exceptions import InvalidZipFormatError
from superset_tool.utils.logger import SupersetLogger
# [/SECTION]
# [DEF:create_temp_file:Function]
# @PURPOSE: Контекстный менеджер для создания временного файла или директории с гарантированным удалением.
# @PRE: suffix должен быть строкой, определяющей тип ресурса.
# @POST: Временный ресурс создан и путь к нему возвращен; ресурс удален после выхода из контекста.
# @PARAM: content (Optional[bytes]) - Бинарное содержимое для записи во временный файл.
# @PARAM: suffix (str) - Суффикс ресурса. Если `.dir`, создается директория.
# @PARAM: mode (str) - Режим записи в файл (e.g., 'wb').
# @PARAM: logger (Optional[SupersetLogger]) - Экземпляр логгера.
# @YIELDS: Path - Путь к временному ресурсу.
# @THROW: IOError - При ошибках создания ресурса.
@contextmanager
def create_temp_file(content: Optional[bytes] = None, suffix: str = ".zip", mode: str = 'wb', dry_run = False, logger: Optional[SupersetLogger] = None) -> Generator[Path, None, None]:
logger = logger or SupersetLogger(name="fileio")
with logger.belief_scope("Create temporary resource"):
resource_path = None
is_dir = suffix.startswith('.dir')
try:
if is_dir:
with tempfile.TemporaryDirectory(suffix=suffix) as temp_dir:
resource_path = Path(temp_dir)
logger.debug("[create_temp_file][State] Created temporary directory: %s", resource_path)
yield resource_path
else:
fd, temp_path_str = tempfile.mkstemp(suffix=suffix)
resource_path = Path(temp_path_str)
os.close(fd)
if content:
resource_path.write_bytes(content)
logger.debug("[create_temp_file][State] Created temporary file: %s", resource_path)
yield resource_path
finally:
if resource_path and resource_path.exists() and not dry_run:
try:
if resource_path.is_dir():
shutil.rmtree(resource_path)
logger.debug("[create_temp_file][Cleanup] Removed temporary directory: %s", resource_path)
else:
resource_path.unlink()
logger.debug("[create_temp_file][Cleanup] Removed temporary file: %s", resource_path)
except OSError as e:
logger.error("[create_temp_file][Failure] Error during cleanup of %s: %s", resource_path, e)
# [/DEF:create_temp_file:Function]
# [DEF:remove_empty_directories:Function]
# @PURPOSE: Рекурсивно удаляет все пустые поддиректории, начиная с указанного пути.
# @PRE: root_dir должен быть путем к существующей директории.
# @POST: Все пустые поддиректории удалены, возвращено их количество.
# @PARAM: root_dir (str) - Путь к корневой директории для очистки.
# @PARAM: logger (Optional[SupersetLogger]) - Экземпляр логгера.
# @RETURN: int - Количество удаленных директорий.
def remove_empty_directories(root_dir: str, logger: Optional[SupersetLogger] = None) -> int:
logger = logger or SupersetLogger(name="fileio")
with logger.belief_scope(f"Remove empty directories in {root_dir}"):
logger.info("[remove_empty_directories][Enter] Starting cleanup of empty directories in %s", root_dir)
removed_count = 0
if not os.path.isdir(root_dir):
logger.error("[remove_empty_directories][Failure] Directory not found: %s", root_dir)
return 0
for current_dir, _, _ in os.walk(root_dir, topdown=False):
if not os.listdir(current_dir):
try:
os.rmdir(current_dir)
removed_count += 1
logger.info("[remove_empty_directories][State] Removed empty directory: %s", current_dir)
except OSError as e:
logger.error("[remove_empty_directories][Failure] Failed to remove %s: %s", current_dir, e)
logger.info("[remove_empty_directories][Exit] Removed %d empty directories.", removed_count)
return removed_count
# [/DEF:remove_empty_directories:Function]
# [DEF:read_dashboard_from_disk:Function]
# @PURPOSE: Читает бинарное содержимое файла с диска.
# @PRE: file_path должен указывать на существующий файл.
# @POST: Возвращает байты содержимого и имя файла.
# @PARAM: file_path (str) - Путь к файлу.
# @PARAM: logger (Optional[SupersetLogger]) - Экземпляр логгера.
# @RETURN: Tuple[bytes, str] - Кортеж (содержимое, имя файла).
# @THROW: FileNotFoundError - Если файл не найден.
def read_dashboard_from_disk(file_path: str, logger: Optional[SupersetLogger] = None) -> Tuple[bytes, str]:
logger = logger or SupersetLogger(name="fileio")
with logger.belief_scope(f"Read dashboard from {file_path}"):
path = Path(file_path)
assert path.is_file(), f"Файл дашборда не найден: {file_path}"
logger.info("[read_dashboard_from_disk][Enter] Reading file: %s", file_path)
content = path.read_bytes()
if not content:
logger.warning("[read_dashboard_from_disk][Warning] File is empty: %s", file_path)
return content, path.name
# [/DEF:read_dashboard_from_disk:Function]
# [DEF:calculate_crc32:Function]
# @PURPOSE: Вычисляет контрольную сумму CRC32 для файла.
# @PRE: file_path должен быть объектом Path к существующему файлу.
# @POST: Возвращает 8-значную hex-строку CRC32.
# @PARAM: file_path (Path) - Путь к файлу.
# @RETURN: str - 8-значное шестнадцатеричное представление CRC32.
# @THROW: IOError - При ошибках чтения файла.
def calculate_crc32(file_path: Path) -> str:
logger = SupersetLogger(name="fileio")
with logger.belief_scope(f"Calculate CRC32 for {file_path}"):
with open(file_path, 'rb') as f:
crc32_value = zlib.crc32(f.read())
return f"{crc32_value:08x}"
# [/DEF:calculate_crc32:Function]
# [SECTION: DATA_CLASSES]
# [DEF:RetentionPolicy:DataClass]
# @PURPOSE: Определяет политику хранения для архивов (ежедневные, еженедельные, ежемесячные).
@dataclass
class RetentionPolicy:
daily: int = 7
weekly: int = 4
monthly: int = 12
# [/DEF:RetentionPolicy:DataClass]
# [/SECTION]
# [DEF:archive_exports:Function]
# @PURPOSE: Управляет архивом экспортированных файлов, применяя политику хранения и дедупликацию.
# @PRE: output_dir должен быть путем к существующей директории.
# @POST: Старые или дублирующиеся архивы удалены согласно политике.
# @RELATION: CALLS -> apply_retention_policy
# @RELATION: CALLS -> calculate_crc32
# @PARAM: output_dir (str) - Директория с архивами.
# @PARAM: policy (RetentionPolicy) - Политика хранения.
# @PARAM: deduplicate (bool) - Флаг для включения удаления дубликатов по CRC32.
# @PARAM: logger (Optional[SupersetLogger]) - Экземпляр логгера.
def archive_exports(output_dir: str, policy: RetentionPolicy, deduplicate: bool = False, logger: Optional[SupersetLogger] = None) -> None:
logger = logger or SupersetLogger(name="fileio")
with logger.belief_scope(f"Archive exports in {output_dir}"):
output_path = Path(output_dir)
if not output_path.is_dir():
logger.warning("[archive_exports][Skip] Archive directory not found: %s", output_dir)
return
logger.info("[archive_exports][Enter] Managing archive in %s", output_dir)
# 1. Collect all zip files
zip_files = list(output_path.glob("*.zip"))
if not zip_files:
logger.info("[archive_exports][State] No zip files found in %s", output_dir)
return
# 2. Deduplication
if deduplicate:
logger.info("[archive_exports][State] Starting deduplication...")
checksums = {}
files_to_remove = []
# Sort by modification time (newest first) to keep the latest version
zip_files.sort(key=lambda f: f.stat().st_mtime, reverse=True)
for file_path in zip_files:
try:
crc = calculate_crc32(file_path)
if crc in checksums:
files_to_remove.append(file_path)
logger.debug("[archive_exports][State] Duplicate found: %s (same as %s)", file_path.name, checksums[crc].name)
else:
checksums[crc] = file_path
except Exception as e:
logger.error("[archive_exports][Failure] Failed to calculate CRC32 for %s: %s", file_path, e)
for f in files_to_remove:
try:
f.unlink()
zip_files.remove(f)
logger.info("[archive_exports][State] Removed duplicate: %s", f.name)
except OSError as e:
logger.error("[archive_exports][Failure] Failed to remove duplicate %s: %s", f, e)
# 3. Retention Policy
files_with_dates = []
for file_path in zip_files:
# Try to extract date from filename
# Pattern: ..._YYYYMMDD_HHMMSS.zip or ..._YYYYMMDD.zip
match = re.search(r'_(\d{8})_', file_path.name)
file_date = None
if match:
try:
date_str = match.group(1)
file_date = datetime.strptime(date_str, "%Y%m%d").date()
except ValueError:
pass
if not file_date:
# Fallback to modification time
file_date = datetime.fromtimestamp(file_path.stat().st_mtime).date()
files_with_dates.append((file_path, file_date))
files_to_keep = apply_retention_policy(files_with_dates, policy, logger)
for file_path, _ in files_with_dates:
if file_path not in files_to_keep:
try:
file_path.unlink()
logger.info("[archive_exports][State] Removed by retention policy: %s", file_path.name)
except OSError as e:
logger.error("[archive_exports][Failure] Failed to remove %s: %s", file_path, e)
# [/DEF:archive_exports:Function]
# [DEF:apply_retention_policy:Function]
# @PURPOSE: (Helper) Применяет политику хранения к списку файлов, возвращая те, что нужно сохранить.
# @PRE: files_with_dates is a list of (Path, date) tuples.
# @POST: Returns a set of files to keep.
# @PARAM: files_with_dates (List[Tuple[Path, date]]) - Список файлов с датами.
# @PARAM: policy (RetentionPolicy) - Политика хранения.
# @PARAM: logger (SupersetLogger) - Логгер.
# @RETURN: set - Множество путей к файлам, которые должны быть сохранены.
def apply_retention_policy(files_with_dates: List[Tuple[Path, date]], policy: RetentionPolicy, logger: SupersetLogger) -> set:
with logger.belief_scope("Apply retention policy"):
# Сортируем по дате (от новой к старой)
sorted_files = sorted(files_with_dates, key=lambda x: x[1], reverse=True)
# Словарь для хранения файлов по категориям
daily_files = []
weekly_files = []
monthly_files = []
today = date.today()
for file_path, file_date in sorted_files:
# Ежедневные
if (today - file_date).days < policy.daily:
daily_files.append(file_path)
# Еженедельные
elif (today - file_date).days < policy.weekly * 7:
weekly_files.append(file_path)
# Ежемесячные
elif (today - file_date).days < policy.monthly * 30:
monthly_files.append(file_path)
# Возвращаем множество файлов, которые нужно сохранить
files_to_keep = set()
files_to_keep.update(daily_files)
files_to_keep.update(weekly_files[:policy.weekly])
files_to_keep.update(monthly_files[:policy.monthly])
logger.debug("[apply_retention_policy][State] Keeping %d files according to retention policy", len(files_to_keep))
return files_to_keep
# [/DEF:apply_retention_policy:Function]
# [DEF:save_and_unpack_dashboard:Function]
# @PURPOSE: Сохраняет бинарное содержимое ZIP-архива на диск и опционально распаковывает его.
# @PRE: zip_content должен быть байтами валидного ZIP-архива.
# @POST: ZIP-файл сохранен, и если unpack=True, он распакован в output_dir.
# @PARAM: zip_content (bytes) - Содержимое ZIP-архива.
# @PARAM: output_dir (Union[str, Path]) - Директория для сохранения.
# @PARAM: unpack (bool) - Флаг, нужно ли распаковывать архив.
# @PARAM: original_filename (Optional[str]) - Исходное имя файла для сохранения.
# @PARAM: logger (Optional[SupersetLogger]) - Экземпляр логгера.
# @RETURN: Tuple[Path, Optional[Path]] - Путь к ZIP-файлу и, если применимо, путь к директории с распаковкой.
# @THROW: InvalidZipFormatError - При ошибке формата ZIP.
def save_and_unpack_dashboard(zip_content: bytes, output_dir: Union[str, Path], unpack: bool = False, original_filename: Optional[str] = None, logger: Optional[SupersetLogger] = None) -> Tuple[Path, Optional[Path]]:
logger = logger or SupersetLogger(name="fileio")
with logger.belief_scope("Save and unpack dashboard"):
logger.info("[save_and_unpack_dashboard][Enter] Processing dashboard. Unpack: %s", unpack)
try:
output_path = Path(output_dir)
output_path.mkdir(parents=True, exist_ok=True)
zip_name = sanitize_filename(original_filename) if original_filename else f"dashboard_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
zip_path = output_path / zip_name
zip_path.write_bytes(zip_content)
logger.info("[save_and_unpack_dashboard][State] Dashboard saved to: %s", zip_path)
if unpack:
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(output_path)
logger.info("[save_and_unpack_dashboard][State] Dashboard unpacked to: %s", output_path)
return zip_path, output_path
return zip_path, None
except zipfile.BadZipFile as e:
logger.error("[save_and_unpack_dashboard][Failure] Invalid ZIP archive: %s", e)
raise InvalidZipFormatError(f"Invalid ZIP file: {e}") from e
# [/DEF:save_and_unpack_dashboard:Function]
# [DEF:update_yamls:Function]
# @PURPOSE: Обновляет конфигурации в YAML-файлах, заменяя значения или применяя regex.
# @PRE: path должен быть существующей директорией.
# @POST: Все YAML файлы в директории обновлены согласно переданным параметрам.
# @RELATION: CALLS -> _update_yaml_file
# @THROW: FileNotFoundError - Если `path` не существует.
# @PARAM: db_configs (Optional[List[Dict]]) - Список конфигураций для замены.
# @PARAM: path (str) - Путь к директории с YAML файлами.
# @PARAM: regexp_pattern (Optional[LiteralString]) - Паттерн для поиска.
# @PARAM: replace_string (Optional[LiteralString]) - Строка для замены.
# @PARAM: logger (Optional[SupersetLogger]) - Экземпляр логгера.
def update_yamls(db_configs: Optional[List[Dict[str, Any]]] = None, path: str = "dashboards", regexp_pattern: Optional[LiteralString] = None, replace_string: Optional[LiteralString] = None, logger: Optional[SupersetLogger] = None) -> None:
logger = logger or SupersetLogger(name="fileio")
with logger.belief_scope("Update YAML configurations"):
logger.info("[update_yamls][Enter] Starting YAML configuration update.")
dir_path = Path(path)
assert dir_path.is_dir(), f"Путь {path} не существует или не является директорией"
configs: List[Dict[str, Any]] = db_configs or []
for file_path in dir_path.rglob("*.yaml"):
_update_yaml_file(file_path, configs, regexp_pattern, replace_string, logger)
# [/DEF:update_yamls:Function]
# [DEF:_update_yaml_file:Function]
# @PURPOSE: (Helper) Обновляет один YAML файл.
# @PRE: file_path должен быть объектом Path к существующему YAML файлу.
# @POST: Файл обновлен согласно переданным конфигурациям или регулярному выражению.
# @PARAM: file_path (Path) - Путь к файлу.
# @PARAM: db_configs (List[Dict]) - Конфигурации.
# @PARAM: regexp_pattern (Optional[str]) - Паттерн.
# @PARAM: replace_string (Optional[str]) - Замена.
# @PARAM: logger (SupersetLogger) - Логгер.
def _update_yaml_file(file_path: Path, db_configs: List[Dict[str, Any]], regexp_pattern: Optional[str], replace_string: Optional[str], logger: SupersetLogger) -> None:
with logger.belief_scope(f"Update YAML file: {file_path}"):
# Читаем содержимое файла
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
except Exception as e:
logger.error("[_update_yaml_file][Failure] Failed to read %s: %s", file_path, e)
return
# Если задан pattern и replace_string, применяем замену по регулярному выражению
if regexp_pattern and replace_string:
try:
new_content = re.sub(regexp_pattern, replace_string, content)
if new_content != content:
with open(file_path, 'w', encoding='utf-8') as f:
f.write(new_content)
logger.info("[_update_yaml_file][State] Updated %s using regex pattern", file_path)
except Exception as e:
logger.error("[_update_yaml_file][Failure] Error applying regex to %s: %s", file_path, e)
# Если заданы конфигурации, заменяем значения (поддержка old/new)
if db_configs:
try:
# Прямой текстовый заменитель для старых/новых значений, чтобы сохранить структуру файла
modified_content = content
for cfg in db_configs:
# Ожидаем структуру: {'old': {...}, 'new': {...}}
old_cfg = cfg.get('old', {})
new_cfg = cfg.get('new', {})
for key, old_val in old_cfg.items():
if key in new_cfg:
new_val = new_cfg[key]
# Заменяем только точные совпадения старого значения в тексте YAML, используя ключ для контекста
if isinstance(old_val, str):
# Ищем паттерн: key: "value" или key: value
key_pattern = re.escape(key)
val_pattern = re.escape(old_val)
# Группы: 1=ключ+разделитель, 2=открывающая кавычка (опц), 3=значение, 4=закрывающая кавычка (опц)
pattern = rf'({key_pattern}\s*:\s*)(["\']?)({val_pattern})(["\']?)'
# [DEF:replacer:Function]
# @PURPOSE: Функция замены, сохраняющая кавычки если они были.
# @PRE: match должен быть объектом совпадения регулярного выражения.
# @POST: Возвращает строку с новым значением, сохраняя префикс и кавычки.
def replacer(match):
with logger.belief_scope("replacer"):
prefix = match.group(1)
quote_open = match.group(2)
quote_close = match.group(4)
return f"{prefix}{quote_open}{new_val}{quote_close}"
# [/DEF:replacer:Function]
modified_content = re.sub(pattern, replacer, modified_content)
logger.info("[_update_yaml_file][State] Replaced '%s' with '%s' for key %s in %s", old_val, new_val, key, file_path)
# Записываем обратно изменённый контент без парсинга YAML, сохраняем оригинальное форматирование
with open(file_path, 'w', encoding='utf-8') as f:
f.write(modified_content)
except Exception as e:
logger.error("[_update_yaml_file][Failure] Error performing raw replacement in %s: %s", file_path, e)
# [/DEF:_update_yaml_file:Function]
# [DEF:create_dashboard_export:Function]
# @PURPOSE: Создает ZIP-архив из указанных исходных путей.
# @PRE: source_paths должен содержать существующие пути.
# @POST: ZIP-архив создан по пути zip_path.
# @PARAM: zip_path (Union[str, Path]) - Путь для сохранения ZIP архива.
# @PARAM: source_paths (List[Union[str, Path]]) - Список исходных путей для архивации.
# @PARAM: exclude_extensions (Optional[List[str]]) - Список расширений для исключения.
# @PARAM: logger (Optional[SupersetLogger]) - Экземпляр логгера.
# @RETURN: bool - `True` при успехе, `False` при ошибке.
def create_dashboard_export(zip_path: Union[str, Path], source_paths: List[Union[str, Path]], exclude_extensions: Optional[List[str]] = None, logger: Optional[SupersetLogger] = None) -> bool:
logger = logger or SupersetLogger(name="fileio")
with logger.belief_scope(f"Create dashboard export: {zip_path}"):
logger.info("[create_dashboard_export][Enter] Packing dashboard: %s -> %s", source_paths, zip_path)
try:
exclude_ext = [ext.lower() for ext in exclude_extensions or []]
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
for src_path_str in source_paths:
src_path = Path(src_path_str)
assert src_path.exists(), f"Путь не найден: {src_path}"
for item in src_path.rglob('*'):
if item.is_file() and item.suffix.lower() not in exclude_ext:
arcname = item.relative_to(src_path.parent)
zipf.write(item, arcname)
logger.info("[create_dashboard_export][Exit] Archive created: %s", zip_path)
return True
except (IOError, zipfile.BadZipFile, AssertionError) as e:
logger.error("[create_dashboard_export][Failure] Error: %s", e, exc_info=True)
return False
# [/DEF:create_dashboard_export:Function]
# [DEF:sanitize_filename:Function]
# @PURPOSE: Очищает строку от символов, недопустимых в именах файлов.
# @PRE: filename должен быть строкой.
# @POST: Возвращает строку без спецсимволов.
# @PARAM: filename (str) - Исходное имя файла.
# @RETURN: str - Очищенная строка.
def sanitize_filename(filename: str) -> str:
logger = SupersetLogger(name="fileio")
with logger.belief_scope(f"Sanitize filename: {filename}"):
return re.sub(r'[\\/*?:"<>|]', "_", filename).strip()
# [/DEF:sanitize_filename:Function]
# [DEF:get_filename_from_headers:Function]
# @PURPOSE: Извлекает имя файла из HTTP заголовка 'Content-Disposition'.
# @PRE: headers должен быть словарем заголовков.
# @POST: Возвращает имя файла или None, если заголовок отсутствует.
# @PARAM: headers (dict) - Словарь HTTP заголовков.
# @RETURN: Optional[str] - Имя файла or `None`.
def get_filename_from_headers(headers: dict) -> Optional[str]:
logger = SupersetLogger(name="fileio")
with logger.belief_scope("Get filename from headers"):
content_disposition = headers.get("Content-Disposition", "")
if match := re.search(r'filename="?([^"]+)"?', content_disposition):
return match.group(1).strip()
return None
# [/DEF:get_filename_from_headers:Function]
# [DEF:consolidate_archive_folders:Function]
# @PURPOSE: Консолидирует директории архивов на основе общего слага в имени.
# @PRE: root_directory должен быть объектом Path к существующей директории.
# @POST: Директории с одинаковым префиксом объединены в одну.
# @THROW: TypeError, ValueError - Если `root_directory` невалиден.
# @PARAM: root_directory (Path) - Корневая директория для консолидации.
# @PARAM: logger (Optional[SupersetLogger]) - Экземпляр логгера.
def consolidate_archive_folders(root_directory: Path, logger: Optional[SupersetLogger] = None) -> None:
logger = logger or SupersetLogger(name="fileio")
with logger.belief_scope(f"Consolidate archives in {root_directory}"):
assert isinstance(root_directory, Path), "root_directory must be a Path object."
assert root_directory.is_dir(), "root_directory must be an existing directory."
logger.info("[consolidate_archive_folders][Enter] Consolidating archives in %s", root_directory)
# Собираем все директории с архивами
archive_dirs = []
for item in root_directory.iterdir():
if item.is_dir():
# Проверяем, есть ли в директории ZIP-архивы
if any(item.glob("*.zip")):
archive_dirs.append(item)
# Группируем по слагу (части имени до первого '_')
slug_groups = {}
for dir_path in archive_dirs:
dir_name = dir_path.name
slug = dir_name.split('_')[0] if '_' in dir_name else dir_name
if slug not in slug_groups:
slug_groups[slug] = []
slug_groups[slug].append(dir_path)
# Для каждой группы консолидируем
for slug, dirs in slug_groups.items():
if len(dirs) <= 1:
continue
# Создаем целевую директорию
target_dir = root_directory / slug
target_dir.mkdir(exist_ok=True)
logger.info("[consolidate_archive_folders][State] Consolidating %d directories under %s", len(dirs), target_dir)
# Перемещаем содержимое
for source_dir in dirs:
if source_dir == target_dir:
continue
for item in source_dir.iterdir():
dest_item = target_dir / item.name
try:
if item.is_dir():
shutil.move(str(item), str(dest_item))
else:
shutil.move(str(item), str(dest_item))
except Exception as e:
logger.error("[consolidate_archive_folders][Failure] Failed to move %s to %s: %s", item, dest_item, e)
# Удаляем исходную директорию
try:
source_dir.rmdir()
logger.info("[consolidate_archive_folders][State] Removed source directory: %s", source_dir)
except Exception as e:
logger.error("[consolidate_archive_folders][Failure] Failed to remove source directory %s: %s", source_dir, e)
# [/DEF:consolidate_archive_folders:Function]
# [/DEF:superset_tool.utils.fileio:Module]
# [DEF:backend.core.utils.fileio:Module]
#
# @SEMANTICS: file, io, zip, yaml, temp, archive, utility
# @PURPOSE: Предоставляет набор утилит для управления файловыми операциями, включая работу с временными файлами, архивами ZIP, файлами YAML и очистку директорий.
# @LAYER: Infra
# @RELATION: DEPENDS_ON -> backend.src.core.logger
# @RELATION: DEPENDS_ON -> pyyaml
# @PUBLIC_API: create_temp_file, remove_empty_directories, read_dashboard_from_disk, calculate_crc32, RetentionPolicy, archive_exports, save_and_unpack_dashboard, update_yamls, create_dashboard_export, sanitize_filename, get_filename_from_headers, consolidate_archive_folders
# [SECTION: IMPORTS]
import os
import re
import zipfile
from pathlib import Path
from typing import Any, Optional, Tuple, Dict, List, Union, LiteralString, Generator
from contextlib import contextmanager
import tempfile
from datetime import date, datetime
import shutil
import zlib
from dataclasses import dataclass
import yaml
from ..logger import logger as app_logger, belief_scope
# [/SECTION]
# [DEF:InvalidZipFormatError:Class]
class InvalidZipFormatError(Exception):
pass
# [DEF:create_temp_file:Function]
# @PURPOSE: Контекстный менеджер для создания временного файла или директории с гарантированным удалением.
# @PRE: suffix должен быть строкой, определяющей тип ресурса.
# @POST: Временный ресурс создан и путь к нему возвращен; ресурс удален после выхода из контекста.
# @PARAM: content (Optional[bytes]) - Бинарное содержимое для записи во временный файл.
# @PARAM: suffix (str) - Суффикс ресурса. Если `.dir`, создается директория.
# @PARAM: mode (str) - Режим записи в файл (e.g., 'wb').
# @YIELDS: Path - Путь к временному ресурсу.
# @THROW: IOError - При ошибках создания ресурса.
@contextmanager
def create_temp_file(content: Optional[bytes] = None, suffix: str = ".zip", mode: str = 'wb', dry_run = False) -> Generator[Path, None, None]:
with belief_scope("Create temporary resource"):
resource_path = None
is_dir = suffix.startswith('.dir')
try:
if is_dir:
with tempfile.TemporaryDirectory(suffix=suffix) as temp_dir:
resource_path = Path(temp_dir)
app_logger.debug("[create_temp_file][State] Created temporary directory: %s", resource_path)
yield resource_path
else:
fd, temp_path_str = tempfile.mkstemp(suffix=suffix)
resource_path = Path(temp_path_str)
os.close(fd)
if content:
resource_path.write_bytes(content)
app_logger.debug("[create_temp_file][State] Created temporary file: %s", resource_path)
yield resource_path
finally:
if resource_path and resource_path.exists() and not dry_run:
try:
if resource_path.is_dir():
shutil.rmtree(resource_path)
app_logger.debug("[create_temp_file][Cleanup] Removed temporary directory: %s", resource_path)
else:
resource_path.unlink()
app_logger.debug("[create_temp_file][Cleanup] Removed temporary file: %s", resource_path)
except OSError as e:
app_logger.error("[create_temp_file][Failure] Error during cleanup of %s: %s", resource_path, e)
# [/DEF:create_temp_file:Function]
# [DEF:remove_empty_directories:Function]
# @PURPOSE: Рекурсивно удаляет все пустые поддиректории, начиная с указанного пути.
# @PRE: root_dir должен быть путем к существующей директории.
# @POST: Все пустые поддиректории удалены, возвращено их количество.
# @PARAM: root_dir (str) - Путь к корневой директории для очистки.
# @RETURN: int - Количество удаленных директорий.
def remove_empty_directories(root_dir: str) -> int:
with belief_scope(f"Remove empty directories in {root_dir}"):
app_logger.info("[remove_empty_directories][Enter] Starting cleanup of empty directories in %s", root_dir)
removed_count = 0
if not os.path.isdir(root_dir):
app_logger.error("[remove_empty_directories][Failure] Directory not found: %s", root_dir)
return 0
for current_dir, _, _ in os.walk(root_dir, topdown=False):
if not os.listdir(current_dir):
try:
os.rmdir(current_dir)
removed_count += 1
app_logger.info("[remove_empty_directories][State] Removed empty directory: %s", current_dir)
except OSError as e:
app_logger.error("[remove_empty_directories][Failure] Failed to remove %s: %s", current_dir, e)
app_logger.info("[remove_empty_directories][Exit] Removed %d empty directories.", removed_count)
return removed_count
# [/DEF:remove_empty_directories:Function]
# [DEF:read_dashboard_from_disk:Function]
# @PURPOSE: Читает бинарное содержимое файла с диска.
# @PRE: file_path должен указывать на существующий файл.
# @POST: Возвращает байты содержимого и имя файла.
# @PARAM: file_path (str) - Путь к файлу.
# @RETURN: Tuple[bytes, str] - Кортеж (содержимое, имя файла).
# @THROW: FileNotFoundError - Если файл не найден.
def read_dashboard_from_disk(file_path: str) -> Tuple[bytes, str]:
with belief_scope(f"Read dashboard from {file_path}"):
path = Path(file_path)
assert path.is_file(), f"Файл дашборда не найден: {file_path}"
app_logger.info("[read_dashboard_from_disk][Enter] Reading file: %s", file_path)
content = path.read_bytes()
if not content:
app_logger.warning("[read_dashboard_from_disk][Warning] File is empty: %s", file_path)
return content, path.name
# [/DEF:read_dashboard_from_disk:Function]
# [DEF:calculate_crc32:Function]
# @PURPOSE: Вычисляет контрольную сумму CRC32 для файла.
# @PRE: file_path должен быть объектом Path к существующему файлу.
# @POST: Возвращает 8-значную hex-строку CRC32.
# @PARAM: file_path (Path) - Путь к файлу.
# @RETURN: str - 8-значное шестнадцатеричное представление CRC32.
# @THROW: IOError - При ошибках чтения файла.
def calculate_crc32(file_path: Path) -> str:
with belief_scope(f"Calculate CRC32 for {file_path}"):
with open(file_path, 'rb') as f:
crc32_value = zlib.crc32(f.read())
return f"{crc32_value:08x}"
# [/DEF:calculate_crc32:Function]
# [SECTION: DATA_CLASSES]
# [DEF:RetentionPolicy:DataClass]
# @PURPOSE: Определяет политику хранения для архивов (ежедневные, еженедельные, ежемесячные).
@dataclass
class RetentionPolicy:
daily: int = 7
weekly: int = 4
monthly: int = 12
# [/DEF:RetentionPolicy:DataClass]
# [/SECTION]
# [DEF:archive_exports:Function]
# @PURPOSE: Управляет архивом экспортированных файлов, применяя политику хранения и дедупликацию.
# @PRE: output_dir должен быть путем к существующей директории.
# @POST: Старые или дублирующиеся архивы удалены согласно политике.
# @RELATION: CALLS -> apply_retention_policy
# @RELATION: CALLS -> calculate_crc32
# @PARAM: output_dir (str) - Директория с архивами.
# @PARAM: policy (RetentionPolicy) - Политика хранения.
# @PARAM: deduplicate (bool) - Флаг для включения удаления дубликатов по CRC32.
def archive_exports(output_dir: str, policy: RetentionPolicy, deduplicate: bool = False) -> None:
with belief_scope(f"Archive exports in {output_dir}"):
output_path = Path(output_dir)
if not output_path.is_dir():
app_logger.warning("[archive_exports][Skip] Archive directory not found: %s", output_dir)
return
app_logger.info("[archive_exports][Enter] Managing archive in %s", output_dir)
# 1. Collect all zip files
zip_files = list(output_path.glob("*.zip"))
if not zip_files:
app_logger.info("[archive_exports][State] No zip files found in %s", output_dir)
return
# 2. Deduplication
if deduplicate:
app_logger.info("[archive_exports][State] Starting deduplication...")
checksums = {}
files_to_remove = []
# Sort by modification time (newest first) to keep the latest version
zip_files.sort(key=lambda f: f.stat().st_mtime, reverse=True)
for file_path in zip_files:
try:
crc = calculate_crc32(file_path)
if crc in checksums:
files_to_remove.append(file_path)
app_logger.debug("[archive_exports][State] Duplicate found: %s (same as %s)", file_path.name, checksums[crc].name)
else:
checksums[crc] = file_path
except Exception as e:
app_logger.error("[archive_exports][Failure] Failed to calculate CRC32 for %s: %s", file_path, e)
for f in files_to_remove:
try:
f.unlink()
zip_files.remove(f)
app_logger.info("[archive_exports][State] Removed duplicate: %s", f.name)
except OSError as e:
app_logger.error("[archive_exports][Failure] Failed to remove duplicate %s: %s", f, e)
# 3. Retention Policy
files_with_dates = []
for file_path in zip_files:
# Try to extract date from filename
# Pattern: ..._YYYYMMDD_HHMMSS.zip or ..._YYYYMMDD.zip
match = re.search(r'_(\d{8})_', file_path.name)
file_date = None
if match:
try:
date_str = match.group(1)
file_date = datetime.strptime(date_str, "%Y%m%d").date()
except ValueError:
pass
if not file_date:
# Fallback to modification time
file_date = datetime.fromtimestamp(file_path.stat().st_mtime).date()
files_with_dates.append((file_path, file_date))
files_to_keep = apply_retention_policy(files_with_dates, policy)
for file_path, _ in files_with_dates:
if file_path not in files_to_keep:
try:
file_path.unlink()
app_logger.info("[archive_exports][State] Removed by retention policy: %s", file_path.name)
except OSError as e:
app_logger.error("[archive_exports][Failure] Failed to remove %s: %s", file_path, e)
# [/DEF:archive_exports:Function]
# [DEF:apply_retention_policy:Function]
# @PURPOSE: (Helper) Применяет политику хранения к списку файлов, возвращая те, что нужно сохранить.
# @PRE: files_with_dates is a list of (Path, date) tuples.
# @POST: Returns a set of files to keep.
# @PARAM: files_with_dates (List[Tuple[Path, date]]) - Список файлов с датами.
# @PARAM: policy (RetentionPolicy) - Политика хранения.
# @RETURN: set - Множество путей к файлам, которые должны быть сохранены.
def apply_retention_policy(files_with_dates: List[Tuple[Path, date]], policy: RetentionPolicy) -> set:
with belief_scope("Apply retention policy"):
# Сортируем по дате (от новой к старой)
sorted_files = sorted(files_with_dates, key=lambda x: x[1], reverse=True)
# Словарь для хранения файлов по категориям
daily_files = []
weekly_files = []
monthly_files = []
today = date.today()
for file_path, file_date in sorted_files:
# Ежедневные
if (today - file_date).days < policy.daily:
daily_files.append(file_path)
# Еженедельные
elif (today - file_date).days < policy.weekly * 7:
weekly_files.append(file_path)
# Ежемесячные
elif (today - file_date).days < policy.monthly * 30:
monthly_files.append(file_path)
# Возвращаем множество файлов, которые нужно сохранить
files_to_keep = set()
files_to_keep.update(daily_files)
files_to_keep.update(weekly_files[:policy.weekly])
files_to_keep.update(monthly_files[:policy.monthly])
app_logger.debug("[apply_retention_policy][State] Keeping %d files according to retention policy", len(files_to_keep))
return files_to_keep
# [/DEF:apply_retention_policy:Function]
# [DEF:save_and_unpack_dashboard:Function]
# @PURPOSE: Сохраняет бинарное содержимое ZIP-архива на диск и опционально распаковывает его.
# @PRE: zip_content должен быть байтами валидного ZIP-архива.
# @POST: ZIP-файл сохранен, и если unpack=True, он распакован в output_dir.
# @PARAM: zip_content (bytes) - Содержимое ZIP-архива.
# @PARAM: output_dir (Union[str, Path]) - Директория для сохранения.
# @PARAM: unpack (bool) - Флаг, нужно ли распаковывать архив.
# @PARAM: original_filename (Optional[str]) - Исходное имя файла для сохранения.
# @RETURN: Tuple[Path, Optional[Path]] - Путь к ZIP-файлу и, если применимо, путь к директории с распаковкой.
# @THROW: InvalidZipFormatError - При ошибке формата ZIP.
def save_and_unpack_dashboard(zip_content: bytes, output_dir: Union[str, Path], unpack: bool = False, original_filename: Optional[str] = None) -> Tuple[Path, Optional[Path]]:
with belief_scope("Save and unpack dashboard"):
app_logger.info("[save_and_unpack_dashboard][Enter] Processing dashboard. Unpack: %s", unpack)
try:
output_path = Path(output_dir)
output_path.mkdir(parents=True, exist_ok=True)
zip_name = sanitize_filename(original_filename) if original_filename else f"dashboard_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
zip_path = output_path / zip_name
zip_path.write_bytes(zip_content)
app_logger.info("[save_and_unpack_dashboard][State] Dashboard saved to: %s", zip_path)
if unpack:
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(output_path)
app_logger.info("[save_and_unpack_dashboard][State] Dashboard unpacked to: %s", output_path)
return zip_path, output_path
return zip_path, None
except zipfile.BadZipFile as e:
app_logger.error("[save_and_unpack_dashboard][Failure] Invalid ZIP archive: %s", e)
raise InvalidZipFormatError(f"Invalid ZIP file: {e}") from e
# [/DEF:save_and_unpack_dashboard:Function]
# [DEF:update_yamls:Function]
# @PURPOSE: Обновляет конфигурации в YAML-файлах, заменяя значения или применяя regex.
# @PRE: path должен быть существующей директорией.
# @POST: Все YAML файлы в директории обновлены согласно переданным параметрам.
# @RELATION: CALLS -> _update_yaml_file
# @THROW: FileNotFoundError - Если `path` не существует.
# @PARAM: db_configs (Optional[List[Dict]]) - Список конфигураций для замены.
# @PARAM: path (str) - Путь к директории с YAML файлами.
# @PARAM: regexp_pattern (Optional[LiteralString]) - Паттерн для поиска.
# @PARAM: replace_string (Optional[LiteralString]) - Строка для замены.
def update_yamls(db_configs: Optional[List[Dict[str, Any]]] = None, path: str = "dashboards", regexp_pattern: Optional[LiteralString] = None, replace_string: Optional[LiteralString] = None) -> None:
with belief_scope("Update YAML configurations"):
app_logger.info("[update_yamls][Enter] Starting YAML configuration update.")
dir_path = Path(path)
assert dir_path.is_dir(), f"Путь {path} не существует или не является директорией"
configs: List[Dict[str, Any]] = db_configs or []
for file_path in dir_path.rglob("*.yaml"):
_update_yaml_file(file_path, configs, regexp_pattern, replace_string)
# [/DEF:update_yamls:Function]
# [DEF:_update_yaml_file:Function]
# @PURPOSE: (Helper) Обновляет один YAML файл.
# @PRE: file_path должен быть объектом Path к существующему YAML файлу.
# @POST: Файл обновлен согласно переданным конфигурациям или регулярному выражению.
# @PARAM: file_path (Path) - Путь к файлу.
# @PARAM: db_configs (List[Dict]) - Конфигурации.
# @PARAM: regexp_pattern (Optional[str]) - Паттерн.
# @PARAM: replace_string (Optional[str]) - Замена.
def _update_yaml_file(file_path: Path, db_configs: List[Dict[str, Any]], regexp_pattern: Optional[str], replace_string: Optional[str]) -> None:
with belief_scope(f"Update YAML file: {file_path}"):
# Читаем содержимое файла
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
except Exception as e:
app_logger.error("[_update_yaml_file][Failure] Failed to read %s: %s", file_path, e)
return
# Если задан pattern и replace_string, применяем замену по регулярному выражению
if regexp_pattern and replace_string:
try:
new_content = re.sub(regexp_pattern, replace_string, content)
if new_content != content:
with open(file_path, 'w', encoding='utf-8') as f:
f.write(new_content)
app_logger.info("[_update_yaml_file][State] Updated %s using regex pattern", file_path)
except Exception as e:
app_logger.error("[_update_yaml_file][Failure] Error applying regex to %s: %s", file_path, e)
# Если заданы конфигурации, заменяем значения (поддержка old/new)
if db_configs:
try:
# Прямой текстовый заменитель для старых/новых значений, чтобы сохранить структуру файла
modified_content = content
for cfg in db_configs:
# Ожидаем структуру: {'old': {...}, 'new': {...}}
old_cfg = cfg.get('old', {})
new_cfg = cfg.get('new', {})
for key, old_val in old_cfg.items():
if key in new_cfg:
new_val = new_cfg[key]
# Заменяем только точные совпадения старого значения в тексте YAML, используя ключ для контекста
if isinstance(old_val, str):
# Ищем паттерн: key: "value" или key: value
key_pattern = re.escape(key)
val_pattern = re.escape(old_val)
# Группы: 1=ключ+разделитель, 2=открывающая кавычка (опц), 3=значение, 4=закрывающая кавычка (опц)
pattern = rf'({key_pattern}\s*:\s*)(["\']?)({val_pattern})(["\']?)'
# [DEF:replacer:Function]
# @PURPOSE: Функция замены, сохраняющая кавычки если они были.
# @PRE: match должен быть объектом совпадения регулярного выражения.
# @POST: Возвращает строку с новым значением, сохраняя префикс и кавычки.
def replacer(match):
prefix = match.group(1)
quote_open = match.group(2)
quote_close = match.group(4)
return f"{prefix}{quote_open}{new_val}{quote_close}"
# [/DEF:replacer:Function]
modified_content = re.sub(pattern, replacer, modified_content)
app_logger.info("[_update_yaml_file][State] Replaced '%s' with '%s' for key %s in %s", old_val, new_val, key, file_path)
# Записываем обратно изменённый контент без парсинга YAML, сохраняем оригинальное форматирование
with open(file_path, 'w', encoding='utf-8') as f:
f.write(modified_content)
except Exception as e:
app_logger.error("[_update_yaml_file][Failure] Error performing raw replacement in %s: %s", file_path, e)
# [/DEF:_update_yaml_file:Function]
# [DEF:create_dashboard_export:Function]
# @PURPOSE: Создает ZIP-архив из указанных исходных путей.
# @PRE: source_paths должен содержать существующие пути.
# @POST: ZIP-архив создан по пути zip_path.
# @PARAM: zip_path (Union[str, Path]) - Путь для сохранения ZIP архива.
# @PARAM: source_paths (List[Union[str, Path]]) - Список исходных путей для архивации.
# @PARAM: exclude_extensions (Optional[List[str]]) - Список расширений для исключения.
# @RETURN: bool - `True` при успехе, `False` при ошибке.
def create_dashboard_export(zip_path: Union[str, Path], source_paths: List[Union[str, Path]], exclude_extensions: Optional[List[str]] = None) -> bool:
with belief_scope(f"Create dashboard export: {zip_path}"):
app_logger.info("[create_dashboard_export][Enter] Packing dashboard: %s -> %s", source_paths, zip_path)
try:
exclude_ext = [ext.lower() for ext in exclude_extensions or []]
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
for src_path_str in source_paths:
src_path = Path(src_path_str)
assert src_path.exists(), f"Путь не найден: {src_path}"
for item in src_path.rglob('*'):
if item.is_file() and item.suffix.lower() not in exclude_ext:
arcname = item.relative_to(src_path.parent)
zipf.write(item, arcname)
app_logger.info("[create_dashboard_export][Exit] Archive created: %s", zip_path)
return True
except (IOError, zipfile.BadZipFile, AssertionError) as e:
app_logger.error("[create_dashboard_export][Failure] Error: %s", e, exc_info=True)
return False
# [/DEF:create_dashboard_export:Function]
# [DEF:sanitize_filename:Function]
# @PURPOSE: Очищает строку от символов, недопустимых в именах файлов.
# @PRE: filename должен быть строкой.
# @POST: Возвращает строку без спецсимволов.
# @PARAM: filename (str) - Исходное имя файла.
# @RETURN: str - Очищенная строка.
def sanitize_filename(filename: str) -> str:
with belief_scope(f"Sanitize filename: {filename}"):
return re.sub(r'[\\/*?:"<>|]', "_", filename).strip()
# [/DEF:sanitize_filename:Function]
# [DEF:get_filename_from_headers:Function]
# @PURPOSE: Извлекает имя файла из HTTP заголовка 'Content-Disposition'.
# @PRE: headers должен быть словарем заголовков.
# @POST: Возвращает имя файла или None, если заголовок отсутствует.
# @PARAM: headers (dict) - Словарь HTTP заголовков.
# @RETURN: Optional[str] - Имя файла or `None`.
def get_filename_from_headers(headers: dict) -> Optional[str]:
with belief_scope("Get filename from headers"):
content_disposition = headers.get("Content-Disposition", "")
if match := re.search(r'filename="?([^"]+)"?', content_disposition):
return match.group(1).strip()
return None
# [/DEF:get_filename_from_headers:Function]
# [DEF:consolidate_archive_folders:Function]
# @PURPOSE: Консолидирует директории архивов на основе общего слага в имени.
# @PRE: root_directory должен быть объектом Path к существующей директории.
# @POST: Директории с одинаковым префиксом объединены в одну.
# @THROW: TypeError, ValueError - Если `root_directory` невалиден.
# @PARAM: root_directory (Path) - Корневая директория для консолидации.
def consolidate_archive_folders(root_directory: Path) -> None:
with belief_scope(f"Consolidate archives in {root_directory}"):
assert isinstance(root_directory, Path), "root_directory must be a Path object."
assert root_directory.is_dir(), "root_directory must be an existing directory."
app_logger.info("[consolidate_archive_folders][Enter] Consolidating archives in %s", root_directory)
# Собираем все директории с архивами
archive_dirs = []
for item in root_directory.iterdir():
if item.is_dir():
# Проверяем, есть ли в директории ZIP-архивы
if any(item.glob("*.zip")):
archive_dirs.append(item)
# Группируем по слагу (части имени до первого '_')
slug_groups = {}
for dir_path in archive_dirs:
dir_name = dir_path.name
slug = dir_name.split('_')[0] if '_' in dir_name else dir_name
if slug not in slug_groups:
slug_groups[slug] = []
slug_groups[slug].append(dir_path)
# Для каждой группы консолидируем
for slug, dirs in slug_groups.items():
if len(dirs) <= 1:
continue
# Создаем целевую директорию
target_dir = root_directory / slug
target_dir.mkdir(exist_ok=True)
app_logger.info("[consolidate_archive_folders][State] Consolidating %d directories under %s", len(dirs), target_dir)
# Перемещаем содержимое
for source_dir in dirs:
if source_dir == target_dir:
continue
for item in source_dir.iterdir():
dest_item = target_dir / item.name
try:
if item.is_dir():
shutil.move(str(item), str(dest_item))
else:
shutil.move(str(item), str(dest_item))
except Exception as e:
app_logger.error("[consolidate_archive_folders][Failure] Failed to move %s to %s: %s", item, dest_item, e)
# Удаляем исходную директорию
try:
source_dir.rmdir()
app_logger.info("[consolidate_archive_folders][State] Removed source directory: %s", source_dir)
except Exception as e:
app_logger.error("[consolidate_archive_folders][Failure] Failed to remove source directory %s: %s", source_dir, e)
# [/DEF:consolidate_archive_folders:Function]
# [/DEF:backend.core.utils.fileio:Module]

View File

@@ -1,265 +1,286 @@
# [DEF:superset_tool.utils.network:Module]
#
# @SEMANTICS: network, http, client, api, requests, session, authentication
# @PURPOSE: Инкапсулирует низкоуровневую HTTP-логику для взаимодействия с Superset API, включая аутентификацию, управление сессией, retry-логику и обработку ошибок.
# @LAYER: Infra
# @RELATION: DEPENDS_ON -> superset_tool.exceptions
# @RELATION: DEPENDS_ON -> superset_tool.utils.logger
# @RELATION: DEPENDS_ON -> requests
# @PUBLIC_API: APIClient
# [SECTION: IMPORTS]
from typing import Optional, Dict, Any, List, Union, cast
import json
import io
from pathlib import Path
import requests
from requests.adapters import HTTPAdapter
import urllib3
from superset_tool.utils.logger import belief_scope
from urllib3.util.retry import Retry
from superset_tool.exceptions import AuthenticationError, NetworkError, DashboardNotFoundError, SupersetAPIError, PermissionDeniedError
from superset_tool.utils.logger import SupersetLogger
# [/SECTION]
# [DEF:APIClient:Class]
# @PURPOSE: Инкапсулирует HTTP-логику для работы с API, включая сессии, аутентификацию, и обработку запросов.
class APIClient:
DEFAULT_TIMEOUT = 30
# [DEF:__init__:Function]
# @PURPOSE: Инициализирует API клиент с конфигурацией, сессией и логгером.
# @PARAM: config (Dict[str, Any]) - Конфигурация.
# @PARAM: verify_ssl (bool) - Проверять ли SSL.
# @PARAM: timeout (int) - Таймаут запросов.
# @PARAM: logger (Optional[SupersetLogger]) - Логгер.
# @PRE: config must contain 'base_url' and 'auth'.
# @POST: APIClient instance is initialized with a session.
def __init__(self, config: Dict[str, Any], verify_ssl: bool = True, timeout: int = DEFAULT_TIMEOUT, logger: Optional[SupersetLogger] = None):
with belief_scope("__init__"):
self.logger = logger or SupersetLogger(name="APIClient")
self.logger.info("[APIClient.__init__][Entry] Initializing APIClient.")
self.base_url: str = config.get("base_url", "")
self.auth = config.get("auth")
self.request_settings = {"verify_ssl": verify_ssl, "timeout": timeout}
self.session = self._init_session()
self._tokens: Dict[str, str] = {}
self._authenticated = False
self.logger.info("[APIClient.__init__][Exit] APIClient initialized.")
# [/DEF:__init__:Function]
# [DEF:_init_session:Function]
# @PURPOSE: Создает и настраивает `requests.Session` с retry-логикой.
# @PRE: self.request_settings must be initialized.
# @POST: Returns a configured requests.Session instance.
# @RETURN: requests.Session - Настроенная сессия.
def _init_session(self) -> requests.Session:
with belief_scope("_init_session"):
session = requests.Session()
retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
adapter = HTTPAdapter(max_retries=retries)
session.mount('http://', adapter)
session.mount('https://', adapter)
if not self.request_settings["verify_ssl"]:
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
self.logger.warning("[_init_session][State] SSL verification disabled.")
session.verify = self.request_settings["verify_ssl"]
return session
# [/DEF:_init_session:Function]
# [DEF:authenticate:Function]
# @PURPOSE: Выполняет аутентификацию в Superset API и получает access и CSRF токены.
# @PRE: self.auth and self.base_url must be valid.
# @POST: `self._tokens` заполнен, `self._authenticated` установлен в `True`.
# @RETURN: Dict[str, str] - Словарь с токенами.
# @THROW: AuthenticationError, NetworkError - при ошибках.
def authenticate(self) -> Dict[str, str]:
with belief_scope("authenticate"):
self.logger.info("[authenticate][Enter] Authenticating to %s", self.base_url)
try:
login_url = f"{self.base_url}/security/login"
response = self.session.post(login_url, json=self.auth, timeout=self.request_settings["timeout"])
response.raise_for_status()
access_token = response.json()["access_token"]
csrf_url = f"{self.base_url}/security/csrf_token/"
csrf_response = self.session.get(csrf_url, headers={"Authorization": f"Bearer {access_token}"}, timeout=self.request_settings["timeout"])
csrf_response.raise_for_status()
self._tokens = {"access_token": access_token, "csrf_token": csrf_response.json()["result"]}
self._authenticated = True
self.logger.info("[authenticate][Exit] Authenticated successfully.")
return self._tokens
except requests.exceptions.HTTPError as e:
raise AuthenticationError(f"Authentication failed: {e}") from e
except (requests.exceptions.RequestException, KeyError) as e:
raise NetworkError(f"Network or parsing error during authentication: {e}") from e
# [/DEF:authenticate:Function]
@property
# [DEF:headers:Function]
# @PURPOSE: Возвращает HTTP-заголовки для аутентифицированных запросов.
# @PRE: APIClient is initialized and authenticated or can be authenticated.
# @POST: Returns headers including auth tokens.
def headers(self) -> Dict[str, str]:
with belief_scope("headers"):
if not self._authenticated: self.authenticate()
return {
"Authorization": f"Bearer {self._tokens['access_token']}",
"X-CSRFToken": self._tokens.get("csrf_token", ""),
"Referer": self.base_url,
"Content-Type": "application/json"
}
# [/DEF:headers:Function]
# [DEF:request:Function]
# @PURPOSE: Выполняет универсальный HTTP-запрос к API.
# @PARAM: method (str) - HTTP метод.
# @PARAM: endpoint (str) - API эндпоинт.
# @PARAM: headers (Optional[Dict]) - Дополнительные заголовки.
# @PARAM: raw_response (bool) - Возвращать ли сырой ответ.
# @PRE: method and endpoint must be strings.
# @POST: Returns response content or raw Response object.
# @RETURN: `requests.Response` если `raw_response=True`, иначе `dict`.
# @THROW: SupersetAPIError, NetworkError и их подклассы.
def request(self, method: str, endpoint: str, headers: Optional[Dict] = None, raw_response: bool = False, **kwargs) -> Union[requests.Response, Dict[str, Any]]:
with belief_scope("request"):
full_url = f"{self.base_url}{endpoint}"
_headers = self.headers.copy()
if headers: _headers.update(headers)
try:
response = self.session.request(method, full_url, headers=_headers, **kwargs)
response.raise_for_status()
return response if raw_response else response.json()
except requests.exceptions.HTTPError as e:
self._handle_http_error(e, endpoint)
except requests.exceptions.RequestException as e:
self._handle_network_error(e, full_url)
# [/DEF:request:Function]
# [DEF:_handle_http_error:Function]
# @PURPOSE: (Helper) Преобразует HTTP ошибки в кастомные исключения.
# @PARAM: e (requests.exceptions.HTTPError) - Ошибка.
# @PARAM: endpoint (str) - Эндпоинт.
# @PRE: e must be a valid HTTPError with a response.
# @POST: Raises a specific SupersetAPIError or subclass.
def _handle_http_error(self, e: requests.exceptions.HTTPError, endpoint: str):
with belief_scope("_handle_http_error"):
status_code = e.response.status_code
if status_code == 404: raise DashboardNotFoundError(endpoint) from e
if status_code == 403: raise PermissionDeniedError() from e
if status_code == 401: raise AuthenticationError() from e
raise SupersetAPIError(f"API Error {status_code}: {e.response.text}") from e
# [/DEF:_handle_http_error:Function]
# [DEF:_handle_network_error:Function]
# @PURPOSE: (Helper) Преобразует сетевые ошибки в `NetworkError`.
# @PARAM: e (requests.exceptions.RequestException) - Ошибка.
# @PARAM: url (str) - URL.
# @PRE: e must be a RequestException.
# @POST: Raises a NetworkError.
def _handle_network_error(self, e: requests.exceptions.RequestException, url: str):
with belief_scope("_handle_network_error"):
if isinstance(e, requests.exceptions.Timeout): msg = "Request timeout"
elif isinstance(e, requests.exceptions.ConnectionError): msg = "Connection error"
else: msg = f"Unknown network error: {e}"
raise NetworkError(msg, url=url) from e
# [/DEF:_handle_network_error:Function]
# [DEF:upload_file:Function]
# @PURPOSE: Загружает файл на сервер через multipart/form-data.
# @PARAM: endpoint (str) - Эндпоинт.
# @PARAM: file_info (Dict[str, Any]) - Информация о файле.
# @PARAM: extra_data (Optional[Dict]) - Дополнительные данные.
# @PARAM: timeout (Optional[int]) - Таймаут.
# @PRE: file_info must contain 'file_obj' and 'file_name'.
# @POST: File is uploaded and response returned.
# @RETURN: Ответ API в виде словаря.
# @THROW: SupersetAPIError, NetworkError, TypeError.
def upload_file(self, endpoint: str, file_info: Dict[str, Any], extra_data: Optional[Dict] = None, timeout: Optional[int] = None) -> Dict:
with belief_scope("upload_file"):
full_url = f"{self.base_url}{endpoint}"
_headers = self.headers.copy(); _headers.pop('Content-Type', None)
file_obj, file_name, form_field = file_info.get("file_obj"), file_info.get("file_name"), file_info.get("form_field", "file")
files_payload = {}
if isinstance(file_obj, (str, Path)):
with open(file_obj, 'rb') as f:
files_payload = {form_field: (file_name, f.read(), 'application/x-zip-compressed')}
elif isinstance(file_obj, io.BytesIO):
files_payload = {form_field: (file_name, file_obj.getvalue(), 'application/x-zip-compressed')}
else:
raise TypeError(f"Unsupported file_obj type: {type(file_obj)}")
return self._perform_upload(full_url, files_payload, extra_data, _headers, timeout)
# [/DEF:upload_file:Function]
# [DEF:_perform_upload:Function]
# @PURPOSE: (Helper) Выполняет POST запрос с файлом.
# @PARAM: url (str) - URL.
# @PARAM: files (Dict) - Файлы.
# @PARAM: data (Optional[Dict]) - Данные.
# @PARAM: headers (Dict) - Заголовки.
# @PARAM: timeout (Optional[int]) - Таймаут.
# @PRE: url, files, and headers must be provided.
# @POST: POST request is performed and JSON response returned.
# @RETURN: Dict - Ответ.
def _perform_upload(self, url: str, files: Dict, data: Optional[Dict], headers: Dict, timeout: Optional[int]) -> Dict:
with belief_scope("_perform_upload"):
try:
response = self.session.post(url, files=files, data=data or {}, headers=headers, timeout=timeout or self.request_settings["timeout"])
response.raise_for_status()
# Добавляем логирование для отладки
if response.status_code == 200:
try:
return response.json()
except Exception as json_e:
self.logger.debug(f"[_perform_upload][Debug] Response is not valid JSON: {response.text[:200]}...")
raise SupersetAPIError(f"API error during upload: Response is not valid JSON: {json_e}") from json_e
return response.json()
except requests.exceptions.HTTPError as e:
raise SupersetAPIError(f"API error during upload: {e.response.text}") from e
except requests.exceptions.RequestException as e:
raise NetworkError(f"Network error during upload: {e}", url=url) from e
# [/DEF:_perform_upload:Function]
# [DEF:fetch_paginated_count:Function]
# @PURPOSE: Получает общее количество элементов для пагинации.
# @PARAM: endpoint (str) - Эндпоинт.
# @PARAM: query_params (Dict) - Параметры запроса.
# @PARAM: count_field (str) - Поле с количеством.
# @PRE: query_params must be a dictionary.
# @POST: Returns total count of items.
# @RETURN: int - Количество.
def fetch_paginated_count(self, endpoint: str, query_params: Dict, count_field: str = "count") -> int:
with belief_scope("fetch_paginated_count"):
response_json = cast(Dict[str, Any], self.request("GET", endpoint, params={"q": json.dumps(query_params)}))
return response_json.get(count_field, 0)
# [/DEF:fetch_paginated_count:Function]
# [DEF:fetch_paginated_data:Function]
# @PURPOSE: Автоматически собирает данные со всех страниц пагинированного эндпоинта.
# @PARAM: endpoint (str) - Эндпоинт.
# @PARAM: pagination_options (Dict[str, Any]) - Опции пагинации.
# @PRE: pagination_options must contain 'base_query', 'total_count', 'results_field'.
# @POST: Returns all items across all pages.
# @RETURN: List[Any] - Список данных.
def fetch_paginated_data(self, endpoint: str, pagination_options: Dict[str, Any]) -> List[Any]:
with belief_scope("fetch_paginated_data"):
base_query, total_count = pagination_options["base_query"], pagination_options["total_count"]
results_field, page_size = pagination_options["results_field"], base_query.get('page_size')
assert page_size and page_size > 0, "'page_size' must be a positive number."
results = []
for page in range((total_count + page_size - 1) // page_size):
query = {**base_query, 'page': page}
response_json = cast(Dict[str, Any], self.request("GET", endpoint, params={"q": json.dumps(query)}))
results.extend(response_json.get(results_field, []))
return results
# [/DEF:fetch_paginated_data:Function]
# [/DEF:APIClient:Class]
# [/DEF:superset_tool.utils.network:Module]
# [DEF:backend.core.utils.network:Module]
#
# @SEMANTICS: network, http, client, api, requests, session, authentication
# @PURPOSE: Инкапсулирует низкоуровневую HTTP-логику для взаимодействия с Superset API, включая аутентификацию, управление сессией, retry-логику и обработку ошибок.
# @LAYER: Infra
# @RELATION: DEPENDS_ON -> backend.src.core.logger
# @RELATION: DEPENDS_ON -> requests
# @PUBLIC_API: APIClient
# [SECTION: IMPORTS]
from typing import Optional, Dict, Any, List, Union, cast
import json
import io
from pathlib import Path
import requests
from requests.adapters import HTTPAdapter
import urllib3
from urllib3.util.retry import Retry
from ..logger import logger as app_logger, belief_scope
# [/SECTION]
# [DEF:SupersetAPIError:Class]
class SupersetAPIError(Exception):
def __init__(self, message: str = "Superset API error", **context: Any):
self.context = context
super().__init__(f"[API_FAILURE] {message} | Context: {self.context}")
# [DEF:AuthenticationError:Class]
class AuthenticationError(SupersetAPIError):
def __init__(self, message: str = "Authentication failed", **context: Any):
super().__init__(message, type="authentication", **context)
# [DEF:PermissionDeniedError:Class]
class PermissionDeniedError(AuthenticationError):
def __init__(self, message: str = "Permission denied", **context: Any):
super().__init__(message, **context)
# [DEF:DashboardNotFoundError:Class]
class DashboardNotFoundError(SupersetAPIError):
def __init__(self, resource_id: Union[int, str], message: str = "Dashboard not found", **context: Any):
super().__init__(f"Dashboard '{resource_id}' {message}", subtype="not_found", resource_id=resource_id, **context)
# [DEF:NetworkError:Class]
class NetworkError(Exception):
def __init__(self, message: str = "Network connection failed", **context: Any):
self.context = context
super().__init__(f"[NETWORK_FAILURE] {message} | Context: {self.context}")
# [DEF:APIClient:Class]
# @PURPOSE: Инкапсулирует HTTP-логику для работы с API, включая сессии, аутентификацию, и обработку запросов.
class APIClient:
DEFAULT_TIMEOUT = 30
# [DEF:__init__:Function]
# @PURPOSE: Инициализирует API клиент с конфигурацией, сессией и логгером.
# @PARAM: config (Dict[str, Any]) - Конфигурация.
# @PARAM: verify_ssl (bool) - Проверять ли SSL.
# @PARAM: timeout (int) - Таймаут запросов.
# @PRE: config must contain 'base_url' and 'auth'.
# @POST: APIClient instance is initialized with a session.
def __init__(self, config: Dict[str, Any], verify_ssl: bool = True, timeout: int = DEFAULT_TIMEOUT):
with belief_scope("__init__"):
app_logger.info("[APIClient.__init__][Entry] Initializing APIClient.")
self.base_url: str = config.get("base_url", "")
self.auth = config.get("auth")
self.request_settings = {"verify_ssl": verify_ssl, "timeout": timeout}
self.session = self._init_session()
self._tokens: Dict[str, str] = {}
self._authenticated = False
app_logger.info("[APIClient.__init__][Exit] APIClient initialized.")
# [/DEF:__init__:Function]
# [DEF:_init_session:Function]
# @PURPOSE: Создает и настраивает `requests.Session` с retry-логикой.
# @PRE: self.request_settings must be initialized.
# @POST: Returns a configured requests.Session instance.
# @RETURN: requests.Session - Настроенная сессия.
def _init_session(self) -> requests.Session:
with belief_scope("_init_session"):
session = requests.Session()
retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
adapter = HTTPAdapter(max_retries=retries)
session.mount('http://', adapter)
session.mount('https://', adapter)
if not self.request_settings["verify_ssl"]:
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
app_logger.warning("[_init_session][State] SSL verification disabled.")
session.verify = self.request_settings["verify_ssl"]
return session
# [/DEF:_init_session:Function]
# [DEF:authenticate:Function]
# @PURPOSE: Выполняет аутентификацию в Superset API и получает access и CSRF токены.
# @PRE: self.auth and self.base_url must be valid.
# @POST: `self._tokens` заполнен, `self._authenticated` установлен в `True`.
# @RETURN: Dict[str, str] - Словарь с токенами.
# @THROW: AuthenticationError, NetworkError - при ошибках.
def authenticate(self) -> Dict[str, str]:
with belief_scope("authenticate"):
app_logger.info("[authenticate][Enter] Authenticating to %s", self.base_url)
try:
login_url = f"{self.base_url}/security/login"
response = self.session.post(login_url, json=self.auth, timeout=self.request_settings["timeout"])
response.raise_for_status()
access_token = response.json()["access_token"]
csrf_url = f"{self.base_url}/security/csrf_token/"
csrf_response = self.session.get(csrf_url, headers={"Authorization": f"Bearer {access_token}"}, timeout=self.request_settings["timeout"])
csrf_response.raise_for_status()
self._tokens = {"access_token": access_token, "csrf_token": csrf_response.json()["result"]}
self._authenticated = True
app_logger.info("[authenticate][Exit] Authenticated successfully.")
return self._tokens
except requests.exceptions.HTTPError as e:
raise AuthenticationError(f"Authentication failed: {e}") from e
except (requests.exceptions.RequestException, KeyError) as e:
raise NetworkError(f"Network or parsing error during authentication: {e}") from e
# [/DEF:authenticate:Function]
@property
# [DEF:headers:Function]
# @PURPOSE: Возвращает HTTP-заголовки для аутентифицированных запросов.
# @PRE: APIClient is initialized and authenticated or can be authenticated.
# @POST: Returns headers including auth tokens.
def headers(self) -> Dict[str, str]:
with belief_scope("headers"):
if not self._authenticated: self.authenticate()
return {
"Authorization": f"Bearer {self._tokens['access_token']}",
"X-CSRFToken": self._tokens.get("csrf_token", ""),
"Referer": self.base_url,
"Content-Type": "application/json"
}
# [/DEF:headers:Function]
# [DEF:request:Function]
# @PURPOSE: Выполняет универсальный HTTP-запрос к API.
# @PARAM: method (str) - HTTP метод.
# @PARAM: endpoint (str) - API эндпоинт.
# @PARAM: headers (Optional[Dict]) - Дополнительные заголовки.
# @PARAM: raw_response (bool) - Возвращать ли сырой ответ.
# @PRE: method and endpoint must be strings.
# @POST: Returns response content or raw Response object.
# @RETURN: `requests.Response` если `raw_response=True`, иначе `dict`.
# @THROW: SupersetAPIError, NetworkError и их подклассы.
def request(self, method: str, endpoint: str, headers: Optional[Dict] = None, raw_response: bool = False, **kwargs) -> Union[requests.Response, Dict[str, Any]]:
with belief_scope("request"):
full_url = f"{self.base_url}{endpoint}"
_headers = self.headers.copy()
if headers: _headers.update(headers)
try:
response = self.session.request(method, full_url, headers=_headers, **kwargs)
response.raise_for_status()
return response if raw_response else response.json()
except requests.exceptions.HTTPError as e:
self._handle_http_error(e, endpoint)
except requests.exceptions.RequestException as e:
self._handle_network_error(e, full_url)
# [/DEF:request:Function]
# [DEF:_handle_http_error:Function]
# @PURPOSE: (Helper) Преобразует HTTP ошибки в кастомные исключения.
# @PARAM: e (requests.exceptions.HTTPError) - Ошибка.
# @PARAM: endpoint (str) - Эндпоинт.
# @PRE: e must be a valid HTTPError with a response.
# @POST: Raises a specific SupersetAPIError or subclass.
def _handle_http_error(self, e: requests.exceptions.HTTPError, endpoint: str):
with belief_scope("_handle_http_error"):
status_code = e.response.status_code
if status_code == 404: raise DashboardNotFoundError(endpoint) from e
if status_code == 403: raise PermissionDeniedError() from e
if status_code == 401: raise AuthenticationError() from e
raise SupersetAPIError(f"API Error {status_code}: {e.response.text}") from e
# [/DEF:_handle_http_error:Function]
# [DEF:_handle_network_error:Function]
# @PURPOSE: (Helper) Преобразует сетевые ошибки в `NetworkError`.
# @PARAM: e (requests.exceptions.RequestException) - Ошибка.
# @PARAM: url (str) - URL.
# @PRE: e must be a RequestException.
# @POST: Raises a NetworkError.
def _handle_network_error(self, e: requests.exceptions.RequestException, url: str):
with belief_scope("_handle_network_error"):
if isinstance(e, requests.exceptions.Timeout): msg = "Request timeout"
elif isinstance(e, requests.exceptions.ConnectionError): msg = "Connection error"
else: msg = f"Unknown network error: {e}"
raise NetworkError(msg, url=url) from e
# [/DEF:_handle_network_error:Function]
# [DEF:upload_file:Function]
# @PURPOSE: Загружает файл на сервер через multipart/form-data.
# @PARAM: endpoint (str) - Эндпоинт.
# @PARAM: file_info (Dict[str, Any]) - Информация о файле.
# @PARAM: extra_data (Optional[Dict]) - Дополнительные данные.
# @PARAM: timeout (Optional[int]) - Таймаут.
# @PRE: file_info must contain 'file_obj' and 'file_name'.
# @POST: File is uploaded and response returned.
# @RETURN: Ответ API в виде словаря.
# @THROW: SupersetAPIError, NetworkError, TypeError.
def upload_file(self, endpoint: str, file_info: Dict[str, Any], extra_data: Optional[Dict] = None, timeout: Optional[int] = None) -> Dict:
with belief_scope("upload_file"):
full_url = f"{self.base_url}{endpoint}"
_headers = self.headers.copy(); _headers.pop('Content-Type', None)
file_obj, file_name, form_field = file_info.get("file_obj"), file_info.get("file_name"), file_info.get("form_field", "file")
files_payload = {}
if isinstance(file_obj, (str, Path)):
with open(file_obj, 'rb') as f:
files_payload = {form_field: (file_name, f.read(), 'application/x-zip-compressed')}
elif isinstance(file_obj, io.BytesIO):
files_payload = {form_field: (file_name, file_obj.getvalue(), 'application/x-zip-compressed')}
else:
raise TypeError(f"Unsupported file_obj type: {type(file_obj)}")
return self._perform_upload(full_url, files_payload, extra_data, _headers, timeout)
# [/DEF:upload_file:Function]
# [DEF:_perform_upload:Function]
# @PURPOSE: (Helper) Выполняет POST запрос с файлом.
# @PARAM: url (str) - URL.
# @PARAM: files (Dict) - Файлы.
# @PARAM: data (Optional[Dict]) - Данные.
# @PARAM: headers (Dict) - Заголовки.
# @PARAM: timeout (Optional[int]) - Таймаут.
# @PRE: url, files, and headers must be provided.
# @POST: POST request is performed and JSON response returned.
# @RETURN: Dict - Ответ.
def _perform_upload(self, url: str, files: Dict, data: Optional[Dict], headers: Dict, timeout: Optional[int]) -> Dict:
with belief_scope("_perform_upload"):
try:
response = self.session.post(url, files=files, data=data or {}, headers=headers, timeout=timeout or self.request_settings["timeout"])
response.raise_for_status()
if response.status_code == 200:
try:
return response.json()
except Exception as json_e:
app_logger.debug(f"[_perform_upload][Debug] Response is not valid JSON: {response.text[:200]}...")
raise SupersetAPIError(f"API error during upload: Response is not valid JSON: {json_e}") from json_e
return response.json()
except requests.exceptions.HTTPError as e:
raise SupersetAPIError(f"API error during upload: {e.response.text}") from e
except requests.exceptions.RequestException as e:
raise NetworkError(f"Network error during upload: {e}", url=url) from e
# [/DEF:_perform_upload:Function]
# [DEF:fetch_paginated_count:Function]
# @PURPOSE: Получает общее количество элементов для пагинации.
# @PARAM: endpoint (str) - Эндпоинт.
# @PARAM: query_params (Dict) - Параметры запроса.
# @PARAM: count_field (str) - Поле с количеством.
# @PRE: query_params must be a dictionary.
# @POST: Returns total count of items.
# @RETURN: int - Количество.
def fetch_paginated_count(self, endpoint: str, query_params: Dict, count_field: str = "count") -> int:
with belief_scope("fetch_paginated_count"):
response_json = cast(Dict[str, Any], self.request("GET", endpoint, params={"q": json.dumps(query_params)}))
return response_json.get(count_field, 0)
# [/DEF:fetch_paginated_count:Function]
# [DEF:fetch_paginated_data:Function]
# @PURPOSE: Автоматически собирает данные со всех страниц пагинированного эндпоинта.
# @PARAM: endpoint (str) - Эндпоинт.
# @PARAM: pagination_options (Dict[str, Any]) - Опции пагинации.
# @PRE: pagination_options must contain 'base_query', 'total_count', 'results_field'.
# @POST: Returns all items across all pages.
# @RETURN: List[Any] - Список данных.
def fetch_paginated_data(self, endpoint: str, pagination_options: Dict[str, Any]) -> List[Any]:
with belief_scope("fetch_paginated_data"):
base_query, total_count = pagination_options["base_query"], pagination_options["total_count"]
results_field, page_size = pagination_options["results_field"], base_query.get('page_size')
assert page_size and page_size > 0, "'page_size' must be a positive number."
results = []
for page in range((total_count + page_size - 1) // page_size):
query = {**base_query, 'page': page}
response_json = cast(Dict[str, Any], self.request("GET", endpoint, params={"q": json.dumps(query)}))
results.extend(response_json.get(results_field, []))
return results
# [/DEF:fetch_paginated_data:Function]
# [/DEF:APIClient:Class]
# [/DEF:backend.core.utils.network:Module]

View File

@@ -12,10 +12,9 @@ from requests.exceptions import RequestException
from ..core.plugin_base import PluginBase
from ..core.logger import belief_scope
from superset_tool.client import SupersetClient
from superset_tool.exceptions import SupersetAPIError
from superset_tool.utils.logger import SupersetLogger
from superset_tool.utils.fileio import (
from ..core.superset_client import SupersetClient
from ..core.utils.network import SupersetAPIError
from ..core.utils.fileio import (
save_and_unpack_dashboard,
archive_exports,
sanitize_filename,
@@ -23,7 +22,6 @@ from superset_tool.utils.fileio import (
remove_empty_directories,
RetentionPolicy
)
from superset_tool.utils.init_clients import setup_clients
from ..dependencies import get_config_manager
# [DEF:BackupPlugin:Class]
@@ -131,25 +129,25 @@ class BackupPlugin(PluginBase):
backup_path_str = params.get("backup_path") or config_manager.get_config().settings.backup_path
backup_path = Path(backup_path_str)
logger = SupersetLogger(log_dir=backup_path / "Logs", console=True)
logger.info(f"[BackupPlugin][Entry] Starting backup for {env}.")
from ..core.logger import logger as app_logger
app_logger.info(f"[BackupPlugin][Entry] Starting backup for {env}.")
try:
config_manager = get_config_manager()
if not config_manager.has_environments():
raise ValueError("No Superset environments configured. Please add an environment in Settings.")
clients = setup_clients(logger, custom_envs=config_manager.get_environments())
client = clients.get(env)
if not client:
env_config = config_manager.get_environment(env)
if not env_config:
raise ValueError(f"Environment '{env}' not found in configuration.")
client = SupersetClient(env_config)
dashboard_count, dashboard_meta = client.get_dashboards()
logger.info(f"[BackupPlugin][Progress] Found {dashboard_count} dashboards to export in {env}.")
app_logger.info(f"[BackupPlugin][Progress] Found {dashboard_count} dashboards to export in {env}.")
if dashboard_count == 0:
logger.info("[BackupPlugin][Exit] No dashboards to back up.")
app_logger.info("[BackupPlugin][Exit] No dashboards to back up.")
return
for db in dashboard_meta:
@@ -169,23 +167,22 @@ class BackupPlugin(PluginBase):
zip_content=zip_content,
original_filename=filename,
output_dir=dashboard_dir,
unpack=False,
logger=logger
unpack=False
)
archive_exports(str(dashboard_dir), policy=RetentionPolicy(), logger=logger)
archive_exports(str(dashboard_dir), policy=RetentionPolicy())
except (SupersetAPIError, RequestException, IOError, OSError) as db_error:
logger.error(f"[BackupPlugin][Failure] Failed to export dashboard {dashboard_title} (ID: {dashboard_id}): {db_error}", exc_info=True)
app_logger.error(f"[BackupPlugin][Failure] Failed to export dashboard {dashboard_title} (ID: {dashboard_id}): {db_error}", exc_info=True)
continue
consolidate_archive_folders(backup_path / env.upper(), logger=logger)
remove_empty_directories(str(backup_path / env.upper()), logger=logger)
consolidate_archive_folders(backup_path / env.upper())
remove_empty_directories(str(backup_path / env.upper()))
logger.info(f"[BackupPlugin][CoherenceCheck:Passed] Backup logic completed for {env}.")
app_logger.info(f"[BackupPlugin][CoherenceCheck:Passed] Backup logic completed for {env}.")
except (RequestException, IOError, KeyError) as e:
logger.critical(f"[BackupPlugin][Failure] Fatal error during backup for {env}: {e}", exc_info=True)
app_logger.critical(f"[BackupPlugin][Failure] Fatal error during backup for {env}: {e}", exc_info=True)
raise e
# [/DEF:execute:Function]
# [/DEF:BackupPlugin:Class]

View File

@@ -145,19 +145,7 @@ class DebugPlugin(PluginBase):
if not env_config:
raise ValueError(f"Environment '{name}' not found.")
# Map Environment model to SupersetConfig
from superset_tool.models import SupersetConfig
superset_config = SupersetConfig(
env=env_config.name,
base_url=env_config.url,
auth={
"provider": "db", # Defaulting to db provider
"username": env_config.username,
"password": env_config.password,
"refresh": "false"
}
)
client = SupersetClient(superset_config)
client = SupersetClient(env_config)
client.authenticate()
count, dbs = client.get_databases()
results[name] = {
@@ -188,19 +176,7 @@ class DebugPlugin(PluginBase):
if not env_config:
raise ValueError(f"Environment '{env_name}' not found.")
# Map Environment model to SupersetConfig
from superset_tool.models import SupersetConfig
superset_config = SupersetConfig(
env=env_config.name,
base_url=env_config.url,
auth={
"provider": "db", # Defaulting to db provider
"username": env_config.username,
"password": env_config.password,
"refresh": "false"
}
)
client = SupersetClient(superset_config)
client = SupersetClient(env_config)
client.authenticate()
dataset_response = client.get_dataset(dataset_id)

View File

@@ -12,8 +12,7 @@ from ..core.superset_client import SupersetClient
from ..core.logger import logger, belief_scope
from ..core.database import SessionLocal
from ..models.connection import ConnectionConfig
from superset_tool.utils.dataset_mapper import DatasetMapper
from superset_tool.utils.logger import SupersetLogger
from ..core.utils.dataset_mapper import DatasetMapper
# [/SECTION]
# [DEF:MapperPlugin:Class]
@@ -137,25 +136,13 @@ class MapperPlugin(PluginBase):
# Get config and initialize client
from ..dependencies import get_config_manager
from superset_tool.models import SupersetConfig
config_manager = get_config_manager()
env_config = config_manager.get_environment(env_name)
if not env_config:
logger.error(f"[MapperPlugin.execute][State] Environment '{env_name}' not found.")
raise ValueError(f"Environment '{env_name}' not found in configuration.")
# Map Environment model to SupersetConfig
superset_config = SupersetConfig(
env=env_config.name,
base_url=env_config.url,
auth={
"provider": "db", # Defaulting to db provider
"username": env_config.username,
"password": env_config.password,
"refresh": "false"
}
)
client = SupersetClient(superset_config)
client = SupersetClient(env_config)
client.authenticate()
postgres_config = None
@@ -185,9 +172,7 @@ class MapperPlugin(PluginBase):
logger.info(f"[MapperPlugin.execute][Action] Starting mapping for dataset {dataset_id} in {env_name}")
# Use internal SupersetLogger for DatasetMapper
s_logger = SupersetLogger(name="dataset_mapper_plugin")
mapper = DatasetMapper(s_logger)
mapper = DatasetMapper()
try:
mapper.run_mapping(

View File

@@ -13,11 +13,9 @@ import re
from ..core.plugin_base import PluginBase
from ..core.logger import belief_scope
from superset_tool.client import SupersetClient
from superset_tool.utils.init_clients import setup_clients
from superset_tool.utils.fileio import create_temp_file, update_yamls, create_dashboard_export
from ..core.superset_client import SupersetClient
from ..core.utils.fileio import create_temp_file, update_yamls, create_dashboard_export
from ..dependencies import get_config_manager
from superset_tool.utils.logger import SupersetLogger
from ..core.migration_engine import MigrationEngine
from ..core.database import SessionLocal
from ..models.mapping import DatabaseMapping, Environment
@@ -150,7 +148,7 @@ class MigrationPlugin(PluginBase):
from ..dependencies import get_task_manager
tm = get_task_manager()
class TaskLoggerProxy(SupersetLogger):
class TaskLoggerProxy:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the proxy logger.
# @PRE: None.
@@ -158,7 +156,7 @@ class MigrationPlugin(PluginBase):
def __init__(self):
with belief_scope("__init__"):
# Initialize parent with dummy values since we override methods
super().__init__(console=False)
pass
# [/DEF:__init__:Function]
# [DEF:debug:Function]
@@ -246,9 +244,8 @@ class MigrationPlugin(PluginBase):
logger.info(f"[MigrationPlugin][State] Resolved environments: {from_env_name} -> {to_env_name}")
all_clients = setup_clients(logger, custom_envs=environments)
from_c = all_clients.get(from_env_name)
to_c = all_clients.get(to_env_name)
from_c = SupersetClient(src_env)
to_c = SupersetClient(tgt_env)
if not from_c or not to_c:
raise ValueError(f"Clients not initialized for environments: {from_env_name}, {to_env_name}")

View File

@@ -106,25 +106,13 @@ class SearchPlugin(PluginBase):
# Get config and initialize client
from ..dependencies import get_config_manager
from superset_tool.models import SupersetConfig
config_manager = get_config_manager()
env_config = config_manager.get_environment(env_name)
if not env_config:
logger.error(f"[SearchPlugin.execute][State] Environment '{env_name}' not found.")
raise ValueError(f"Environment '{env_name}' not found in configuration.")
# Map Environment model to SupersetConfig
superset_config = SupersetConfig(
env=env_config.name,
base_url=env_config.url,
auth={
"provider": "db", # Defaulting to db provider
"username": env_config.username,
"password": env_config.password,
"refresh": "false"
}
)
client = SupersetClient(superset_config)
client = SupersetClient(env_config)
client.authenticate()
logger.info(f"[SearchPlugin.execute][Action] Searching for pattern: '{search_query}' in environment: {env_name}")

View File

@@ -13,7 +13,6 @@ from typing import List, Dict
from backend.src.core.logger import belief_scope
from backend.src.core.superset_client import SupersetClient
from backend.src.core.utils.matching import suggest_mappings
from superset_tool.models import SupersetConfig
# [/SECTION]
# [DEF:MappingService:Class]
@@ -43,17 +42,7 @@ class MappingService:
if not env:
raise ValueError(f"Environment {env_id} not found")
superset_config = SupersetConfig(
env=env.name,
base_url=env.url,
auth={
"provider": "db",
"username": env.username,
"password": env.password,
"refresh": "false"
}
)
return SupersetClient(superset_config)
return SupersetClient(env)
# [/DEF:_get_client:Function]
# [DEF:get_suggestions:Function]

Binary file not shown.

View File

@@ -1,99 +0,0 @@
#!/usr/bin/env python3
"""Test script to verify the fixes for SupersetClient initialization."""
import sys
sys.path.insert(0, '.')
from src.core.config_manager import ConfigManager
from src.core.config_models import Environment
from src.plugins.search import SearchPlugin
from src.plugins.mapper import MapperPlugin
from src.plugins.debug import DebugPlugin
def test_config_manager():
"""Test ConfigManager methods."""
print("Testing ConfigManager...")
try:
config_manager = ConfigManager()
print(f" ConfigManager initialized")
# Test get_environment method
if hasattr(config_manager, 'get_environment'):
print(f" get_environment method exists")
# Add a test environment if none exists
if not config_manager.has_environments():
test_env = Environment(
id="test-env",
name="Test Environment",
url="http://localhost:8088",
username="admin",
password="admin"
)
config_manager.add_environment(test_env)
print(f" Added test environment: {test_env.name}")
# Test retrieving environment
envs = config_manager.get_environments()
if envs:
test_env_id = envs[0].id
env_config = config_manager.get_environment(test_env_id)
print(f" Successfully retrieved environment: {env_config.name}")
return True
else:
print(f" No environments available (add one in settings)")
return False
except Exception as e:
print(f" Error: {e}")
return False
def test_plugins():
"""Test plugin initialization."""
print("\nTesting plugins...")
plugins = [
("Search Plugin", SearchPlugin()),
("Mapper Plugin", MapperPlugin()),
("Debug Plugin", DebugPlugin())
]
all_ok = True
for name, plugin in plugins:
print(f"\nTesting {name}...")
try:
plugin_id = plugin.id
plugin_name = plugin.name
plugin_version = plugin.version
schema = plugin.get_schema()
print(f" ✓ ID: {plugin_id}")
print(f" ✓ Name: {plugin_name}")
print(f" ✓ Version: {plugin_version}")
print(f" ✓ Schema: {schema}")
except Exception as e:
print(f" ✗ Error: {e}")
all_ok = False
return all_ok
def main():
"""Main test function."""
print("=" * 50)
print("Superset Tools Fix Verification")
print("=" * 50)
config_ok = test_config_manager()
plugins_ok = test_plugins()
print("\n" + "=" * 50)
if config_ok and plugins_ok:
print("✅ All fixes verified successfully!")
else:
print("❌ Some tests failed")
sys.exit(1)
if __name__ == "__main__":
main()

View File

@@ -1,5 +1,5 @@
import pytest
from backend.src.core.logger import belief_scope, logger
from src.core.logger import belief_scope, logger
# [DEF:test_belief_scope_logs_entry_action_exit:Function]

View File

@@ -1,62 +1,23 @@
import pytest
from superset_tool.models import SupersetConfig
from superset_tool.utils.logger import belief_scope
from src.core.config_models import Environment
from src.core.logger import belief_scope
# [DEF:test_superset_config_url_normalization:Function]
# @PURPOSE: Tests that SupersetConfig correctly normalizes the base URL.
# @PRE: SupersetConfig class is available.
# @POST: URL normalization is verified.
def test_superset_config_url_normalization():
with belief_scope("test_superset_config_url_normalization"):
auth = {
"provider": "db",
"username": "admin",
"password": "password",
"refresh": "token"
}
# Test with /api/v1 already present
config = SupersetConfig(
env="dev",
base_url="http://localhost:8088/api/v1",
auth=auth
# [DEF:test_environment_model:Function]
# @PURPOSE: Tests that Environment model correctly stores values.
# @PRE: Environment class is available.
# @POST: Values are verified.
def test_environment_model():
with belief_scope("test_environment_model"):
env = Environment(
id="test-id",
name="test-env",
url="http://localhost:8088/api/v1",
username="admin",
password="password"
)
assert config.base_url == "http://localhost:8088/api/v1"
# Test without /api/v1
config = SupersetConfig(
env="dev",
base_url="http://localhost:8088",
auth=auth
)
assert config.base_url == "http://localhost:8088/api/v1"
# Test with trailing slash
config = SupersetConfig(
env="dev",
base_url="http://localhost:8088/",
auth=auth
)
assert config.base_url == "http://localhost:8088/api/v1"
assert env.id == "test-id"
assert env.name == "test-env"
assert env.url == "http://localhost:8088/api/v1"
# [/DEF:test_superset_config_url_normalization:Function]
# [DEF:test_superset_config_invalid_url:Function]
# @PURPOSE: Tests that SupersetConfig raises ValueError for invalid URLs.
# @PRE: SupersetConfig class is available.
# @POST: ValueError is raised for invalid URLs.
def test_superset_config_invalid_url():
with belief_scope("test_superset_config_invalid_url"):
auth = {
"provider": "db",
"username": "admin",
"password": "password",
"refresh": "token"
}
with pytest.raises(ValueError, match="Must start with http:// or https://"):
SupersetConfig(
env="dev",
base_url="localhost:8088",
auth=auth
)
# [/DEF:test_superset_config_invalid_url:Function]

View File

@@ -1,413 +0,0 @@
# [DEF:migration_script:Module]
#
# @SEMANTICS: migration, cli, superset, ui, logging, error-recovery, batch-delete
# @PURPOSE: Предоставляет интерактивный CLI для миграции дашбордов Superset между окружениями с возможностью восстановления после ошибок.
# @LAYER: App
# @RELATION: DEPENDS_ON -> superset_tool.client
# @RELATION: DEPENDS_ON -> superset_tool.utils
# @PUBLIC_API: Migration
# [SECTION: IMPORTS]
import json
import logging
import sys
import zipfile
import re
from pathlib import Path
from typing import List, Optional, Tuple, Dict
from superset_tool.client import SupersetClient
from superset_tool.utils.init_clients import setup_clients
from superset_tool.utils.fileio import create_temp_file, update_yamls, create_dashboard_export
from superset_tool.utils.whiptail_fallback import menu, checklist, yesno, msgbox, inputbox, gauge
from superset_tool.utils.logger import SupersetLogger
# [/SECTION]
# [DEF:Migration:Class]
# @PURPOSE: Инкапсулирует логику интерактивной миграции дашбордов с возможностью «удалить‑и‑перезаписать» при ошибке импорта.
# @RELATION: CREATES_INSTANCE_OF -> SupersetLogger
# @RELATION: USES -> SupersetClient
class Migration:
"""
Интерактивный процесс миграции дашбордов.
"""
# [DEF:__init__:Function]
# @PURPOSE: Инициализирует сервис миграции, настраивает логгер и начальные состояния.
# @PRE: None.
# @POST: `self.logger` готов к использованию; `enable_delete_on_failure` = `False`.
def __init__(self) -> None:
default_log_dir = Path.cwd() / "logs"
self.logger = SupersetLogger(
name="migration_script",
log_dir=default_log_dir,
level=logging.INFO,
console=True,
)
self.enable_delete_on_failure = False
self.from_c: Optional[SupersetClient] = None
self.to_c: Optional[SupersetClient] = None
self.dashboards_to_migrate: List[dict] = []
self.db_config_replacement: Optional[dict] = None
self._failed_imports: List[dict] = []
# [/DEF:__init__:Function]
# [DEF:run:Function]
# @PURPOSE: Точка входа последовательный запуск всех шагов миграции.
# @PRE: Логгер готов.
# @POST: Скрипт завершён, пользователю выведено сообщение.
# @RELATION: CALLS -> self.ask_delete_on_failure
# @RELATION: CALLS -> self.select_environments
# @RELATION: CALLS -> self.select_dashboards
# @RELATION: CALLS -> self.confirm_db_config_replacement
# @RELATION: CALLS -> self.execute_migration
def run(self) -> None:
with self.logger.belief_scope("Migration.run"):
self.logger.info("[run][Entry] Запуск скрипта миграции.")
self.ask_delete_on_failure()
self.select_environments()
self.select_dashboards()
self.confirm_db_config_replacement()
self.execute_migration()
self.logger.info("[run][Exit] Скрипт миграции завершён.")
# [/DEF:run:Function]
# [DEF:ask_delete_on_failure:Function]
# @PURPOSE: Запрашивает у пользователя, следует ли удалять дашборд при ошибке импорта.
# @PRE: None.
# @POST: `self.enable_delete_on_failure` установлен.
# @RELATION: CALLS -> yesno
def ask_delete_on_failure(self) -> None:
with self.logger.belief_scope("Migration.ask_delete_on_failure"):
self.enable_delete_on_failure = yesno(
"Поведение при ошибке импорта",
"Если импорт завершится ошибкой, удалить существующий дашборд и попытаться импортировать заново?",
)
self.logger.info(
"[ask_delete_on_failure][State] Delete-on-failure = %s",
self.enable_delete_on_failure,
)
# [/DEF:ask_delete_on_failure:Function]
# [DEF:select_environments:Function]
# @PURPOSE: Позволяет пользователю выбрать исходное и целевое окружения Superset.
# @PRE: `setup_clients` успешно инициализирует все клиенты.
# @POST: `self.from_c` и `self.to_c` установлены.
# @RELATION: CALLS -> setup_clients
# @RELATION: CALLS -> menu
def select_environments(self) -> None:
with self.logger.belief_scope("Migration.select_environments"):
self.logger.info("[select_environments][Entry] Шаг 1/5: Выбор окружений.")
try:
all_clients = setup_clients(self.logger)
available_envs = list(all_clients.keys())
except Exception as e:
self.logger.error("[select_environments][Failure] %s", e, exc_info=True)
msgbox("Ошибка", "Не удалось инициализировать клиенты.")
return
rc, from_env_name = menu(
title="Выбор окружения",
prompt="Исходное окружение:",
choices=available_envs,
)
if rc != 0 or from_env_name is None:
self.logger.info("[select_environments][State] Source environment selection cancelled.")
return
self.from_c = all_clients[from_env_name]
self.logger.info("[select_environments][State] from = %s", from_env_name)
available_envs.remove(from_env_name)
rc, to_env_name = menu(
title="Выбор окружения",
prompt="Целевое окружение:",
choices=available_envs,
)
if rc != 0 or to_env_name is None:
self.logger.info("[select_environments][State] Target environment selection cancelled.")
return
self.to_c = all_clients[to_env_name]
self.logger.info("[select_environments][State] to = %s", to_env_name)
self.logger.info("[select_environments][Exit] Шаг 1 завершён.")
# [/DEF:select_environments:Function]
# [DEF:select_dashboards:Function]
# @PURPOSE: Позволяет пользователю выбрать набор дашбордов для миграции.
# @PRE: `self.from_c` инициализирован.
# @POST: `self.dashboards_to_migrate` заполнен.
# @RELATION: CALLS -> self.from_c.get_dashboards
# @RELATION: CALLS -> checklist
def select_dashboards(self) -> None:
with self.logger.belief_scope("Migration.select_dashboards"):
self.logger.info("[select_dashboards][Entry] Шаг 2/5: Выбор дашбордов.")
if self.from_c is None:
self.logger.error("[select_dashboards][Failure] Source client not initialized.")
msgbox("Ошибка", "Исходное окружение не выбрано.")
return
try:
_, all_dashboards = self.from_c.get_dashboards()
if not all_dashboards:
self.logger.warning("[select_dashboards][State] No dashboards.")
msgbox("Информация", "В исходном окружении нет дашбордов.")
return
rc, regex = inputbox("Поиск", "Введите регулярное выражение для поиска дашбордов:")
if rc != 0:
return
# Ensure regex is a string and perform caseinsensitive search
regex_str = str(regex)
filtered_dashboards = [
d for d in all_dashboards if re.search(regex_str, d["dashboard_title"], re.IGNORECASE)
]
options = [("ALL", "Все дашборды")] + [
(str(d["id"]), d["dashboard_title"]) for d in filtered_dashboards
]
rc, selected = checklist(
title="Выбор дашбордов",
prompt="Отметьте нужные дашборды (введите номера):",
options=options,
)
if rc != 0:
return
if "ALL" in selected:
self.dashboards_to_migrate = filtered_dashboards
else:
self.dashboards_to_migrate = [
d for d in filtered_dashboards if str(d["id"]) in selected
]
self.logger.info(
"[select_dashboards][State] Выбрано %d дашбордов.",
len(self.dashboards_to_migrate),
)
except Exception as e:
self.logger.error("[select_dashboards][Failure] %s", e, exc_info=True)
msgbox("Ошибка", "Не удалось получить список дашбордов.")
self.logger.info("[select_dashboards][Exit] Шаг 2 завершён.")
# [/DEF:select_dashboards:Function]
# [DEF:confirm_db_config_replacement:Function]
# @PURPOSE: Запрашивает у пользователя, требуется ли заменить имена БД в YAML-файлах.
# @PRE: None.
# @POST: `self.db_config_replacement` либо `None`, либо заполнен.
# @RELATION: CALLS -> yesno
# @RELATION: CALLS -> self._select_databases
def confirm_db_config_replacement(self) -> None:
with self.logger.belief_scope("Migration.confirm_db_config_replacement"):
if yesno("Замена БД", "Заменить конфигурацию БД в YAMLфайлах?"):
old_db, new_db = self._select_databases()
if not old_db or not new_db:
self.logger.info("[confirm_db_config_replacement][State] Selection cancelled.")
return
print(f"old_db: {old_db}")
old_result = old_db.get("result", {})
new_result = new_db.get("result", {})
self.db_config_replacement = {
"old": {
"database_name": old_result.get("database_name"),
"uuid": old_result.get("uuid"),
"database_uuid": old_result.get("uuid"),
"id": str(old_db.get("id"))
},
"new": {
"database_name": new_result.get("database_name"),
"uuid": new_result.get("uuid"),
"database_uuid": new_result.get("uuid"),
"id": str(new_db.get("id"))
}
}
self.logger.info("[confirm_db_config_replacement][State] Replacement set: %s", self.db_config_replacement)
else:
self.logger.info("[confirm_db_config_replacement][State] Skipped.")
# [/DEF:confirm_db_config_replacement:Function]
# [DEF:_select_databases:Function]
# @PURPOSE: Позволяет пользователю выбрать исходную и целевую БД через API.
# @PRE: Clients are initialized.
# @POST: Возвращает кортеж (старая БД, новая БД) или (None, None) при отмене.
# @RELATION: CALLS -> self.from_c.get_databases
# @RELATION: CALLS -> self.to_c.get_databases
# @RELATION: CALLS -> self.from_c.get_database
# @RELATION: CALLS -> self.to_c.get_database
# @RELATION: CALLS -> menu
def _select_databases(self) -> Tuple[Optional[Dict], Optional[Dict]]:
with self.logger.belief_scope("Migration._select_databases"):
self.logger.info("[_select_databases][Entry] Selecting databases from both environments.")
if self.from_c is None or self.to_c is None:
self.logger.error("[_select_databases][Failure] Source or target client not initialized.")
msgbox("Ошибка", "Исходное или целевое окружение не выбрано.")
return None, None
# Получаем список БД из обоих окружений
try:
_, from_dbs = self.from_c.get_databases()
_, to_dbs = self.to_c.get_databases()
except Exception as e:
self.logger.error("[_select_databases][Failure] Failed to fetch databases: %s", e)
msgbox("Ошибка", "Не удалось получить список баз данных.")
return None, None
# Формируем список для выбора
# По Swagger документации, в ответе API поле называется "database_name"
from_choices = []
for db in from_dbs:
db_name = db.get("database_name", "Без имени")
from_choices.append((str(db["id"]), f"{db_name} (ID: {db['id']})"))
to_choices = []
for db in to_dbs:
db_name = db.get("database_name", "Без имени")
to_choices.append((str(db["id"]), f"{db_name} (ID: {db['id']})"))
# Показываем список БД для исходного окружения
rc, from_sel = menu(
title="Выбор исходной БД",
prompt="Выберите исходную БД:",
choices=[f"{name}" for id, name in from_choices]
)
if rc != 0:
return None, None
# Определяем выбранную БД
from_db_id = from_choices[[choice[1] for choice in from_choices].index(from_sel)][0]
# Получаем полную информацию о выбранной БД из исходного окружения
try:
from_db = self.from_c.get_database(int(from_db_id))
except Exception as e:
self.logger.error("[_select_databases][Failure] Failed to fetch database details: %s", e)
msgbox("Ошибка", "Не удалось получить информацию о выбранной базе данных.")
return None, None
# Показываем список БД для целевого окружения
rc, to_sel = menu(
title="Выбор целевой БД",
prompt="Выберите целевую БД:",
choices=[f"{name}" for id, name in to_choices]
)
if rc != 0:
return None, None
# Определяем выбранную БД
to_db_id = to_choices[[choice[1] for choice in to_choices].index(to_sel)][0]
# Получаем полную информацию о выбранной БД из целевого окружения
try:
to_db = self.to_c.get_database(int(to_db_id))
except Exception as e:
self.logger.error("[_select_databases][Failure] Failed to fetch database details: %s", e)
msgbox("Ошибка", "Не удалось получить информацию о выбранной базе данных.")
return None, None
self.logger.info("[_select_databases][Exit] Selected databases: %s -> %s", from_db.get("database_name", "Без имени"), to_db.get("database_name", "Без имени"))
return from_db, to_db
# [/DEF:_select_databases:Function]
# [DEF:_batch_delete_by_ids:Function]
# @PURPOSE: Удаляет набор дашбордов по их ID единым запросом.
# @PRE: `ids` непустой список целых чисел.
# @POST: Все указанные дашборды удалены (если они существовали).
# @RELATION: CALLS -> self.to_c.network.request
# @PARAM: ids (List[int]) - Список ID дашбордов для удаления.
def _batch_delete_by_ids(self, ids: List[int]) -> None:
with self.logger.belief_scope("Migration._batch_delete_by_ids", f"ids={ids}"):
if not ids:
self.logger.debug("[_batch_delete_by_ids][Skip] Empty ID list nothing to delete.")
return
if self.to_c is None:
self.logger.error("[_batch_delete_by_ids][Failure] Target client not initialized.")
msgbox("Ошибка", "Целевое окружение не выбрано.")
return
self.logger.info("[_batch_delete_by_ids][Entry] Deleting dashboards IDs: %s", ids)
q_param = json.dumps(ids)
response = self.to_c.network.request(method="DELETE", endpoint="/dashboard/", params={"q": q_param})
if isinstance(response, dict) and response.get("result", True) is False:
self.logger.warning("[_batch_delete_by_ids][Warning] Unexpected delete response: %s", response)
else:
self.logger.info("[_batch_delete_by_ids][Success] Delete request completed.")
# [/DEF:_batch_delete_by_ids:Function]
# [DEF:execute_migration:Function]
# @PURPOSE: Выполняет экспорт-импорт дашбордов, обрабатывает ошибки и, при необходимости, выполняет процедуру восстановления.
# @PRE: `self.dashboards_to_migrate` не пуст; `self.from_c` и `self.to_c` инициализированы.
# @POST: Успешные дашборды импортированы; неудачные - восстановлены или залогированы.
# @RELATION: CALLS -> self.from_c.export_dashboard
# @RELATION: CALLS -> create_temp_file
# @RELATION: CALLS -> update_yamls
# @RELATION: CALLS -> create_dashboard_export
# @RELATION: CALLS -> self.to_c.import_dashboard
# @RELATION: CALLS -> self._batch_delete_by_ids
def execute_migration(self) -> None:
with self.logger.belief_scope("Migration.execute_migration"):
if not self.dashboards_to_migrate:
self.logger.warning("[execute_migration][Skip] No dashboards to migrate.")
msgbox("Информация", "Нет дашбордов для миграции.")
return
if self.from_c is None or self.to_c is None:
self.logger.error("[execute_migration][Failure] Source or target client not initialized.")
msgbox("Ошибка", "Исходное или целевое окружение не выбрано.")
return
total = len(self.dashboards_to_migrate)
self.logger.info("[execute_migration][Entry] Starting migration of %d dashboards.", total)
self.to_c.delete_before_reimport = self.enable_delete_on_failure
with gauge("Миграция...", width=60, height=10) as g:
for i, dash in enumerate(self.dashboards_to_migrate):
dash_id, dash_slug, title = dash["id"], dash.get("slug"), dash["dashboard_title"]
g.set_text(f"Миграция: {title} ({i + 1}/{total})")
g.set_percent(int((i / total) * 100))
exported_content = None # Initialize exported_content
try:
exported_content, _ = self.from_c.export_dashboard(dash_id)
with create_temp_file(content=exported_content, dry_run=True, suffix=".zip", logger=self.logger) as tmp_zip_path, \
create_temp_file(suffix=".dir", logger=self.logger) as tmp_unpack_dir:
if not self.db_config_replacement:
self.to_c.import_dashboard(file_name=tmp_zip_path, dash_id=dash_id, dash_slug=dash_slug)
else:
with zipfile.ZipFile(tmp_zip_path, "r") as zip_ref:
zip_ref.extractall(tmp_unpack_dir)
if self.db_config_replacement:
update_yamls(db_configs=[self.db_config_replacement], path=str(tmp_unpack_dir))
with create_temp_file(suffix=".zip", dry_run=True, logger=self.logger) as tmp_new_zip:
create_dashboard_export(zip_path=tmp_new_zip, source_paths=[str(p) for p in Path(tmp_unpack_dir).glob("**/*")])
self.to_c.import_dashboard(file_name=tmp_new_zip, dash_id=dash_id, dash_slug=dash_slug)
self.logger.info("[execute_migration][Success] Dashboard %s imported.", title)
except Exception as exc:
self.logger.error("[execute_migration][Failure] %s", exc, exc_info=True)
self._failed_imports.append({"slug": dash_slug, "dash_id": dash_id, "zip_content": exported_content})
msgbox("Ошибка", f"Не удалось мигрировать дашборд {title}.\n\n{exc}")
g.set_percent(100)
if self.enable_delete_on_failure and self._failed_imports:
self.logger.info("[execute_migration][Recovery] %d dashboards failed. Starting recovery.", len(self._failed_imports))
_, target_dashboards = self.to_c.get_dashboards()
slug_to_id = {d["slug"]: d["id"] for d in target_dashboards if "slug" in d and "id" in d}
ids_to_delete = [slug_to_id[f["slug"]] for f in self._failed_imports if f["slug"] in slug_to_id]
self._batch_delete_by_ids(ids_to_delete)
for fail in self._failed_imports:
with create_temp_file(content=fail["zip_content"], suffix=".zip", logger=self.logger) as retry_zip:
self.to_c.import_dashboard(file_name=retry_zip, dash_id=fail["dash_id"], dash_slug=fail["slug"])
self.logger.info("[execute_migration][Recovered] Dashboard slug '%s' re-imported.", fail["slug"])
self.logger.info("[execute_migration][Exit] Migration finished.")
msgbox("Ошибка" if self._failed_imports else "Информация", "Миграция завершена!")
# [/DEF:execute_migration:Function]
# [/DEF:Migration:Class]
if __name__ == "__main__":
Migration().run()
# [/DEF:migration_script:Module]

View File

@@ -0,0 +1,34 @@
# Specification Quality Checklist: Backend Refactoring - Remove superset_tool
**Purpose**: Validate specification completeness and quality before proceeding to planning
**Created**: 2026-01-22
**Feature**: [Link to spec.md](../spec.md)
## Content Quality
- [x] No implementation details (languages, frameworks, APIs) - *Refactoring is technical by nature, but focused on structural goals.*
- [x] Focused on user value and business needs
- [x] Written for non-technical stakeholders
- [x] All mandatory sections completed
## Requirement Completeness
- [x] No [NEEDS CLARIFICATION] markers remain
- [x] Requirements are testable and unambiguous
- [x] Success criteria are measurable
- [x] Success criteria are technology-agnostic (no implementation details)
- [x] All acceptance scenarios are defined
- [x] Edge cases are identified
- [x] Scope is clearly bounded
- [x] Dependencies and assumptions identified
## Feature Readiness
- [x] All functional requirements have clear acceptance criteria
- [x] User scenarios cover primary flows
- [x] Feature meets measurable outcomes defined in Success Criteria
- [x] No implementation details leak into specification
## Notes
- Items marked incomplete require spec updates before `/speckit.clarify` or `/speckit.plan`

View File

@@ -0,0 +1,39 @@
# API Contracts: 012-remove-superset-tool
## SupersetClient Internal API
The `SupersetClient` will provide the following methods for internal backend use:
### `get_dashboards(query: Optional[Dict] = None) -> Tuple[int, List[Dict]]`
- **Purpose**: Fetches paginated dashboards.
- **Contract**:
- `query`: Optional filters and pagination parameters.
- Returns: `(total_count, dashboard_list)`.
### `export_dashboard(dashboard_id: int) -> Tuple[bytes, str]`
- **Purpose**: Exports a dashboard as a ZIP file.
- **Contract**:
- `dashboard_id`: ID of the dashboard.
- Returns: `(zip_content, filename)`.
### `import_dashboard(file_name: Union[str, Path], dash_id: Optional[int] = None, dash_slug: Optional[str] = None) -> Dict`
- **Purpose**: Imports a dashboard from a ZIP file.
- **Contract**:
- `file_name`: Path to ZIP.
- `dash_id`/`dash_slug`: Optional identifiers for delete-retry logic.
- Returns: API response dictionary.
## Configuration Models (Pydantic)
### `Environment`
```python
class Environment(BaseModel):
id: str
name: str
url: str
username: str
password: str
verify_ssl: bool = True
timeout: int = 30
is_default: bool = False
backup_schedule: Schedule = Field(default_factory=Schedule)

View File

@@ -0,0 +1,38 @@
# Data Model: 012-remove-superset-tool
## Entities
### 1. Environment (Updated)
- **Source**: `backend/src/core/config_models.py`
- **Purpose**: Represents a Superset environment configuration, now incorporating all fields required for the `SupersetClient`.
- **Fields**:
- `id`: `str` (Unique identifier)
- `name`: `str` (Display name)
- `url`: `str` (Base URL for API)
- `username`: `str`
- `password`: `str`
- `verify_ssl`: `bool` (Default: `True`)
- `timeout`: `int` (Default: `30`)
- `is_default`: `bool` (Default: `False`)
- `backup_schedule`: `Schedule`
- **Validation Rules**:
- `url` must start with `http://` or `https://`.
- `timeout` must be positive.
### 2. API Contract (SupersetClient)
- **Source**: `backend/src/core/superset_client.py`
- **Purpose**: Self-contained client for Superset API interactions.
- **Key Methods**:
- `authenticate()`
- `get_dashboards(query)`
- `get_datasets(query)`
- `get_databases(query)`
- `export_dashboard(dashboard_id)`
- `import_dashboard(file_name, dash_id, dash_slug)`
- `delete_dashboard(dashboard_id)`
- `get_databases_summary()`
- `get_dashboards_summary()`
## State Transitions
- **Authentication**: Unauthenticated -> Authenticated (token stored in memory/client session).
- **Import/Export**: File System <-> Superset API.

View File

@@ -0,0 +1,69 @@
# Implementation Plan: 012-remove-superset-tool
**Branch**: `012-remove-superset-tool` | **Date**: 2026-01-22 | **Spec**: [spec.md](specs/012-remove-superset-tool/spec.md)
## Summary
The primary requirement is to refactor the backend by removing the redundant `superset_tool` module and merging its essential logic into the `backend` package. This involves migrating the `SupersetClient` logic, utility modules, and configuration models while deprecating CLI-specific interactive logic.
## Technical Context
**Language/Version**: Python 3.9+
**Primary Dependencies**: FastAPI, Pydantic, requests, pyyaml (migrated from superset_tool)
**Storage**: SQLite (tasks.db, migrations.db), Filesystem
**Testing**: pytest
**Target Platform**: Linux server
**Project Type**: Web application (FastAPI backend + SvelteKit frontend)
**Performance Goals**: N/A (Cleanup task)
**Constraints**: Zero references to `superset_tool` in `backend/` source code.
**Scale/Scope**: Refactoring core backend infrastructure.
## Constitution Check
*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.*
1. **Semantic Protocol Compliance**: All migrated code must use `[DEF]` anchors and `@RELATION` tags as per `semantic_protocol.md`.
2. **Causal Validity**: Contracts (Pydantic models and API surfaces) must be defined before implementation.
3. **Everything is a Plugin**: Ensure that logic previously in `superset_tool` that acts as a tool or extension is integrated within the backend's plugin architecture if applicable.
4. **Fractal Complexity Limit**: Migrated modules must adhere to complexity limits.
## Project Structure
### Documentation (this feature)
```text
specs/012-remove-superset-tool/
├── plan.md # This file
├── research.md # Phase 0 output
├── data-model.md # Phase 1 output
├── quickstart.md # Phase 1 output
├── contracts/ # Phase 1 output
└── tasks.md # Phase 2 output
```
### Source Code (repository root)
```text
backend/
├── src/
│ ├── api/ # API routes
│ ├── core/ # Core logic (Target for migration)
│ │ ├── utils/ # Shared utilities
│ │ ├── config_models.py
│ │ ├── logger.py
│ │ └── superset_client.py
│ ├── models/ # Database models
│ ├── plugins/ # Plugin system
│ └── app.py
└── tests/ # Backend tests
```
**Structure Decision**: Web application structure. The `superset_tool` logic will be absorbed into `backend/src/core/` and its subdirectories.
## Complexity Tracking
> **Fill ONLY if Constitution Check has violations that must be justified**
| Violation | Why Needed | Simpler Alternative Rejected Because |
|-----------|------------|-------------------------------------|
| None | | |

View File

@@ -0,0 +1,20 @@
# Quickstart: 012-remove-superset-tool
## Overview
This feature refactors the backend to remove the `superset_tool` dependency. All Superset-related logic is now centralized in `backend/src/core/superset_client.py`.
## Key Changes
1. **Models**: `Environment` model in `backend/src/core/config_models.py` now includes `verify_ssl` and `timeout`.
2. **Client**: `SupersetClient` in `backend/src/core/superset_client.py` is now self-contained and does not inherit from any external module.
3. **Utilities**: Network and file I/O utilities previously in `superset_tool` are now available in `backend/src/core/utils/`.
## Verification Steps
1. **Run Backend**: Start the FastAPI server and ensure no import errors occur.
```bash
cd backend && .venv/bin/python3 -m uvicorn src.app:app --reload
```
2. **Check Connections**: Verify that existing Superset connections load correctly in the UI/API.
3. **Test Migration**: Trigger a dashboard list refresh or a migration task to confirm the consolidated client works as expected.
## Cleanup
Once verified, the `superset_tool/` directory and root-level CLI scripts (`migration_script.py`, `run_mapper.py`) can be deleted.

View File

@@ -0,0 +1,32 @@
# Research: 012-remove-superset-tool
## Unknowns & Clarifications
### 1. Merging `SupersetConfig` into `Environment`
- **Decision**: Fields from `SupersetConfig` (auth, verify_ssl, timeout) will be merged into the `Environment` model in `backend/src/core/config_models.py`.
- **Rationale**: Centralizes configuration and removes dependency on `superset_tool.models`.
- **Alternatives considered**: Keeping them separate, but that defeats the purpose of the refactoring.
### 2. Utility Migration
- **Decision**: `superset_tool/utils/` will be flattened into `backend/src/core/utils/`.
- **Rationale**: Simplifies the directory structure and aligns with the backend's existing utility pattern.
- **Alternatives considered**: Creating a `backend/src/core/utils/superset/` subdirectory, but flattening is preferred for simplicity.
### 3. CLI Deprecation
- **Decision**: `migration_script.py`, `run_mapper.py`, and `whiptail_fallback.py` will be deprecated and not migrated.
- **Rationale**: The web UI now handles these operations, and maintaining interactive CLI logic in the backend is out of scope.
### 4. Logging Consolidation
- **Decision**: Remove `SupersetLogger` and `belief_scope` from `superset_tool` and use `backend/src/core/logger.py`.
- **Rationale**: Ensures consistent logging across the entire backend application.
## Dependency Analysis
- `backend/requirements.txt` already contains `requests`, `PyYAML`, `pydantic`.
- `superset_tool/requirements.txt` is empty, indicating it likely relied on the same environment or was partially integrated.
- **Action**: Ensure all `superset_tool` imports are replaced with backend-local equivalents.
## Integration Patterns
- `backend/src/core/superset_client.py` currently inherits from `superset_tool.client.SupersetClient`.
- **Action**: Move all methods from `superset_tool/client.py` into `backend/src/core/superset_client.py` and remove the inheritance. Refactor `APIClient` (from `superset_tool/utils/network.py`) to be a utility or private helper within the backend.

View File

@@ -0,0 +1,78 @@
# Feature Specification: Backend Refactoring - Remove superset_tool
**Feature Branch**: `012-remove-superset-tool`
**Created**: 2026-01-22
**Status**: Draft
**Input**: User description: "нужен рефакторинг бека - я хочу исключить модуль superset_tool, пусть останется только backend"
## Clarifications
### Session 2026-01-22
- Q: How should we reconcile the `SupersetConfig` model from `superset_tool` with the `Environment` model in the backend? → A: Merge all `SupersetConfig` fields (e.g., `verify_ssl`, `timeout`) into the `Environment` model in `backend/src/core/config_models.py`.
- Q: Where should the utility modules from `superset_tool/utils/` be moved within the `backend/src/core/` directory? → A: Flatten them directly into `backend/src/core/utils/` alongside existing utilities.
- Q: Should interactive CLI utilities (whiptail_fallback.py, migration_script.py) be migrated? → A: No, deprecate them and do not migrate CLI-specific interactive UI logic to the backend.
- Q: How should we handle `SupersetLogger` and `belief_scope`? → A: Consolidate all logging into `backend/src/core/logger.py` and remove the redundant `SupersetLogger` class.
## User Scenarios & Testing *(mandatory)*
### User Story 1 - Maintain Core Functionality (Priority: P1)
As a developer, I want to remove the redundant `superset_tool` module and merge its essential logic into the `backend` package so that the codebase is easier to maintain and deploy.
**Why this priority**: This is a critical architectural cleanup. The existence of two overlapping modules (`superset_tool` and `backend/src/core/superset_client.py`) creates confusion and potential bugs.
**Independent Test**: The application (FastAPI backend) should start and perform all Superset-related operations (fetching dashboards, datasets, etc.) without the `superset_tool` directory present in the root.
**Acceptance Scenarios**:
1. **Given** the `superset_tool` directory is deleted, **When** I run the backend server, **Then** it should start without `ModuleNotFoundError`.
2. **Given** the backend is running, **When** I trigger a dashboard list refresh in the UI, **Then** the backend should successfully fetch data from Superset using the migrated client logic.
3. **Given** a migration task, **When** I execute it, **Then** it should successfully use the consolidated `SupersetClient` to interact with the API.
---
### User Story 2 - Unified Configuration and Models (Priority: P2)
As a developer, I want to use a single set of Pydantic models and configuration logic within the `backend` package instead of relying on external `superset_tool.models`.
**Why this priority**: Eliminates duplicate definitions and ensures consistency across the backend.
**Independent Test**: Verify that `backend/src/core/config_models.py` (or equivalent) contains all necessary fields previously defined in `superset_tool/models.py`.
**Acceptance Scenarios**:
1. **Given** a connection configuration in the database, **When** the backend loads it, **Then** it should correctly instantiate the consolidated configuration models.
---
### Edge Cases
- **Deprecated Scripts**: CLI-only scripts like `migration_script.py` and `run_mapper.py` in the root will be deprecated and removed as their logic is now in the web UI.
- **Dependency Conflicts**: How does the system handle potential version conflicts between `backend/requirements.txt` and `superset_tool/requirements.txt`?
- **Environment Variables**: Ensure that any environment variables previously used by `superset_tool` are still correctly loaded by the consolidated client.
## Requirements *(mandatory)*
### Functional Requirements
- **FR-001**: The system MUST NOT depend on the `superset_tool` top-level module.
- **FR-002**: All essential logic from `superset_tool/client.py` MUST be migrated to `backend/src/core/superset_client.py`. Utility modules from `superset_tool/utils/` (network, fileio, dataset_mapper) MUST be migrated to `backend/src/core/utils/`.
- **FR-003**: `backend/src/core/superset_client.py` MUST be refactored to be a self-contained client that doesn't inherit from `superset_tool.client.BaseSupersetClient`.
- **FR-004**: Redundant logging logic (`SupersetLogger`) MUST be removed in favor of `backend/src/core/logger.py`.
- **FR-004**: The backend's `requirements.txt` MUST be updated to include any dependencies previously required by `superset_tool` (e.g., `requests`, `pyyaml`).
- **FR-005**: All imports in `backend/src/` that reference `superset_tool` MUST be updated to reference the new locations within the backend.
### Key Entities *(include if feature involves data)*
- **SupersetClient**: The consolidated class responsible for all interactions with the Superset REST API.
- **Environment**: The unified configuration model for Superset connections, incorporating legacy `SupersetConfig` fields.
## Success Criteria *(mandatory)*
### Measurable Outcomes
- **SC-001**: Zero references to `superset_tool` in the `backend/` source code.
- **SC-002**: 100% of existing Superset integration tests pass after the refactoring.
- **SC-003**: The `superset_tool/` directory can be safely deleted from the repository.
- **SC-004**: Backend startup time remains unaffected or improves slightly due to simplified import structure.

View File

@@ -0,0 +1,34 @@
# Tasks: 012-remove-superset-tool
## Phase 1: Setup
- [x] T001 Initialize branch `012-remove-superset-tool`
- [x] T002 Update `backend/requirements.txt` with dependencies from `superset_tool` (requests, pyyaml)
## Phase 2: Foundational (Infrastructure Migration)
- [x] T003 [P] Migrate utility modules from `superset_tool/utils/` to `backend/src/core/utils/`
- [x] T004 [P] Refactor `backend/src/core/logger.py` to incorporate essential logging logic from `superset_tool/utils/logging.py`
- [x] T005 Update `Environment` model in `backend/src/core/config_models.py` with `verify_ssl` and `timeout` fields
## Phase 3: User Story 1 - Maintain Core Functionality [US1]
- [x] T006 [US1] Refactor `backend/src/core/superset_client.py` to be self-contained (remove inheritance from `superset_tool.client.BaseSupersetClient`)
- [x] T007 [US1] Migrate all remaining methods from `superset_tool/client.py` to `backend/src/core/superset_client.py`
- [x] T008 [P] [US1] Update all imports in `backend/src/` to remove references to `superset_tool`
- [x] T009 [US1] Verify backend starts without `superset_tool` directory present
- [x] T010 [US1] Verify dashboard and dataset fetching works via API/UI
## Phase 4: User Story 2 - Unified Configuration and Models [US2]
- [x] T011 [US2] Ensure all Pydantic models in `backend/src/core/config_models.py` cover legacy `superset_tool/models.py` requirements
- [x] T012 [US2] Update database loading logic to correctly populate new `Environment` fields
## Phase 5: Polish & Cleanup
- [x] T013 Remove `superset_tool/` directory
- [x] T014 Remove deprecated root scripts: `migration_script.py`, `run_mapper.py`, `whiptail_fallback.py`
- [x] T015 Run full backend test suite to ensure no regressions
## Dependencies
- Phase 2 must be completed before Phase 3.
- T006 and T007 are prerequisites for T008 and T009.
## Implementation Strategy
- **MVP First**: Focus on T006-T008 to ensure the backend can function independently of the `superset_tool` module.
- **Incremental Delivery**: Migrate utilities and models first to provide the necessary foundation for the client refactoring.

View File

@@ -1,14 +0,0 @@
# [DEF:superset_tool:Module]
# @SEMANTICS: package, root
# @PURPOSE: Root package for superset_tool.
# @LAYER: Domain
# @PUBLIC_API: SupersetClient, SupersetConfig
# [SECTION: IMPORTS]
from .client import SupersetClient
from .models import SupersetConfig
# [/SECTION]
__all__ = ["SupersetClient", "SupersetConfig"]
# [/DEF:superset_tool:Module]

View File

@@ -1,516 +0,0 @@
# [DEF:superset_tool.client:Module]
#
# @SEMANTICS: superset, api, client, rest, http, dashboard, dataset, import, export
# @PURPOSE: Предоставляет высокоуровневый клиент для взаимодействия с Superset REST API, инкапсулируя логику запросов, обработку ошибок и пагинацию.
# @LAYER: Domain
# @RELATION: DEPENDS_ON -> superset_tool.models
# @RELATION: DEPENDS_ON -> superset_tool.exceptions
# @RELATION: DEPENDS_ON -> superset_tool.utils
#
# @INVARIANT: All network operations must use the internal APIClient instance.
# @CONSTRAINT: No direct use of 'requests' library outside of APIClient.
# @PUBLIC_API: SupersetClient
# [SECTION: IMPORTS]
import json
import zipfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from requests import Response
from superset_tool.models import SupersetConfig
from superset_tool.exceptions import ExportError, InvalidZipFormatError
from superset_tool.utils.fileio import get_filename_from_headers
from superset_tool.utils.logger import SupersetLogger, belief_scope
from superset_tool.utils.network import APIClient
# [/SECTION]
# [DEF:SupersetClient:Class]
# @PURPOSE: Класс-обёртка над Superset REST API, предоставляющий методы для работы с дашбордами и датасетами.
# @RELATION: CREATES_INSTANCE_OF -> APIClient
# @RELATION: USES -> SupersetConfig
class SupersetClient:
# [DEF:authenticate:Function]
# @PURPOSE: Authenticates the client using the configured credentials.
# @PRE: self.network must be initialized with valid auth configuration.
# @POST: Client is authenticated and tokens are stored.
# @RETURN: Dict[str, str] - Authentication tokens.
def authenticate(self):
with belief_scope("SupersetClient.authenticate"):
return self.network.authenticate()
# [DEF:__init__:Function]
# @PURPOSE: Инициализирует клиент, проверяет конфигурацию и создает сетевой клиент.
# @PRE: `config` должен быть валидным объектом SupersetConfig.
# @POST: Атрибуты `logger`, `config`, и `network` созданы и готовы к работе.
# @PARAM: config (SupersetConfig) - Конфигурация подключения.
# @PARAM: logger (Optional[SupersetLogger]) - Экземпляр логгера.
def __init__(self, config: SupersetConfig, logger: Optional[SupersetLogger] = None):
with belief_scope("__init__"):
self.logger = logger or SupersetLogger(name="SupersetClient")
self.logger.info("[SupersetClient.__init__][Enter] Initializing SupersetClient.")
self._validate_config(config)
self.config = config
self.network = APIClient(
config=config.dict(),
verify_ssl=config.verify_ssl,
timeout=config.timeout,
logger=self.logger,
)
self.delete_before_reimport: bool = False
self.logger.info("[SupersetClient.__init__][Exit] SupersetClient initialized.")
# [/DEF:__init__:Function]
# [DEF:_validate_config:Function]
# @PURPOSE: Проверяет, что переданный объект конфигурации имеет корректный тип.
# @PRE: `config` должен быть передан.
# @POST: Если проверка пройдена, выполнение продолжается.
# @THROW: TypeError - Если `config` не является экземпляром `SupersetConfig`.
# @PARAM: config (SupersetConfig) - Объект для проверки.
def _validate_config(self, config: SupersetConfig) -> None:
with belief_scope("_validate_config"):
self.logger.debug("[_validate_config][Enter] Validating SupersetConfig.")
assert isinstance(config, SupersetConfig), "Конфигурация должна быть экземпляром SupersetConfig"
self.logger.debug("[_validate_config][Exit] Config is valid.")
# [/DEF:_validate_config:Function]
@property
# [DEF:headers:Function]
# @PURPOSE: Возвращает базовые HTTP-заголовки, используемые сетевым клиентом.
# @PRE: self.network должен быть инициализирован.
# @POST: Возвращаемый словарь содержит актуальные заголовки, включая токен авторизации.
def headers(self) -> dict:
with belief_scope("headers"):
return self.network.headers
# [/DEF:headers:Function]
# [DEF:get_dashboards:Function]
# @PURPOSE: Получает полный список дашбордов, автоматически обрабатывая пагинацию.
# @RELATION: CALLS -> self._fetch_total_object_count
# @RELATION: CALLS -> self._fetch_all_pages
# @PRE: self.network должен быть инициализирован.
# @POST: Возвращаемый список содержит все дашборды, доступные по API.
# @THROW: APIError - В случае ошибки сетевого запроса.
# @PARAM: query (Optional[Dict]) - Дополнительные параметры запроса для API.
# @RETURN: Tuple[int, List[Dict]] - Кортеж (общее количество, список дашбордов).
def get_dashboards(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_dashboards"):
assert self.network, "[get_dashboards][PRE] Network client must be initialized."
self.logger.info("[get_dashboards][Enter] Fetching dashboards.")
validated_query = self._validate_query_params(query or {})
if 'columns' not in validated_query:
validated_query['columns'] = ["slug", "id", "changed_on_utc", "dashboard_title", "published"]
total_count = self._fetch_total_object_count(endpoint="/dashboard/")
paginated_data = self._fetch_all_pages(
endpoint="/dashboard/",
pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
)
self.logger.info("[get_dashboards][Exit] Found %d dashboards.", total_count)
return total_count, paginated_data
validated_query['columns'] = ["slug", "id", "changed_on_utc", "dashboard_title", "published"]
total_count = self._fetch_total_object_count(endpoint="/dashboard/")
paginated_data = self._fetch_all_pages(
endpoint="/dashboard/",
pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
)
self.logger.info("[get_dashboards][Exit] Found %d dashboards.", total_count)
return total_count, paginated_data
# [/DEF:get_dashboards:Function]
# [DEF:export_dashboard:Function]
# @PURPOSE: Экспортирует дашборд в виде ZIP-архива.
# @RELATION: CALLS -> self.network.request
# @PRE: dashboard_id должен быть положительным целым числом.
# @POST: Возвращает бинарное содержимое ZIP-архива и имя файла.
# @THROW: ExportError - Если экспорт завершился неудачей.
# @PARAM: dashboard_id (int) - ID дашборда для экспорта.
# @RETURN: Tuple[bytes, str] - Бинарное содержимое ZIP-архива и имя файла.
def export_dashboard(self, dashboard_id: int) -> Tuple[bytes, str]:
with belief_scope("export_dashboard"):
assert isinstance(dashboard_id, int) and dashboard_id > 0, "[export_dashboard][PRE] dashboard_id must be a positive integer."
self.logger.info("[export_dashboard][Enter] Exporting dashboard %s.", dashboard_id)
response = self.network.request(
method="GET",
endpoint="/dashboard/export/",
params={"q": json.dumps([dashboard_id])},
stream=True,
raw_response=True,
)
response = cast(Response, response)
self._validate_export_response(response, dashboard_id)
filename = self._resolve_export_filename(response, dashboard_id)
self.logger.info("[export_dashboard][Exit] Exported dashboard %s to %s.", dashboard_id, filename)
return response.content, filename
# [/DEF:export_dashboard:Function]
# [DEF:import_dashboard:Function]
# @PURPOSE: Импортирует дашборд из ZIP-файла с возможностью автоматического удаления и повторной попытки при ошибке.
# @RELATION: CALLS -> self._do_import
# @RELATION: CALLS -> self.delete_dashboard
# @RELATION: CALLS -> self.get_dashboards
# @PRE: Файл, указанный в `file_name`, должен существовать и быть валидным ZIP-архивом Superset.
# @POST: Дашборд успешно импортирован, возвращен ответ API.
# @THROW: FileNotFoundError - Если файл не найден.
# @THROW: InvalidZipFormatError - Если файл не является валидным ZIP-архивом Superset.
# @PARAM: file_name (Union[str, Path]) - Путь к ZIP-архиву.
# @PARAM: dash_id (Optional[int]) - ID дашборда для удаления при сбое.
# @PARAM: dash_slug (Optional[str]) - Slug дашборда для поиска ID, если ID не предоставлен.
# @RETURN: Dict - Ответ API в случае успеха.
def import_dashboard(self, file_name: Union[str, Path], dash_id: Optional[int] = None, dash_slug: Optional[str] = None) -> Dict:
with belief_scope("import_dashboard"):
assert file_name, "[import_dashboard][PRE] file_name must be provided."
file_path = str(file_name)
self._validate_import_file(file_path)
try:
return self._do_import(file_path)
except Exception as exc:
self.logger.error("[import_dashboard][Failure] First import attempt failed: %s", exc, exc_info=True)
if not self.delete_before_reimport:
raise
target_id = self._resolve_target_id_for_delete(dash_id, dash_slug)
if target_id is None:
self.logger.error("[import_dashboard][Failure] No ID available for delete-retry.")
raise
self.delete_dashboard(target_id)
self.logger.info("[import_dashboard][State] Deleted dashboard ID %s, retrying import.", target_id)
return self._do_import(file_path)
# [/DEF:import_dashboard:Function]
# [DEF:_resolve_target_id_for_delete:Function]
# @PURPOSE: Определяет ID дашборда для удаления, используя ID или slug.
# @PARAM: dash_id (Optional[int]) - ID дашборда.
# @PARAM: dash_slug (Optional[str]) - Slug дашборда.
# @PRE: По крайней мере один из параметров (dash_id или dash_slug) должен быть предоставлен.
# @POST: Возвращает ID дашборда, если найден, иначе None.
# @THROW: APIError - В случае ошибки сетевого запроса при поиске по slug.
# @RETURN: Optional[int] - Найденный ID или None.
def _resolve_target_id_for_delete(self, dash_id: Optional[int], dash_slug: Optional[str]) -> Optional[int]:
with belief_scope("_resolve_target_id_for_delete"):
assert dash_id is not None or dash_slug is not None, "[_resolve_target_id_for_delete][PRE] At least one of ID or slug must be provided."
if dash_id is not None:
return dash_id
if dash_slug is not None:
self.logger.debug("[_resolve_target_id_for_delete][State] Resolving ID by slug '%s'.", dash_slug)
try:
_, candidates = self.get_dashboards(query={"filters": [{"col": "slug", "op": "eq", "value": dash_slug}]})
if candidates:
target_id = candidates[0]["id"]
self.logger.debug("[_resolve_target_id_for_delete][Success] Resolved slug to ID %s.", target_id)
return target_id
except Exception as e:
self.logger.warning("[_resolve_target_id_for_delete][Warning] Could not resolve slug '%s' to ID: %s", dash_slug, e)
return None
self.logger.debug("[_resolve_target_id_for_delete][State] Resolving ID by slug '%s'.", dash_slug)
try:
_, candidates = self.get_dashboards(query={"filters": [{"col": "slug", "op": "eq", "value": dash_slug}]})
if candidates:
target_id = candidates[0]["id"]
self.logger.debug("[_resolve_target_id_for_delete][Success] Resolved slug to ID %s.", target_id)
return target_id
except Exception as e:
self.logger.warning("[_resolve_target_id_for_delete][Warning] Could not resolve slug '%s' to ID: %s", dash_slug, e)
return None
# [/DEF:_resolve_target_id_for_delete:Function]
# [DEF:_do_import:Function]
# @PURPOSE: Выполняет один запрос на импорт без обработки исключений.
# @PRE: Файл должен существовать.
# @POST: Файл успешно загружен, возвращен ответ API.
# @THROW: FileNotFoundError - Если файл не существует.
# @PARAM: file_name (Union[str, Path]) - Путь к файлу.
# @RETURN: Dict - Ответ API.
def _do_import(self, file_name: Union[str, Path]) -> Dict:
with belief_scope("_do_import"):
self.logger.debug(f"[_do_import][State] Uploading file: {file_name}")
file_path = Path(file_name)
if file_path.exists():
self.logger.debug(f"[_do_import][State] File size: {file_path.stat().st_size} bytes")
else:
self.logger.error(f"[_do_import][Failure] File does not exist: {file_name}")
raise FileNotFoundError(f"File does not exist: {file_name}")
return self.network.upload_file(
endpoint="/dashboard/import/",
file_info={"file_obj": file_path, "file_name": file_path.name, "form_field": "formData"},
extra_data={"overwrite": "true"},
timeout=self.config.timeout * 2,
)
# [/DEF:_do_import:Function]
# [DEF:delete_dashboard:Function]
# @PURPOSE: Удаляет дашборд по его ID или slug.
# @RELATION: CALLS -> self.network.request
# @PRE: dashboard_id должен быть предоставлен.
# @POST: Дашборд удален или залогировано предупреждение.
# @THROW: APIError - В случае ошибки сетевого запроса.
# @PARAM: dashboard_id (Union[int, str]) - ID или slug дашборда.
def delete_dashboard(self, dashboard_id: Union[int, str]) -> None:
with belief_scope("delete_dashboard"):
assert dashboard_id, "[delete_dashboard][PRE] dashboard_id must be provided."
self.logger.info("[delete_dashboard][Enter] Deleting dashboard %s.", dashboard_id)
response = self.network.request(method="DELETE", endpoint=f"/dashboard/{dashboard_id}")
response = cast(Dict, response)
if response.get("result", True) is not False:
self.logger.info("[delete_dashboard][Success] Dashboard %s deleted.", dashboard_id)
else:
self.logger.warning("[delete_dashboard][Warning] Unexpected response while deleting %s: %s", dashboard_id, response)
# [/DEF:delete_dashboard:Function]
# [DEF:_extract_dashboard_id_from_zip:Function]
# @PURPOSE: Извлекает ID дашборда из `metadata.yaml` внутри ZIP-архива.
# @PARAM: file_name (Union[str, Path]) - Путь к ZIP-файлу.
# @PRE: Файл, указанный в `file_name`, должен быть валидным ZIP-архивом.
# @POST: Возвращает ID дашборда, если найден в metadata.yaml, иначе None.
# @THROW: ImportError - Если не установлен `yaml`.
# @RETURN: Optional[int] - ID дашборда или None.
def _extract_dashboard_id_from_zip(self, file_name: Union[str, Path]) -> Optional[int]:
with belief_scope("_extract_dashboard_id_from_zip"):
assert zipfile.is_zipfile(file_name), "[_extract_dashboard_id_from_zip][PRE] file_name must be a valid zip file."
try:
import yaml
with zipfile.ZipFile(file_name, "r") as zf:
for name in zf.namelist():
if name.endswith("metadata.yaml"):
with zf.open(name) as meta_file:
meta = yaml.safe_load(meta_file)
dash_id = meta.get("dashboard_uuid") or meta.get("dashboard_id")
if dash_id: return int(dash_id)
except Exception as exc:
self.logger.error("[_extract_dashboard_id_from_zip][Failure] %s", exc, exc_info=True)
return None
# [/DEF:_extract_dashboard_id_from_zip:Function]
# [DEF:_extract_dashboard_slug_from_zip:Function]
# @PURPOSE: Извлекает slug дашборда из `metadata.yaml` внутри ZIP-архива.
# @PARAM: file_name (Union[str, Path]) - Путь к ZIP-файлу.
# @PRE: Файл, указанный в `file_name`, должен быть валидным ZIP-архивом.
# @POST: Возвращает slug дашборда, если найден в metadata.yaml, иначе None.
# @THROW: ImportError - Если не установлен `yaml`.
# @RETURN: Optional[str] - Slug дашборда или None.
def _extract_dashboard_slug_from_zip(self, file_name: Union[str, Path]) -> Optional[str]:
with belief_scope("_extract_dashboard_slug_from_zip"):
assert zipfile.is_zipfile(file_name), "[_extract_dashboard_slug_from_zip][PRE] file_name must be a valid zip file."
try:
import yaml
with zipfile.ZipFile(file_name, "r") as zf:
for name in zf.namelist():
if name.endswith("metadata.yaml"):
with zf.open(name) as meta_file:
meta = yaml.safe_load(meta_file)
if slug := meta.get("slug"):
return str(slug)
except Exception as exc:
self.logger.error("[_extract_dashboard_slug_from_zip][Failure] %s", exc, exc_info=True)
return None
# [/DEF:_extract_dashboard_slug_from_zip:Function]
# [DEF:_validate_export_response:Function]
# @PURPOSE: Проверяет, что HTTP-ответ на экспорт является валидным ZIP-архивом.
# @PRE: response должен быть объектом requests.Response.
# @POST: Проверка пройдена, если ответ является непустым ZIP-архивом.
# @THROW: ExportError - Если ответ не является ZIP-архивом или пуст.
# @PARAM: response (Response) - HTTP ответ.
# @PARAM: dashboard_id (int) - ID дашборда.
def _validate_export_response(self, response: Response, dashboard_id: int) -> None:
with belief_scope("_validate_export_response"):
assert isinstance(response, Response), "[_validate_export_response][PRE] response must be a requests.Response object."
content_type = response.headers.get("Content-Type", "")
if "application/zip" not in content_type:
raise ExportError(f"Получен не ZIP-архив (Content-Type: {content_type})")
if not response.content:
raise ExportError("Получены пустые данные при экспорте")
# [/DEF:_validate_export_response:Function]
# [DEF:_resolve_export_filename:Function]
# @PURPOSE: Определяет имя файла для экспорта из заголовков или генерирует его.
# @PRE: response должен быть объектом requests.Response.
# @POST: Возвращает непустое имя файла.
# @PARAM: response (Response) - HTTP ответ.
# @PARAM: dashboard_id (int) - ID дашборда.
# @RETURN: str - Имя файла.
def _resolve_export_filename(self, response: Response, dashboard_id: int) -> str:
with belief_scope("_resolve_export_filename"):
assert isinstance(response, Response), "[_resolve_export_filename][PRE] response must be a requests.Response object."
filename = get_filename_from_headers(dict(response.headers))
if not filename:
from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
filename = f"dashboard_export_{dashboard_id}_{timestamp}.zip"
self.logger.warning("[_resolve_export_filename][Warning] Generated filename: %s", filename)
return filename
# [/DEF:_resolve_export_filename:Function]
# [DEF:_validate_query_params:Function]
# @PURPOSE: Формирует корректный набор параметров запроса с пагинацией.
# @PARAM: query (Optional[Dict]) - Исходные параметры.
# @PRE: query, если предоставлен, должен быть словарем.
# @POST: Возвращает словарь, содержащий базовые параметры пагинации, объединенные с `query`.
# @RETURN: Dict - Валидированные параметры.
def _validate_query_params(self, query: Optional[Dict]) -> Dict:
with belief_scope("_validate_query_params"):
assert query is None or isinstance(query, dict), "[_validate_query_params][PRE] query must be a dictionary or None."
base_query = {"page": 0, "page_size": 1000}
return {**base_query, **(query or {})}
# [/DEF:_validate_query_params:Function]
# [DEF:_fetch_total_object_count:Function]
# @PURPOSE: Получает общее количество объектов по указанному эндпоинту для пагинации.
# @PARAM: endpoint (str) - API эндпоинт.
# @PRE: endpoint должен быть непустой строкой.
# @POST: Возвращает общее количество объектов (>= 0).
# @THROW: APIError - В случае ошибки сетевого запроса.
# @RETURN: int - Количество объектов.
def _fetch_total_object_count(self, endpoint: str) -> int:
with belief_scope("_fetch_total_object_count"):
assert endpoint and isinstance(endpoint, str), "[_fetch_total_object_count][PRE] endpoint must be a non-empty string."
return self.network.fetch_paginated_count(
endpoint=endpoint,
query_params={"page": 0, "page_size": 1},
count_field="count",
)
# [/DEF:_fetch_total_object_count:Function]
# [DEF:_fetch_all_pages:Function]
# @PURPOSE: Итерируется по всем страницам пагинированного API и собирает все данные.
# @PARAM: endpoint (str) - API эндпоинт.
# @PARAM: pagination_options (Dict) - Опции пагинации.
# @PRE: endpoint должен быть непустой строкой, pagination_options - словарем.
# @POST: Возвращает полный список объектов.
# @THROW: APIError - В случае ошибки сетевого запроса.
# @RETURN: List[Dict] - Список всех объектов.
def _fetch_all_pages(self, endpoint: str, pagination_options: Dict) -> List[Dict]:
with belief_scope("_fetch_all_pages"):
assert endpoint and isinstance(endpoint, str), "[_fetch_all_pages][PRE] endpoint must be a non-empty string."
assert isinstance(pagination_options, dict), "[_fetch_all_pages][PRE] pagination_options must be a dictionary."
return self.network.fetch_paginated_data(endpoint=endpoint, pagination_options=pagination_options)
# [/DEF:_fetch_all_pages:Function]
# [DEF:_validate_import_file:Function]
# @PURPOSE: Проверяет, что файл существует, является ZIP-архивом и содержит `metadata.yaml`.
# @PRE: zip_path должен быть предоставлен.
# @POST: Проверка пройдена, если файл существует, является ZIP и содержит `metadata.yaml`.
# @THROW: FileNotFoundError - Если файл не найден.
# @THROW: InvalidZipFormatError - Если файл не является ZIP или не содержит `metadata.yaml`.
# @PARAM: zip_path (Union[str, Path]) - Путь к файлу.
def _validate_import_file(self, zip_path: Union[str, Path]) -> None:
with belief_scope("_validate_import_file"):
assert zip_path, "[_validate_import_file][PRE] zip_path must be provided."
path = Path(zip_path)
assert path.exists(), f"Файл {zip_path} не существует"
assert zipfile.is_zipfile(path), f"Файл {zip_path} не является ZIP-архивом"
with zipfile.ZipFile(path, "r") as zf:
assert any(n.endswith("metadata.yaml") for n in zf.namelist()), f"Архив {zip_path} не содержит 'metadata.yaml'"
# [/DEF:_validate_import_file:Function]
# [DEF:get_datasets:Function]
# @PURPOSE: Получает полный список датасетов, автоматически обрабатывая пагинацию.
# @RELATION: CALLS -> self._fetch_total_object_count
# @RELATION: CALLS -> self._fetch_all_pages
# @PARAM: query (Optional[Dict]) - Дополнительные параметры запроса.
# @PRE: self.network должен быть инициализирован.
# @POST: Возвращаемый список содержит все датасеты, доступные по API.
# @THROW: APIError - В случае ошибки сетевого запроса.
# @RETURN: Tuple[int, List[Dict]] - Кортеж (общее количество, список датасетов).
def get_datasets(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_datasets"):
assert self.network, "[get_datasets][PRE] Network client must be initialized."
self.logger.info("[get_datasets][Enter] Fetching datasets.")
validated_query = self._validate_query_params(query)
total_count = self._fetch_total_object_count(endpoint="/dataset/")
paginated_data = self._fetch_all_pages(
endpoint="/dataset/",
pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
)
self.logger.info("[get_datasets][Exit] Found %d datasets.", total_count)
return total_count, paginated_data
# [/DEF:get_datasets:Function]
# [DEF:get_databases:Function]
# @PURPOSE: Получает полный список баз данных, автоматически обрабатывая пагинацию.
# @RELATION: CALLS -> self._fetch_total_object_count
# @RELATION: CALLS -> self._fetch_all_pages
# @PARAM: query (Optional[Dict]) - Дополнительные параметры запроса.
# @PRE: self.network должен быть инициализирован.
# @POST: Возвращаемый список содержит все базы данных, доступные по API.
# @THROW: APIError - В случае ошибки сетевого запроса.
# @RETURN: Tuple[int, List[Dict]] - Кортеж (общее количество, список баз данных).
def get_databases(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_databases"):
assert self.network, "[get_databases][PRE] Network client must be initialized."
self.logger.info("[get_databases][Enter] Fetching databases.")
validated_query = self._validate_query_params(query or {})
if 'columns' not in validated_query:
validated_query['columns'] = []
total_count = self._fetch_total_object_count(endpoint="/database/")
paginated_data = self._fetch_all_pages(
endpoint="/database/",
pagination_options={"base_query": validated_query, "total_count": total_count, "results_field": "result"},
)
self.logger.info("[get_databases][Exit] Found %d databases.", total_count)
return total_count, paginated_data
# [/DEF:get_databases:Function]
# [DEF:get_dataset:Function]
# @PURPOSE: Получает информацию о конкретном датасете по его ID.
# @RELATION: CALLS -> self.network.request
# @PARAM: dataset_id (int) - ID датасета.
# @PRE: dataset_id должен быть положительным целым числом.
# @POST: Возвращает словарь с информацией о датасете.
# @THROW: APIError - В случае ошибки сетевого запроса или если датасет не найден.
# @RETURN: Dict - Информация о датасете.
def get_dataset(self, dataset_id: int) -> Dict:
with belief_scope("get_dataset"):
assert isinstance(dataset_id, int) and dataset_id > 0, "[get_dataset][PRE] dataset_id must be a positive integer."
self.logger.info("[get_dataset][Enter] Fetching dataset %s.", dataset_id)
response = self.network.request(method="GET", endpoint=f"/dataset/{dataset_id}")
response = cast(Dict, response)
self.logger.info("[get_dataset][Exit] Got dataset %s.", dataset_id)
return response
# [/DEF:get_dataset:Function]
# [DEF:get_database:Function]
# @PURPOSE: Получает информацию о конкретной базе данных по её ID.
# @RELATION: CALLS -> self.network.request
# @PARAM: database_id (int) - ID базы данных.
# @PRE: database_id должен быть положительным целым числом.
# @POST: Возвращает словарь с информацией о базе данных.
# @THROW: APIError - В случае ошибки сетевого запроса или если база данных не найдена.
# @RETURN: Dict - Информация о базе данных.
def get_database(self, database_id: int) -> Dict:
with belief_scope("get_database"):
assert isinstance(database_id, int) and database_id > 0, "[get_database][PRE] database_id must be a positive integer."
self.logger.info("[get_database][Enter] Fetching database %s.", database_id)
response = self.network.request(method="GET", endpoint=f"/database/{database_id}")
response = cast(Dict, response)
self.logger.info("[get_database][Exit] Got database %s.", database_id)
return response
# [/DEF:get_database:Function]
# [DEF:update_dataset:Function]
# @PURPOSE: Обновляет данные датасета по его ID.
# @RELATION: CALLS -> self.network.request
# @PARAM: dataset_id (int) - ID датасета.
# @PARAM: data (Dict) - Данные для обновления.
# @PRE: dataset_id должен быть положительным целым числом, data - непустым словарем.
# @POST: Датасет успешно обновлен, возвращен ответ API.
# @THROW: APIError - В случае ошибки сетевого запроса.
# @RETURN: Dict - Ответ API.
def update_dataset(self, dataset_id: int, data: Dict) -> Dict:
with belief_scope("update_dataset"):
assert isinstance(dataset_id, int) and dataset_id > 0, "[update_dataset][PRE] dataset_id must be a positive integer."
assert isinstance(data, dict) and data, "[update_dataset][PRE] data must be a non-empty dictionary."
self.logger.info("[update_dataset][Enter] Updating dataset %s.", dataset_id)
response = self.network.request(
method="PUT",
endpoint=f"/dataset/{dataset_id}",
data=json.dumps(data),
headers={'Content-Type': 'application/json'}
)
response = cast(Dict, response)
self.logger.info("[update_dataset][Exit] Updated dataset %s.", dataset_id)
return response
# [/DEF:update_dataset:Function]
# [/DEF:SupersetClient:Class]
# [/DEF:superset_tool.client:Module]

View File

@@ -1,173 +0,0 @@
# [DEF:superset_tool.exceptions:Module]
# @PURPOSE: Определяет иерархию пользовательских исключений для всего инструмента, обеспечивая единую точку обработки ошибок.
# @SEMANTICS: exception, error, hierarchy
# @LAYER: Infra
# [SECTION: IMPORTS]
from pathlib import Path
from typing import Optional, Dict, Any, Union
# [/SECTION]
# [DEF:SupersetToolError:Class]
# @PURPOSE: Базовый класс для всех ошибок, генерируемых инструментом.
# @RELATION: INHERITS_FROM -> Exception
# @PARAM: message (str) - Сообщение об ошибке.
# @PARAM: context (Optional[Dict[str, Any]]) - Дополнительный контекст ошибки.
class SupersetToolError(Exception):
# [DEF:__init__:Function]
# @PURPOSE: Initializes the base tool error.
# @PRE: message is a string, context is optional dict.
# @POST: Error is initialized with combined message and context.
def __init__(self, message: str, context: Optional[Dict[str, Any]] = None):
self.context = context or {}
super().__init__(f"{message} | Context: {self.context}")
# [/DEF:__init__:Function]
# [/DEF:SupersetToolError:Class]
# [DEF:AuthenticationError:Class]
# @PURPOSE: Ошибки, связанные с аутентификацией или авторизацией.
# @RELATION: INHERITS_FROM -> SupersetToolError
# @PARAM: message (str) - Сообщение об ошибке.
# @PARAM: context (Any) - Дополнительный контекст ошибки.
class AuthenticationError(SupersetToolError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes an authentication error.
# @PRE: Optional message and context.
# @POST: Error is initialized with authentication context.
def __init__(self, message: str = "Authentication failed", **context: Any):
super().__init__(f"[AUTH_FAILURE] {message}", context={"type": "authentication", **context})
# [/DEF:__init__:Function]
# [/DEF:AuthenticationError:Class]
# [DEF:PermissionDeniedError:Class]
# @PURPOSE: Ошибка, возникающая при отказе в доступе к ресурсу.
# @RELATION: INHERITS_FROM -> AuthenticationError
# @PARAM: message (str) - Сообщение об ошибке.
# @PARAM: required_permission (Optional[str]) - Требуемое разрешение.
# @PARAM: context (Any) - Дополнительный контекст ошибки.
class PermissionDeniedError(AuthenticationError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes a permission denied error.
# @PRE: Optional message, permission string, and context.
# @POST: Error is initialized with permission details.
def __init__(self, message: str = "Permission denied", required_permission: Optional[str] = None, **context: Any):
full_message = f"Permission denied: {required_permission}" if required_permission else message
super().__init__(full_message, context={"required_permission": required_permission, **context})
# [/DEF:__init__:Function]
# [/DEF:PermissionDeniedError:Class]
# [DEF:SupersetAPIError:Class]
# @PURPOSE: Общие ошибки при взаимодействии с Superset API.
# @RELATION: INHERITS_FROM -> SupersetToolError
# @PARAM: message (str) - Сообщение об ошибке.
# @PARAM: context (Any) - Дополнительный контекст ошибки.
class SupersetAPIError(SupersetToolError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes a Superset API error.
# @PRE: Optional message and context.
# @POST: Error is initialized with API failure context.
def __init__(self, message: str = "Superset API error", **context: Any):
super().__init__(f"[API_FAILURE] {message}", context={"type": "api_call", **context})
# [/DEF:__init__:Function]
# [/DEF:SupersetAPIError:Class]
# [DEF:ExportError:Class]
# @PURPOSE: Ошибки, специфичные для операций экспорта.
# @RELATION: INHERITS_FROM -> SupersetAPIError
# @PARAM: message (str) - Сообщение об ошибке.
# @PARAM: context (Any) - Дополнительный контекст ошибки.
class ExportError(SupersetAPIError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes an export error.
# @PRE: Optional message and context.
# @POST: Error is initialized with export failure subtype.
def __init__(self, message: str = "Dashboard export failed", **context: Any):
super().__init__(f"[EXPORT_FAILURE] {message}", context={"subtype": "export", **context})
# [/DEF:__init__:Function]
# [/DEF:ExportError:Class]
# [DEF:DashboardNotFoundError:Class]
# @PURPOSE: Ошибка, когда запрошенный дашборд или ресурс не найден (404).
# @RELATION: INHERITS_FROM -> SupersetAPIError
# @PARAM: dashboard_id_or_slug (Union[int, str]) - ID или slug дашборда.
# @PARAM: message (str) - Сообщение об ошибке.
# @PARAM: context (Any) - Дополнительный контекст ошибки.
class DashboardNotFoundError(SupersetAPIError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes a dashboard not found error.
# @PRE: dashboard_id_or_slug is provided.
# @POST: Error is initialized with resource identification.
def __init__(self, dashboard_id_or_slug: Union[int, str], message: str = "Dashboard not found", **context: Any):
super().__init__(f"[NOT_FOUND] Dashboard '{dashboard_id_or_slug}' {message}", context={"subtype": "not_found", "resource_id": dashboard_id_or_slug, **context})
# [/DEF:__init__:Function]
# [/DEF:DashboardNotFoundError:Class]
# [DEF:DatasetNotFoundError:Class]
# @PURPOSE: Ошибка, когда запрашиваемый набор данных не существует (404).
# @RELATION: INHERITS_FROM -> SupersetAPIError
# @PARAM: dataset_id_or_slug (Union[int, str]) - ID или slug набора данных.
# @PARAM: message (str) - Сообщение об ошибке.
# @PARAM: context (Any) - Дополнительный контекст ошибки.
class DatasetNotFoundError(SupersetAPIError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes a dataset not found error.
# @PRE: dataset_id_or_slug is provided.
# @POST: Error is initialized with resource identification.
def __init__(self, dataset_id_or_slug: Union[int, str], message: str = "Dataset not found", **context: Any):
super().__init__(f"[NOT_FOUND] Dataset '{dataset_id_or_slug}' {message}", context={"subtype": "not_found", "resource_id": dataset_id_or_slug, **context})
# [/DEF:__init__:Function]
# [/DEF:DatasetNotFoundError:Class]
# [DEF:InvalidZipFormatError:Class]
# @PURPOSE: Ошибка, указывающая на некорректный формат или содержимое ZIP-архива.
# @RELATION: INHERITS_FROM -> SupersetToolError
# @PARAM: message (str) - Сообщение об ошибке.
# @PARAM: file_path (Optional[Union[str, Path]]) - Путь к файлу.
# @PARAM: context (Any) - Дополнительный контекст ошибки.
class InvalidZipFormatError(SupersetToolError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes an invalid ZIP format error.
# @PRE: Optional message, file path, and context.
# @POST: Error is initialized with file validation context.
def __init__(self, message: str = "Invalid ZIP format or content", file_path: Optional[Union[str, Path]] = None, **context: Any):
super().__init__(f"[FILE_ERROR] {message}", context={"type": "file_validation", "file_path": str(file_path) if file_path else "N/A", **context})
# [/DEF:__init__:Function]
# [/DEF:InvalidZipFormatError:Class]
# [DEF:NetworkError:Class]
# @PURPOSE: Ошибки, связанные с сетевым соединением.
# @RELATION: INHERITS_FROM -> SupersetToolError
# @PARAM: message (str) - Сообщение об ошибке.
# @PARAM: context (Any) - Дополнительный контекст ошибки.
class NetworkError(SupersetToolError):
# [DEF:__init__:Function]
# @PURPOSE: Initializes a network error.
# @PRE: Optional message and context.
# @POST: Error is initialized with network failure context.
def __init__(self, message: str = "Network connection failed", **context: Any):
super().__init__(f"[NETWORK_FAILURE] {message}", context={"type": "network", **context})
# [/DEF:__init__:Function]
# [/DEF:NetworkError:Class]
# [DEF:FileOperationError:Class]
# @PURPOSE: Общие ошибки файловых операций (I/O).
# @RELATION: INHERITS_FROM -> SupersetToolError
class FileOperationError(SupersetToolError):
pass
# [/DEF:FileOperationError:Class]
# [DEF:InvalidFileStructureError:Class]
# @PURPOSE: Ошибка, указывающая на некорректную структуру файлов или директорий.
# @RELATION: INHERITS_FROM -> FileOperationError
class InvalidFileStructureError(FileOperationError):
pass
# [/DEF:InvalidFileStructureError:Class]
# [DEF:ConfigurationError:Class]
# @PURPOSE: Ошибки, связанные с неверной конфигурацией инструмента.
# @RELATION: INHERITS_FROM -> SupersetToolError
class ConfigurationError(SupersetToolError):
pass
# [/DEF:ConfigurationError:Class]
# [/DEF:superset_tool.exceptions:Module]

View File

@@ -1,87 +0,0 @@
# [DEF:superset_tool.models:Module]
#
# @SEMANTICS: pydantic, model, config, validation, data-structure
# @PURPOSE: Определяет Pydantic-модели для конфигурации инструмента, обеспечивая валидацию данных.
# @LAYER: Infra
# @RELATION: DEPENDS_ON -> pydantic
# @RELATION: DEPENDS_ON -> superset_tool.utils.logger
# @PUBLIC_API: SupersetConfig, DatabaseConfig
# [SECTION: IMPORTS]
import re
from typing import Optional, Dict, Any
from pydantic import BaseModel, validator, Field
from .utils.logger import SupersetLogger
# [/SECTION]
# [DEF:SupersetConfig:Class]
# @PURPOSE: Модель конфигурации для подключения к одному экземпляру Superset API.
# @RELATION: INHERITS_FROM -> pydantic.BaseModel
class SupersetConfig(BaseModel):
env: str = Field(..., description="Название окружения (например, dev, prod).")
base_url: str = Field(..., description="Базовый URL Superset API, включая /api/v1.")
auth: Dict[str, Any] = Field(..., description="Словарь с данными для аутентификации (provider, username, password, refresh).")
verify_ssl: bool = Field(True, description="Флаг для проверки SSL-сертификатов.")
timeout: int = Field(30, description="Таймаут в секундах для HTTP-запросов.")
logger: Optional[SupersetLogger] = Field(None, description="Экземпляр логгера для логирования.")
# [DEF:validate_auth:Function]
# @PURPOSE: Проверяет, что словарь `auth` содержит все необходимые для аутентификации поля.
# @PRE: `v` должен быть словарем.
# @POST: Возвращает `v`, если все обязательные поля (`provider`, `username`, `password`, `refresh`) присутствуют.
# @THROW: ValueError - Если отсутствуют обязательные поля.
# @PARAM: v (Dict[str, str]) - Значение поля auth.
@validator('auth')
def validate_auth(cls, v: Dict[str, Any]) -> Dict[str, Any]:
required = {'provider', 'username', 'password', 'refresh'}
if not required.issubset(v.keys()):
raise ValueError(f"Словарь 'auth' должен содержать поля: {required}. Отсутствующие: {required - v.keys()}")
return v
# [/DEF:validate_auth:Function]
# [DEF:normalize_base_url:Function]
# @PURPOSE: Нормализует `base_url`, добавляя `/api/v1`, если он отсутствует.
# @PRE: `v` должна быть строкой.
# @POST: Возвращает нормализованный `v`.
# @THROW: ValueError - Если формат URL невалиден.
# @PARAM: v (str) - Значение поля base_url.
@validator('base_url')
def normalize_base_url(cls, v: str) -> str:
v = v.strip()
if not v.startswith(('http://', 'https://')):
raise ValueError(f"Invalid URL scheme: {v}. Must start with http:// or https://")
if '/api/v1' not in v:
v = f"{v.rstrip('/')}/api/v1"
return v
# [/DEF:normalize_base_url:Function]
class Config:
arbitrary_types_allowed = True
# [/DEF:SupersetConfig:Class]
# [DEF:DatabaseConfig:Class]
# @PURPOSE: Модель для параметров трансформации баз данных при миграции дашбордов.
# @RELATION: INHERITS_FROM -> pydantic.BaseModel
class DatabaseConfig(BaseModel):
database_config: Dict[str, Dict[str, Any]] = Field(..., description="Словарь, содержащий 'old' и 'new' конфигурации базы данных.")
logger: Optional[SupersetLogger] = Field(None, description="Экземпляр логгера для логирования.")
# [DEF:validate_config:Function]
# @PURPOSE: Проверяет, что словарь `database_config` содержит ключи 'old' и 'new'.
# @PRE: `v` должен быть словарем.
# @POST: Возвращает `v`, если ключи 'old' и 'new' присутствуют.
# @THROW: ValueError - Если отсутствуют обязательные ключи.
# @PARAM: v (Dict[str, Dict[str, Any]]) - Значение поля database_config.
@validator('database_config')
def validate_config(cls, v: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
if not {'old', 'new'}.issubset(v.keys()):
raise ValueError("'database_config' должен содержать ключи 'old' и 'new'.")
return v
# [/DEF:validate_config:Function]
class Config:
arbitrary_types_allowed = True
# [/DEF:DatabaseConfig:Class]
# [/DEF:superset_tool.models:Module]

View File

@@ -1,5 +0,0 @@
# [DEF:superset_tool.utils:Module]
# @SEMANTICS: package, utils
# @PURPOSE: Utility package for superset_tool.
# @LAYER: Infra
# [/DEF:superset_tool.utils:Module]

View File

@@ -1,110 +0,0 @@
# [DEF:superset_tool.utils.init_clients:Module]
#
# @SEMANTICS: utility, factory, client, initialization, configuration
# @PURPOSE: Централизованно инициализирует клиенты Superset для различных окружений (DEV, PROD, SBX, PREPROD), используя `keyring` для безопасного доступа к паролям.
# @LAYER: Infra
# @RELATION: DEPENDS_ON -> superset_tool.models
# @RELATION: DEPENDS_ON -> superset_tool.client
# @RELATION: DEPENDS_ON -> keyring
# @PUBLIC_API: setup_clients
# [SECTION: IMPORTS]
import keyring
import os
from typing import Dict, List, Optional, Any
from superset_tool.models import SupersetConfig
from superset_tool.client import SupersetClient
from superset_tool.utils.logger import SupersetLogger
# [/SECTION]
# [DEF:setup_clients:Function]
# @PURPOSE: Инициализирует и возвращает словарь клиентов `SupersetClient`.
# @PRE: `logger` должен быть валидным экземпляром `SupersetLogger`.
# @POST: Возвращает словарь с инициализированными клиентами.
# @THROW: Exception - При любых других ошибках инициализации.
# @RELATION: CREATES_INSTANCE_OF -> SupersetConfig
# @RELATION: CREATES_INSTANCE_OF -> SupersetClient
# @PARAM: logger (SupersetLogger) - Экземпляр логгера для записи процесса.
# @PARAM: custom_envs (List[Dict[str, Any]]) - Список пользовательских настроек окружений.
# @RETURN: Dict[str, SupersetClient] - Словарь, где ключ - имя окружения, значение - `SupersetClient`.
def setup_clients(logger: SupersetLogger, custom_envs: Optional[List[Any]] = None) -> Dict[str, SupersetClient]:
logger.info("[setup_clients][Enter] Starting Superset clients initialization.")
clients = {}
try:
# Try to load from ConfigManager if available
try:
from backend.src.dependencies import get_config_manager
config_manager = get_config_manager()
envs = config_manager.get_environments()
if envs:
logger.info("[setup_clients][Action] Loading environments from ConfigManager")
for env in envs:
logger.debug("[setup_clients][State] Creating config for environment: %s", env.name)
config = SupersetConfig(
env=env.name,
base_url=env.url,
auth={"provider": "db", "username": env.username, "password": env.password, "refresh": "true"},
verify_ssl=False,
timeout=30,
logger=logger
)
clients[env.name] = SupersetClient(config, logger)
return clients
except (ImportError, Exception) as e:
logger.debug(f"[setup_clients][State] ConfigManager not available or failed: {e}")
if custom_envs:
for env in custom_envs:
# Handle both dict and object (like Pydantic model)
env_name = str(getattr(env, 'name', env.get('name') if isinstance(env, dict) else "unknown"))
base_url = str(getattr(env, 'url', env.get('url') if isinstance(env, dict) else ""))
username = str(getattr(env, 'username', env.get('username') if isinstance(env, dict) else ""))
password = str(getattr(env, 'password', env.get('password') if isinstance(env, dict) else ""))
logger.debug("[setup_clients][State] Creating config for custom environment: %s", env_name)
config = SupersetConfig(
env=env_name,
base_url=base_url,
auth={"provider": "db", "username": username, "password": password, "refresh": "true"},
verify_ssl=False,
timeout=30,
logger=logger
)
clients[env_name] = SupersetClient(config, logger)
else:
# Fallback to hardcoded environments with keyring
environments = {
"dev": "https://devta.bi.dwh.rusal.com/api/v1",
"prod": "https://prodta.bi.dwh.rusal.com/api/v1",
"sbx": "https://sandboxta.bi.dwh.rusal.com/api/v1",
"preprod": "https://preprodta.bi.dwh.rusal.com/api/v1",
"uatta": "https://uatta.bi.dwh.rusal.com/api/v1",
"dev5":"https://dev.bi.dwh.rusal.com/api/v1"
}
for env_name, base_url in environments.items():
logger.debug("[setup_clients][State] Creating config for environment: %s", env_name.upper())
password = keyring.get_password("system", f"{env_name} migrate")
if not password:
logger.warning(f"Пароль для '{env_name} migrate' не найден в keyring. Пропускаем.")
continue
config = SupersetConfig(
env=env_name,
base_url=base_url,
auth={"provider": "db", "username": "migrate_user", "password": password, "refresh": "true"},
verify_ssl=False,
timeout=30,
logger=logger
)
clients[env_name] = SupersetClient(config, logger)
logger.info("[setup_clients][Exit] All clients (%s) initialized successfully.", ', '.join(clients.keys()))
return clients
except Exception as e:
logger.critical("[setup_clients][Failure] Critical error during client initialization: %s", e, exc_info=True)
raise
# [/DEF:setup_clients:Function]
# [/DEF:superset_tool.utils.init_clients:Module]

View File

@@ -1,156 +0,0 @@
# [DEF:superset_tool.utils.logger:Module]
#
# @SEMANTICS: logging, utility, infrastructure, wrapper
# @PURPOSE: Предоставляет универсальную обёртку над стандартным `logging.Logger` для унифицированного создания и управления логгерами с выводом в консоль и/или файл.
# @LAYER: Infra
# @RELATION: WRAPS -> logging.Logger
#
# @INVARIANT: Логгер всегда должен иметь имя.
# @PUBLIC_API: SupersetLogger
# [SECTION: IMPORTS]
import logging
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional, Any, Mapping, Generator
from contextlib import contextmanager
# [/SECTION]
# [DEF:belief_scope:Function]
# @PURPOSE: Context manager for belief state logging to maintain execution coherence.
# @PRE: scope_id must be a string.
# @POST: Entry and exit actions are logged.
# @PARAM: scope_id (str) - Identifier for the logical scope.
@contextmanager
def belief_scope(scope_id: str) -> Generator[None, None, None]:
"""Context manager for belief state logging."""
logger = logging.getLogger("superset_tool")
logger.debug(f"[BELIEF_ENTRY] {scope_id}")
try:
yield
finally:
logger.debug(f"[BELIEF_EXIT] {scope_id}")
# [/DEF:belief_scope:Function]
# [DEF:SupersetLogger:Class]
# @PURPOSE: Обёртка над `logging.Logger`, которая упрощает конфигурацию и использование логгеров.
# @RELATION: WRAPS -> logging.Logger
class SupersetLogger:
# [DEF:__init__:Function]
# @PURPOSE: Конфигурирует и инициализирует логгер, добавляя обработчики для файла и/или консоли.
# @PRE: Если log_dir указан, путь должен быть валидным (или создаваемым).
# @POST: `self.logger` готов к использованию с настроенными обработчиками.
# @PARAM: name (str) - Идентификатор логгера.
# @PARAM: log_dir (Optional[Path]) - Директория для сохранения лог-файлов.
# @PARAM: level (int) - Уровень логирования (e.g., `logging.INFO`).
# @PARAM: console (bool) - Флаг для включения вывода в консоль.
def __init__(self, name: str = "superset_tool", log_dir: Optional[Path] = None, level: int = logging.INFO, console: bool = True, logger: Optional[logging.Logger] = None) -> None:
with belief_scope("__init__"):
if logger:
self.logger = logger
return
self.logger = logging.getLogger(name)
self.logger.setLevel(level)
self.logger.propagate = False
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
if self.logger.hasHandlers():
self.logger.handlers.clear()
if log_dir:
log_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d")
file_handler = logging.FileHandler(log_dir / f"{name}_{timestamp}.log", encoding="utf-8")
file_handler.setFormatter(formatter)
self.logger.addHandler(file_handler)
if console:
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(formatter)
self.logger.addHandler(console_handler)
# [/DEF:__init__:Function]
# [DEF:_log:Function]
# @PURPOSE: (Helper) Универсальный метод для вызова соответствующего уровня логирования.
# @PRE: level_method должен быть вызываемым методом логгера. msg must be a string.
# @POST: Сообщение записано в лог.
# @PARAM: level_method (Any) - Метод логгера (info, debug, etc).
# @PARAM: msg (str) - Сообщение.
# @PARAM: args (Any) - Аргументы форматирования.
# @PARAM: extra (Optional[Mapping[str, Any]]) - Дополнительные данные.
# @PARAM: exc_info (bool) - Добавлять ли информацию об исключении.
def _log(self, level_method: Any, msg: str, *args: Any, extra: Optional[Mapping[str, Any]] = None, exc_info: bool = False) -> None:
with belief_scope("_log"):
level_method(msg, *args, extra=extra, exc_info=exc_info)
# [/DEF:_log:Function]
# [DEF:info:Function]
# @PURPOSE: Записывает сообщение уровня INFO.
# @PRE: msg должен быть строкой.
# @POST: Сообщение уровня INFO записано.
def info(self, msg: str, *args: Any, extra: Optional[Mapping[str, Any]] = None, exc_info: bool = False) -> None:
with belief_scope("info"):
self._log(self.logger.info, msg, *args, extra=extra, exc_info=exc_info)
# [/DEF:info:Function]
# [DEF:debug:Function]
# @PURPOSE: Записывает сообщение уровня DEBUG.
# @PRE: msg должен быть строкой.
# @POST: Сообщение уровня DEBUG записано.
def debug(self, msg: str, *args: Any, extra: Optional[Mapping[str, Any]] = None, exc_info: bool = False) -> None:
with belief_scope("debug"):
self._log(self.logger.debug, msg, *args, extra=extra, exc_info=exc_info)
# [/DEF:debug:Function]
# [DEF:warning:Function]
# @PURPOSE: Записывает сообщение уровня WARNING.
# @PRE: msg должен быть строкой.
# @POST: Сообщение уровня WARNING записано.
def warning(self, msg: str, *args: Any, extra: Optional[Mapping[str, Any]] = None, exc_info: bool = False) -> None:
with belief_scope("warning"):
self._log(self.logger.warning, msg, *args, extra=extra, exc_info=exc_info)
# [/DEF:warning:Function]
# [DEF:error:Function]
# @PURPOSE: Записывает сообщение уровня ERROR.
# @PRE: msg должен быть строкой.
# @POST: Сообщение уровня ERROR записано.
def error(self, msg: str, *args: Any, extra: Optional[Mapping[str, Any]] = None, exc_info: bool = False) -> None:
with belief_scope("error"):
self._log(self.logger.error, msg, *args, extra=extra, exc_info=exc_info)
# [/DEF:error:Function]
# [DEF:critical:Function]
# @PURPOSE: Записывает сообщение уровня CRITICAL.
# @PRE: msg должен быть строкой.
# @POST: Сообщение уровня CRITICAL записано.
def critical(self, msg: str, *args: Any, extra: Optional[Mapping[str, Any]] = None, exc_info: bool = False) -> None:
with belief_scope("critical"):
self._log(self.logger.critical, msg, *args, extra=extra, exc_info=exc_info)
# [/DEF:critical:Function]
# [DEF:exception:Function]
# @PURPOSE: Записывает сообщение уровня ERROR вместе с трассировкой стека текущего исключения.
# @PRE: msg должен быть строкой.
# @POST: Сообщение об ошибке с traceback записано.
def exception(self, msg: str, *args: Any, **kwargs: Any) -> None:
with belief_scope("exception"):
self.logger.exception(msg, *args, **kwargs)
# [/DEF:exception:Function]
# [DEF:belief_scope:Method]
# @PURPOSE: Instance method wrapper for belief_scope context manager.
# @PRE: scope_id must be a string.
# @POST: Enters the belief scope.
@contextmanager
def belief_scope(self, scope_id: str) -> Generator[None, None, None]:
with belief_scope(scope_id):
yield
# [/DEF:belief_scope:Method]
# [/DEF:SupersetLogger:Class]
# [/DEF:superset_tool.utils.logger:Module]

View File

@@ -1,157 +0,0 @@
# [DEF:superset_tool.utils.whiptail_fallback:Module]
#
# @SEMANTICS: ui, fallback, console, utility, interactive
# @PURPOSE: Предоставляет плотный консольный UI-fallback для интерактивных диалогов, имитируя `whiptail` для систем, где он недоступен.
# @LAYER: UI
# @PUBLIC_API: menu, checklist, yesno, msgbox, inputbox, gauge
# [SECTION: IMPORTS]
import sys
from typing import List, Tuple, Optional, Any
from .logger import belief_scope
# [/SECTION]
# [DEF:menu:Function]
# @PURPOSE: Отображает меню выбора и возвращает выбранный элемент.
# @PARAM: title (str) - Заголовок меню.
# @PARAM: prompt (str) - Приглашение к вводу.
# @PARAM: choices (List[str]) - Список вариантов для выбора.
# @PRE: choices must be a non-empty list of strings.
# @POST: Returns a tuple with return code and selected choice.
# @RETURN: Tuple[int, Optional[str]] - Кортеж (код возврата, выбранный элемент). rc=0 - успех.
def menu(title: str, prompt: str, choices: List[str], **kwargs) -> Tuple[int, Optional[str]]:
with belief_scope("menu"):
print(f"\n=== {title} ===\n{prompt}")
for idx, item in enumerate(choices, 1):
print(f"{idx}) {item}")
try:
raw = input("\nВведите номер (0 отмена): ").strip()
sel = int(raw)
return (0, choices[sel - 1]) if 0 < sel <= len(choices) else (1, None)
except (ValueError, IndexError):
return 1, None
# [/DEF:menu:Function]
# [DEF:checklist:Function]
# @PURPOSE: Отображает список с возможностью множественного выбора.
# @PARAM: title (str) - Заголовок.
# @PARAM: prompt (str) - Приглашение к вводу.
# @PARAM: options (List[Tuple[str, str]]) - Список кортежей (значение, метка).
# @PRE: options must be a list of (value, label) tuples.
# @POST: Returns a list of selected values.
# @RETURN: Tuple[int, List[str]] - Кортеж (код возврата, список выбранных значений).
def checklist(title: str, prompt: str, options: List[Tuple[str, str]], **kwargs) -> Tuple[int, List[str]]:
with belief_scope("checklist"):
print(f"\n=== {title} ===\n{prompt}")
for idx, (val, label) in enumerate(options, 1):
print(f"{idx}) [{val}] {label}")
raw = input("\nВведите номера через запятую (пустой ввод → отказ): ").strip()
if not raw: return 1, []
try:
indices = {int(x.strip()) for x in raw.split(",") if x.strip()}
selected_values = [options[i - 1][0] for i in indices if 0 < i <= len(options)]
return 0, selected_values
except (ValueError, IndexError):
return 1, []
# [/DEF:checklist:Function]
# [DEF:yesno:Function]
# @PURPOSE: Задает вопрос с ответом да/нет.
# @PARAM: title (str) - Заголовок.
# @PARAM: question (str) - Вопрос для пользователя.
# @PRE: question must be a string.
# @POST: Returns boolean based on user input.
# @RETURN: bool - `True`, если пользователь ответил "да".
def yesno(title: str, question: str, **kwargs) -> bool:
with belief_scope("yesno"):
ans = input(f"\n=== {title} ===\n{question} (y/n): ").strip().lower()
return ans in ("y", "yes", "да", "д")
# [/DEF:yesno:Function]
# [DEF:msgbox:Function]
# @PURPOSE: Отображает информационное сообщение.
# @PARAM: title (str) - Заголовок.
# @PARAM: msg (str) - Текст сообщения.
# @PRE: msg must be a string.
# @POST: Message is printed to console.
def msgbox(title: str, msg: str, **kwargs) -> None:
with belief_scope("msgbox"):
print(f"\n=== {title} ===\n{msg}\n")
# [/DEF:msgbox:Function]
# [DEF:inputbox:Function]
# @PURPOSE: Запрашивает у пользователя текстовый ввод.
# @PARAM: title (str) - Заголовок.
# @PARAM: prompt (str) - Приглашение к вводу.
# @PRE: prompt must be a string.
# @POST: Returns user input string.
# @RETURN: Tuple[int, Optional[str]] - Кортеж (код возврата, введенная строка).
def inputbox(title: str, prompt: str, **kwargs) -> Tuple[int, Optional[str]]:
with belief_scope("inputbox"):
print(f"\n=== {title} ===")
val = input(f"{prompt}\n")
return (0, val) if val else (1, None)
# [/DEF:inputbox:Function]
# [DEF:_ConsoleGauge:Class]
# @PURPOSE: Контекстный менеджер для имитации `whiptail gauge` в консоли.
class _ConsoleGauge:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the gauge.
# @PRE: title must be a string.
# @POST: Instance initialized.
def __init__(self, title: str, **kwargs):
with belief_scope("__init__"):
self.title = title
# [/DEF:__init__:Function]
# [DEF:__enter__:Function]
# @PURPOSE: Enters the context.
# @PRE: Instance initialized.
# @POST: Header printed, returns self.
def __enter__(self):
with belief_scope("__enter__"):
print(f"\n=== {self.title} ===")
return self
# [/DEF:__enter__:Function]
# [DEF:__exit__:Function]
# @PURPOSE: Exits the context.
# @PRE: Context entered.
# @POST: Newline printed.
def __exit__(self, exc_type, exc_val, exc_tb):
with belief_scope("__exit__"):
sys.stdout.write("\n"); sys.stdout.flush()
# [/DEF:__exit__:Function]
# [DEF:set_text:Function]
# @PURPOSE: Sets the gauge text.
# @PRE: txt must be a string.
# @POST: Text written to stdout.
def set_text(self, txt: str) -> None:
with belief_scope("set_text"):
sys.stdout.write(f"\r{txt} "); sys.stdout.flush()
# [/DEF:set_text:Function]
# [DEF:set_percent:Function]
# @PURPOSE: Sets the gauge percentage.
# @PRE: percent must be an integer.
# @POST: Percentage written to stdout.
def set_percent(self, percent: int) -> None:
with belief_scope("set_percent"):
sys.stdout.write(f"{percent}%"); sys.stdout.flush()
# [/DEF:set_percent:Function]
# [/DEF:_ConsoleGauge:Class]
# [DEF:gauge:Function]
# @PURPOSE: Создает и возвращает экземпляр `_ConsoleGauge`.
# @PRE: title must be a string.
# @POST: Returns an instance of _ConsoleGauge.
# @PARAM: title (str) - Заголовок для индикатора прогресса.
# @RETURN: _ConsoleGauge - Экземпляр контекстного менеджера.
def gauge(title: str, **kwargs) -> _ConsoleGauge:
with belief_scope("gauge"):
return _ConsoleGauge(title, **kwargs)
# [/DEF:gauge:Function]
# [/DEF:superset_tool.utils.whiptail_fallback:Module]