init

2025-04-01 15:20:19 +03:00
commit 992073d2f5
10 changed files with 1028 additions and 0 deletions
--- a/superset_tool/README.md
+++ b/superset_tool/README.md
--- a/superset_tool/client.py
+++ b/superset_tool/client.py
@@ -0,0 +1,274 @@
+import requests
+from requests.exceptions import HTTPError
+import urllib3
+import json
+from typing import Dict, Optional, Tuple, List, Any
+from pydantic import BaseModel, Field
+from .utils.fileio import *
+from .exceptions import *
+from .models import SupersetConfig
+
+
+class SupersetClient:
+    def __init__(self, config: SupersetConfig):
+        self.config = config
+        self.session = requests.Session()
+        self._setup_session()
+        self._authenticate()
+    
+    def _setup_session(self):
+        adapter = requests.adapters.HTTPAdapter(
+            max_retries=3,
+            pool_connections=10,
+            pool_maxsize=100
+        )
+
+        if not self.config.verify_ssl:
+            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+        
+        self.session.mount('https://', adapter)
+        self.session.verify = self.config.verify_ssl
+
+    def _authenticate(self):
+        try:
+            # Сначала логинимся для получения access_token
+            login_url = f"{self.config.base_url}/security/login"
+            response = self.session.post(
+                login_url,
+                json={
+                    "username": self.config.auth["username"],
+                    "password": self.config.auth["password"],
+                    "provider": self.config.auth["provider"],
+                    "refresh": True
+                },
+                verify=self.config.verify_ssl
+            )
+            response.raise_for_status()
+            self.access_token = response.json()["access_token"]
+
+            # Затем получаем CSRF токен с использованием access_token
+            csrf_url = f"{self.config.base_url}/security/csrf_token/"
+            response = self.session.get(
+                csrf_url,
+                headers={"Authorization": f"Bearer {self.access_token}"},
+                verify=self.config.verify_ssl
+            )
+            response.raise_for_status()
+            self.csrf_token = response.json()["result"]
+        except HTTPError as e:
+            if e.response.status_code == 401:
+                error_msg = "Invalid credentials" if "login" in e.request.url else "CSRF token fetch failed"
+                raise AuthenticationError(f"{error_msg}. Check auth configuration") from e
+            raise
+        
+
+    @property
+    def headers(self):
+        return {
+            "Authorization": f"Bearer {self.access_token}",
+            "X-CSRFToken": self.csrf_token,
+            "Referer": self.config.base_url,
+            "Content-Type": "application/json"
+        }
+
+    def get_dashboard(self, dashboard_id_or_slug: str ) -> Dict:
+        """
+        Получаем информацию по дашборду (если передан dashboard_id_or_slug), либо по всем дашбордам, если параметр не передан
+        Параметры:
+            :dashboard_id_or_slug - id или короткая ссылка
+        """
+        url = f"{self.config.base_url}/dashboard/{dashboard_id_or_slug}"
+        
+        try:
+            response = self.session.get(
+                url,
+                headers=self.headers,
+                timeout=self.config.timeout
+            )
+            response.raise_for_status()
+            return response.json()["result"]
+        except requests.exceptions.RequestException as e:
+            raise SupersetAPIError(f"Failed to get dashboard: {str(e)}") from e
+        
+
+    def get_dashboards(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
+        """
+        Получаем информацию по всем дашбордам с учетом пагинации.
+
+        Параметры:
+            query (Optional[Dict]): Дополнительные параметры запроса, включая пагинацию и фильтры.
+
+        Возвращает:
+            Tuple[int, List[Dict]]: Кортеж, содержащий общее количество дашбордов и список всех дашбордов.
+        """
+        url = f"{self.config.base_url}/dashboard/"
+        modified_query: Dict = {}
+        all_results: List[Dict] = []
+        total_count: int = 0
+        current_page: int = 0
+        
+        try:
+            total_count = self.session.get(
+                url,
+                #q=modified_query,
+                headers=self.headers,
+                timeout=self.config.timeout
+            ).json()['count']
+        except requests.exceptions.RequestException as e:
+           raise SupersetAPIError(f"Ошибка при получении кол-ва дашбордов: {str(e)}") from e
+        #Инициализация параметров запроса с учетом переданного query
+        
+        if query:
+            modified_query = query.copy()
+            # Убедимся, что page_size установлен, если не передан
+            modified_query.setdefault("page_size", 20)
+        else:
+            modified_query = {
+                "columns": [
+                    "slug",
+                    "id",
+                    "changed_on_utc",
+                    "dashboard_title",
+                    "published"
+                ],
+                "page": 0,
+                "page_size": 20
+            }
+
+        page_size = modified_query["page_size"]
+
+        
+
+        total_pages = (total_count + page_size - 1) // page_size
+
+       
+        try:
+            while current_page < total_pages:
+                modified_query["page"] = current_page
+                response = self.session.get(
+                    url,
+                    headers=self.headers,
+                    params={"q": json.dumps(modified_query)} ,
+                    timeout=self.config.timeout
+                )
+
+                response.raise_for_status()
+                data = response.json()
+                all_results.extend(data.get("result", []))
+
+                current_page += 1
+                # Проверка, достигли ли последней страницы
+            return total_count, all_results
+        except requests.exceptions.RequestException as e:
+            raise SupersetAPIError(f"Ошибка при получении дашбордов: {str(e)}") from e
+
+    def export_dashboard(self, dashboard_id: int) -> Tuple[bytes, str]:
+        """Экспортирует дашборд из Superset в виде ZIP-архива и возвращает его содержимое с именем файла.
+
+        Параметры:   
+           :dashboard_id (int): Идентификатор дашборда для экспорта
+
+        Возвращает:
+        Tuple[bytes, str]: Кортеж, содержащий:
+            - bytes: Бинарное содержимое ZIP-архива с дашбордом
+            - str: Имя файла (из заголовков ответа или в формате dashboard_{id}.zip)
+
+        Исключения:
+        SupersetAPIError: Вызывается при ошибках:
+            - Проблемы с сетью/соединением
+            - Невалидный ID дашборда
+            - Отсутствие прав доступа
+            - Ошибки сервера (status code >= 400)
+
+        Пример использования:
+            content, filename = client.export_dashboard(5)
+            with open(filename, 'wb') as f:
+                f.write(content)
+
+        Примечания:
+            - Для экспорта используется API Endpoint: /dashboard/export/
+            - Имя файла пытается извлечь из Content-Disposition заголовка
+            - По умолчанию возвращает имя в формате dashboard_{id}.zip
+            - Архив содержит JSON-метаданные и связанные элементы (датасеты, чарты)
+            """
+        url = f"{self.config.base_url}/dashboard/export/"
+        params = {"q": f"[{dashboard_id}]"}
+        
+        try:
+            response = self.session.get(
+                url,
+                headers=self.headers,
+                params=params,
+                timeout=self.config.timeout
+            )
+            response.raise_for_status()
+            
+            filename = get_filename_from_headers(response.headers) or f"dashboard_{dashboard_id}.zip"
+            return response.content, filename
+            
+        except requests.exceptions.RequestException as e:
+            raise SupersetAPIError(f"Export failed: {str(e)}") from e
+        
+
+    def import_dashboard(self, zip_path) -> Dict:
+        """Импортирует дашборд в Superset из ZIP-архива.
+
+        Параметры:
+            zip_path (Path): Путь к ZIP-файлу с дашбордом для импорта
+
+        Возвращает:
+            dict: Ответ API в формате JSON с результатами импорта
+
+        Исключения:
+            RuntimeError: Вызывается при:
+                - Ошибках сети/соединения
+                - Невалидном формате ZIP-архива
+                - Конфликте прав доступа
+                - Ошибках сервера (status code >= 400)
+                - Попытке перезаписи без соответствующих прав
+
+        Пример использования:
+            result = client.import_dashboard(Path("my_dashboard.zip"))
+            print(f"Импортирован дашборд: {result['title']}")
+
+        Примечания:
+            - Использует API Endpoint: /dashboard/import/
+            - Автоматически устанавливает overwrite=true для перезаписи существующих дашбордов
+            - Удваивает стандартный таймаут для обработки длительных операций
+            - Удаляет Content-Type заголовок (автоматически генерируется для multipart/form-data)
+            - Архив должен содержать валидные JSON-метаданные в формате Superset
+            - Ответ может содержать информацию о созданных/обновленных ресурсах (датасеты, чарты, владельцы)
+            - При конфликте имен может потребоваться ручное разрешение через параметры импорта
+        """
+        url = f"{self.config.base_url}/dashboard/import/"
+
+        headers_without_content_type = {k: v for k, v in self.headers.items() if k.lower() != 'content-type'}
+        
+        zip_name = zip_path.name
+        # Подготавливаем данные для multipart/form-data
+        with open(zip_path, 'rb') as f:
+            files = {
+                'formData': (
+                    zip_name,  # Имя файла
+                    f,  # Файловый объект
+                    'application/x-zip-compressed'  # MIME-тип из curl
+                )
+            }
+            
+            # Отправляем запрос
+            response = self.session.post(
+                        url,
+                        files=files,
+                        data={'overwrite': 'true'},
+                        headers=headers_without_content_type,
+                        timeout=self.config.timeout * 2  # Longer timeout for imports
+                    )
+        # Обрабатываем ответ
+        try:
+            response.raise_for_status()
+            return response.json()
+        except requests.exceptions.HTTPError as e:
+            error_detail = f"{e.response.status_code} {e.response.reason}"
+            if e.response.text:
+                error_detail += f"\nТело ответа: {e.response.text}"
+            raise RuntimeError(f"Ошибка импорта: {error_detail}") from e
--- a/superset_tool/exceptions.py
+++ b/superset_tool/exceptions.py
@@ -0,0 +1,14 @@
+class SupersetToolError(Exception):
+    """Base exception class for all tool errors"""
+
+class AuthenticationError(SupersetToolError):
+    """Authentication related errors"""
+
+class SupersetAPIError(SupersetToolError):
+    """General API communication errors"""
+
+class ExportError(SupersetToolError):
+    """Dashboard export errors"""
+
+class ImportError(SupersetToolError):
+    """Dashboard import errors"""
--- a/superset_tool/models.py
+++ b/superset_tool/models.py
@@ -0,0 +1,10 @@
+from pydantic import BaseModel
+
+class SupersetConfig(BaseModel):
+    base_url: str
+    auth: dict  # Словарь с параметрами аутентификации
+    verify_ssl: bool = True
+    timeout: int = 30
+
+class DatabaseConfig(BaseModel):
+    database_config: dict
--- a/superset_tool/utils/fileio.py
+++ b/superset_tool/utils/fileio.py
@@ -0,0 +1,355 @@
+import re
+import zipfile
+from pathlib import Path
+from typing import Optional, Tuple, Dict
+import datetime
+import shutil
+import yaml
+import tempfile
+import os
+from contextlib import contextmanager
+
+
+@contextmanager
+def create_temp_file(content: bytes, suffix: str = ".zip"):
+    try:
+        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
+            tmp.write(content)
+            tmp.flush()
+        yield Path(tmp.name)
+    finally:
+        if Path(tmp.name).exists():
+            Path(tmp.name).unlink()
+
+def save_and_unpack_dashboard(
+    zip_content: bytes,
+    output_dir: str = "dashboards",
+    unpack: bool = False,
+    original_filename: Optional[str] = None
+) -> Tuple[Path, Path]:
+    """Сохраняет и распаковывает дашборд с учетом оригинального имени"""
+    try:
+        output_path = Path(output_dir)
+        output_path.mkdir(parents=True, exist_ok=True)
+
+        # Генерируем имя файла
+        if original_filename:
+            zip_name = sanitize_filename(original_filename)
+        else:
+            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+            zip_name = f"dashboard_export_{timestamp}.zip"
+
+        zip_path = output_path / zip_name
+
+        # Сохраняем ZIP-файл
+        with open(zip_path, "wb") as f:
+            f.write(zip_content)
+        
+        if unpack:
+            # Распаковываем в поддиректорию с именем архива
+            #extract_dir_name = zip_path.stem
+            extract_path = output_path 
+
+            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+                zip_ref.extractall(extract_path)
+            
+            return zip_path, extract_path
+        
+        return zip_path
+        
+
+    except Exception as e:
+        raise RuntimeError(f"Failed to unpack dashboard: {str(e)}") from e
+
+
+def get_filename_from_headers(headers: dict) -> Optional[str]:
+    """Извлекает имя файла из заголовков HTTP-ответа"""
+    content_disposition = headers.get("Content-Disposition", "")
+
+    # Пытаемся найти имя файла в кавычках
+    filename_match = re.findall(r'filename="(.+?)"', content_disposition)
+    if not filename_match:
+        # Пробуем без кавычек
+        filename_match = re.findall(r'filename=([^;]+)', content_disposition)
+
+    if filename_match:
+        return filename_match[0].strip('"')
+    return None
+
+
+def sanitize_filename(filename: str) -> str:
+    """Очищает имя файла от потенциально опасных символов"""
+    return re.sub(r'[\\/*?:"<>|]', "_", filename).strip()
+
+
+def archive_exports(
+    output_dir: str,
+    daily_retention: int = 7,
+    weekly_retention: int = 4,
+    monthly_retention: int = 12,
+    yearly_retention: Optional[int] = None
+):
+    """Управление историей экспортов по политике GFS (Grandfather-Father-Son)
+    Параметры:
+    :daily_retention: 
+    :weekly_retention:
+    :monthly_retention:
+    :yearly_retention: Optional[int] 
+    Извлекает даты из стандартного суперсетовсого архива вида, либо берет дату изменения архива
+    dashboard_export_20250326T121517.zip"""
+    
+    export_dir = Path(output_dir)
+    files_with_dates = []
+    
+    # Собираем файлы с их датами
+    for file in export_dir.glob("*.zip"):
+        # Извлекаем временную метку из имени файла
+        try:
+            # Разбиваем имя файла по шаблону: dashboard_export_YYYYMMDDTHHMMSS.zip
+            timestamp_str = file.stem.split('_')[-1]
+            date_str = timestamp_str.split('T')[0]  # Отделяем дату от времени
+            date = datetime.datetime.strptime(date_str, "%Y%m%d").date()
+        except (ValueError, IndexError):
+            # Если не удалось распарсить - используем дату изменения файла
+            mtime = file.stat().st_mtime
+            date = datetime.datetime.fromtimestamp(mtime).date()
+        
+        files_with_dates.append((file, date))
+    
+    # Сортируем файлы по дате (новые сначала)
+    files_with_dates.sort(key=lambda x: x[1], reverse=True)
+    
+    # Создаем группы для разных уровней резервирования
+    daily_groups = {}
+    weekly_groups = {}
+    monthly_groups = {}
+    yearly_groups = {}
+
+    for file, date in files_with_dates:
+        # Группировка по дням
+        daily_groups.setdefault(date, file)
+        
+        # Группировка по неделям
+        year, week, _ = date.isocalendar()
+        weekly_groups.setdefault((year, week), file)
+        
+        # Группировка по месяцам
+        monthly_groups.setdefault((date.year, date.month), file)
+        
+        # Группировка по годам
+        yearly_groups.setdefault(date.year, file)
+    
+    # Выбираем файлы для сохранения
+    keep_files = set()
+    
+    # Daily - последние N дней
+    sorted_daily = sorted(daily_groups.keys(), reverse=True)[:daily_retention]
+    keep_files.update(daily_groups[d] for d in sorted_daily)
+    
+    # Weekly - последние N недель
+    sorted_weekly = sorted(weekly_groups.keys(), reverse=True)[:weekly_retention]
+    keep_files.update(weekly_groups[w] for w in sorted_weekly)
+    
+    # Monthly - последние N месяцев
+    sorted_monthly = sorted(monthly_groups.keys(), reverse=True)[:monthly_retention]
+    keep_files.update(monthly_groups[m] for m in sorted_monthly)
+    
+    # Yearly - все или последние N лет
+    if yearly_retention is not None:
+        sorted_yearly = sorted(yearly_groups.keys(), reverse=True)[:yearly_retention]
+        keep_files.update(yearly_groups[y] for y in sorted_yearly)
+    else:
+        keep_files.update(yearly_groups.values())
+    
+    # Удаляем неподходящие файлы и директории
+    for file, _ in files_with_dates:
+        if file not in keep_files:
+            file.unlink()
+            # unpacked_dir = export_dir / file.stem
+            # if unpacked_dir.exists():
+            #     shutil.rmtree(unpacked_dir)
+
+
+def determine_and_load_yaml_type(file_path):
+    with open(file_path, 'r') as f:
+        data = yaml.safe_load(f)
+
+    if 'dashboard_title' in data and 'position' in data:
+        return data, 'dashboard'
+    elif 'sqlalchemy_uri' in data and 'database_name' in data:
+        return data, 'database'
+    elif 'table_name' in data and ('sql' in data or 'columns' in data):
+        return data, 'dataset'
+    elif 'slice_name' in data and 'viz_type' in data:
+        return data, 'chart'
+    else:
+        return data, 'unknown'
+
+
+def update_db_yaml(db_config: Dict = None, path: str = "dashboards", verbose: bool = False) -> None:
+    """
+    Обновляет конфигурации в YAML-файлах баз данных согласно переданному словарю замен
+
+    :param db_config: Словарь с параметрами для замены (ключ: значение)
+    :param path: Путь к папке с YAML-файлами (по умолчанию 'databases')
+    """
+    # Устанавливаем дефолтные значения
+    db_config = db_config or {}
+
+    # Ищем все YAML-файлы в указанной папке
+    databases_dir = Path(path)
+    yaml_files = databases_dir.rglob("*.yaml")
+
+    for file_path in yaml_files:
+        try:
+            # Чтение и загрузка YAML
+
+            data, yaml_type = determine_and_load_yaml_type(file_path) or {}
+
+            # Обновляем только существующие ключи
+            updates = {
+                k: v
+                for k, v in db_config.items()
+                if (
+                    k in data                    # ключ есть в data
+                    and data[k] != v             # значение отличается
+                    and (
+                        # для database — все ключи
+                        (yaml_type == "database") or
+                        # для dataset — исключаем uuid
+                        (yaml_type == "dataset" and k != "uuid")
+                    )
+                )
+            }
+
+            # Обновляем data
+            data.update(updates)
+            if verbose and updates:
+                print(f"Обработан {file_path}")
+                print(updates)
+
+            # Сохранение с сохранением структуры файла
+            with open(file_path, 'w') as file:
+                yaml.dump(
+                    data,
+                    file,
+                    default_flow_style=False,
+                    sort_keys=False,
+                    allow_unicode=True
+                )
+
+        except Exception as e:
+            print(f"Ошибка при обработке файла {file_path}: {str(e)}")
+
+def sync_for_git(
+    source_path: str,
+    destination_path: str,
+    dry_run: bool = False,
+    verbose: bool = False
+) -> None:
+    """
+    Синхронизирует содержимое папки source_path с destination_path.
+
+    Перезаписывает файлы в destination_path файлами из source_path.
+    Удаляет файлы и пустые директории в destination_path, которые отсутствуют в source_path 
+    (исключая папку .git).
+
+    :param source_path: Путь к папке с данными (источник)
+    :param destination_path: Путь к папке назначения
+    :param dry_run: Режим имитации (не вносит реальных изменений)
+    :param verbose: Подробный вывод операций
+    """
+    source_files = set()
+    for root, _, files in os.walk(source_path):
+        for file in files:
+            rel_path = os.path.relpath(os.path.join(root, file), source_path)
+            source_files.add(rel_path)
+
+    destination_files = set()
+    for root, _, files in os.walk(destination_path):
+        for file in files:
+            rel_path = os.path.relpath(os.path.join(root, file), destination_path)
+            destination_files.add(rel_path)
+
+    # Копирование/обновление файлов
+    for file in source_files:
+        src = os.path.join(source_path, file)
+        dst = os.path.join(destination_path, file)
+        dest_dir = os.path.dirname(dst)
+
+        if verbose:
+            status = "[DRY-RUN] " if dry_run else ""
+            print(f"{status}Creating directory: {dest_dir}")
+
+        if not dry_run:
+            os.makedirs(dest_dir, exist_ok=True)
+
+        if verbose:
+            status = "[DRY-RUN] " if dry_run else ""
+            print(f"{status}Copying: {file}")
+
+        if not dry_run:
+            shutil.copy2(src, dst)
+
+    # Удаление лишних файлов
+    files_to_delete = destination_files - source_files
+    git_dir = Path(destination_path) / ".git"
+
+    for file in files_to_delete:
+        target = Path(destination_path) / file
+        
+        # Пропускаем .git и его содержимое
+        try:
+            if git_dir in target.parents or target == git_dir:
+                if verbose:
+                    print(f"Skipping .git item: {target}")
+                continue
+        except ValueError:
+            pass
+
+        if verbose:
+            action = "Would delete" if dry_run else "Deleting"
+            print(f"{action}: {target}")
+
+        if not dry_run:
+            try:
+                if target.is_file():
+                    target.unlink()
+                elif target.is_dir():
+                    shutil.rmtree(target)
+            except OSError as e:
+                print(f"Error deleting {target}: {e}")
+
+    # Удаление пустых директорий
+    if verbose:
+        print("\nChecking for empty directories...")
+    
+    git_dir = Path(destination_path) / ".git"
+    deleted_dirs = set()
+
+    # Проходим снизу вверх (от вложенных директорий к корневым)
+    for root, dirs, files in os.walk(destination_path, topdown=False):
+        for dir_name in dirs:
+            dir_path = Path(root) / dir_name
+            
+            # Пропускаем .git и его поддиректории
+            try:
+                if git_dir in dir_path.parents or dir_path == git_dir:
+                    continue
+            except ValueError:
+                pass
+            
+            # Проверяем что директория пуста и не была удалена ранее
+            if dir_path not in deleted_dirs and not any(dir_path.iterdir()):
+                if verbose:
+                    status = "[DRY-RUN] " if dry_run else ""
+                    print(f"{status}Deleting empty directory: {dir_path}")
+                
+                if not dry_run:
+                    try:
+                        dir_path.rmdir()
+                        deleted_dirs.add(dir_path)
+                    except OSError as e:
+                        print(f"Error deleting directory {dir_path}: {e}")
+
+
--- a/superset_tool/utils/logger.py
+++ b/superset_tool/utils/logger.py
@@ -0,0 +1,5 @@
+# utils/logger.py
+def configure_logger():
+    logger = logging.getLogger("superset_migration")
+    # Настройка формата, обработчиков и уровня логирования
+    return logger