# subtitle_translator/translation_engine.py

# [DEF:translation_engine:Module]
import logging
from typing import List

# Local imports - require files to be in the same folder
try:
    from domain_models import TranslationContext, SubtitleLine
    from llm_core import GeminiProcessor
except ImportError as e:
    raise ImportError(f"Failed to import local modules: {e}. Ensure domain_models.py and llm_core.py exist.")

# Module-wide logger, registered under the fixed name "Engine".
logger = logging.getLogger("Engine")


class SubtitleOrchestrator:
    """Two-pass subtitle translation driver built on an LLM backend.

    Pass one sends the full text to the LLM and stores the structured
    ``TranslationContext`` it returns. Pass two translates subtitle lines in
    chunks, embedding that context in the system instruction.
    """

    # Delimiter between a subtitle index and its text in the LLM exchange.
    _FIELD_SEPARATOR = "||"

    def __init__(self):
        self.llm = GeminiProcessor()
        # Filled in by pass_one_analysis(); stays None until that has run.
        self.context: TranslationContext = None  # type: ignore

    def pass_one_analysis(self, full_text: str) -> TranslationContext:
        """Ask the LLM for a structured analysis of *full_text* and cache it.

        Args:
            full_text: The text handed to the LLM together with the analyst
                prompt.

        Returns:
            The object returned by ``self.llm.generate_structured``; it is
            also stored on ``self.context`` for the translation pass.
        """
        logger.info("[PassOne] Starting Analysis...")
        prompt = "ACT AS: Narrative Analyst. Identify Genre, Tone, Characters, and Plot."
        self.context = self.llm.generate_structured(prompt, full_text, TranslationContext)
        return self.context

    def pass_two_translation(
        self,
        subtitle_lines: List[SubtitleLine],
        chunk_size: int = 20,
    ) -> List[SubtitleLine]:
        """Translate *subtitle_lines* chunk by chunk, filling ``translated_text``.

        Each chunk is sent to the LLM as ``"<index> || <original_text>"`` lines
        and the reply is parsed in the same format. The subtitle objects are
        mutated in place. A line keeps its original text when the LLM call for
        its chunk fails or when the reply contains no entry for its index.

        Args:
            subtitle_lines: Lines to translate; each must expose ``index``,
                ``original_text`` and a writable ``translated_text``.
            chunk_size: Maximum number of lines per LLM request (>= 1).

        Returns:
            A new list holding the same subtitle objects, in input order.

        Raises:
            RuntimeError: If ``pass_one_analysis`` has not populated the context.
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        if self.context is None:
            raise RuntimeError("Context must be analyzed before translation.")
        if chunk_size < 1:
            # A negative step would make range() empty and silently drop every
            # line; zero would surface as an opaque range() error.
            raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}.")

        logger.info("[PassTwo] Translating %d lines.", len(subtitle_lines))

        system_instruction = self._build_system_instruction()

        translated_lines: List[SubtitleLine] = []
        for start in range(0, len(subtitle_lines), chunk_size):
            chunk = subtitle_lines[start:start + chunk_size]
            input_text = "\n".join(f"{sub.index} || {sub.original_text}" for sub in chunk)

            try:
                response_text = self.llm.generate_text(system_instruction, input_text)
                mapping = self._parse_response(response_text)
            except Exception as exc:
                # Deliberate best-effort: one failed chunk must not abort the
                # whole run, so its lines fall back to the original text.
                logger.exception("Chunk failed: %s", exc)
                mapping = {}
            else:
                missing = sum(1 for sub in chunk if sub.index not in mapping)
                if missing:
                    logger.warning(
                        "[PassTwo] %d line(s) missing from LLM response; keeping original text.",
                        missing,
                    )

            for sub in chunk:
                sub.translated_text = mapping.get(sub.index, sub.original_text)

            translated_lines.extend(chunk)

        return translated_lines

    def _build_system_instruction(self) -> str:
        """Serialize the stored context into the translation system prompt."""
        char_desc = "\n".join(
            f"- {c.name}: {c.role}, {c.speech_style}" for c in self.context.characters
        )
        return (
            f"CONTEXT:\nTitle: {self.context.title}\nGenre: {self.context.genre}\n"
            f"Style: {self.context.style_guidelines}\n\nCHARACTERS:\n{char_desc}\n\n"
            "TASK: Translate subtitles to Russian. Format: 'ID || Text'. Maintain timing."
        )

    @classmethod
    def _parse_response(cls, response_text: str) -> dict:
        """Extract ``{index: text}`` pairs from 'ID || Text' response lines.

        Whitespace around the separator is optional, and lines without a
        separator or without a purely decimal ID are ignored.
        """
        mapping = {}
        for line in response_text.strip().split("\n"):
            head, sep, tail = line.partition(cls._FIELD_SEPARATOR)
            key = head.strip()
            # isdecimal() (unlike isdigit()) only accepts characters int() can
            # parse, so one odd line cannot raise and discard the whole chunk.
            if sep and key.isdecimal():
                mapping[int(key)] = tail.strip()
        return mapping
# [/DEF:translation_engine]