65 lines
2.8 KiB
Python
65 lines
2.8 KiB
Python
# [DEF:translation_engine:Module]
|
|
import logging
|
|
from typing import List
|
|
|
|
# Local imports - require files to be in the same folder
|
|
try:
    from domain_models import TranslationContext, SubtitleLine
    from llm_core import GeminiProcessor
except ImportError as e:
    # Chain explicitly so the original failure is reported as the direct
    # cause of this more descriptive error instead of an incidental context.
    raise ImportError(
        f"Failed to import local modules: {e}. Ensure domain_models.py and llm_core.py exist."
    ) from e
|
|
|
|
logger = logging.getLogger("Engine")
|
|
|
|
class SubtitleOrchestrator:
    """Two-pass subtitle translation pipeline driven by an LLM processor.

    Pass one sends the full text to the LLM and stores the resulting
    ``TranslationContext``. Pass two serializes that context into a system
    instruction and translates the subtitle lines chunk by chunk.
    """

    def __init__(self):
        """Create the LLM processor; the context stays unset until pass one."""
        self.llm = GeminiProcessor()
        # Remains None until pass_one_analysis() has been called.
        self.context: TranslationContext = None  # type: ignore

    def pass_one_analysis(self, full_text: str) -> TranslationContext:
        """Run the analysis prompt over *full_text* and keep the result.

        Args:
            full_text: Text passed to the LLM together with the analyst prompt.

        Returns:
            The value produced by ``self.llm.generate_structured`` for the
            ``TranslationContext`` schema; it is also stored on
            ``self.context`` for use by ``pass_two_translation``.
        """
        logger.info("[PassOne] Starting Analysis...")
        prompt = "ACT AS: Narrative Analyst. Identify Genre, Tone, Characters, and Plot."
        self.context = self.llm.generate_structured(prompt, full_text, TranslationContext)
        return self.context

    @staticmethod
    def _parse_response(response_text: str) -> dict:
        """Build an ``{id: text}`` mapping from ``ID || Text`` response lines.

        Lines without a ``||`` delimiter or without a purely decimal ID are
        ignored. Whitespace around the delimiter is optional, and an empty
        text part is kept as an empty string.
        """
        mapping = {}
        for line in response_text.strip().split("\n"):
            raw_id, delimiter, text = line.partition("||")
            raw_id = raw_id.strip()
            # isdecimal() only accepts characters int() can parse. isdigit()
            # also accepts e.g. superscripts ("²"), which made int() raise
            # and threw away the translation of the whole chunk.
            if delimiter and raw_id.isdecimal():
                mapping[int(raw_id)] = text.strip()
        return mapping

    def pass_two_translation(self, subtitle_lines: List[SubtitleLine], chunk_size: int = 20) -> List[SubtitleLine]:
        """Translate *subtitle_lines* in chunks using the analysed context.

        Each chunk is sent to the LLM as ``index || original_text`` lines and
        the reply is parsed back by ID. The ``translated_text`` attribute of
        every input object is set in place; lines missing from the reply, and
        all lines of a chunk whose request or parsing failed, fall back to
        their original text.

        Args:
            subtitle_lines: Subtitle objects exposing ``index`` and
                ``original_text``; they are mutated in place.
            chunk_size: Number of subtitle lines sent per LLM request.

        Returns:
            A list with the same subtitle objects, in input order.

        Raises:
            RuntimeError: If no context has been analysed yet.
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        if self.context is None:
            raise RuntimeError("Context must be analyzed before translation.")
        if chunk_size < 1:
            # A negative step would make range() empty and silently drop
            # every subtitle; zero already raised ValueError inside range().
            raise ValueError("chunk_size must be a positive integer")

        logger.info("[PassTwo] Translating %d lines.", len(subtitle_lines))

        # Serialize context for the LLM
        char_desc = "\n".join(
            f"- {c.name}: {c.role}, {c.speech_style}" for c in self.context.characters
        )
        system_instruction = (
            f"CONTEXT:\nTitle: {self.context.title}\nGenre: {self.context.genre}\n"
            f"Style: {self.context.style_guidelines}\n\nCHARACTERS:\n{char_desc}\n\n"
            "TASK: Translate subtitles to Russian. Format: 'ID || Text'. Maintain timing."
        )

        translated_lines = []
        for start in range(0, len(subtitle_lines), chunk_size):
            chunk = subtitle_lines[start:start + chunk_size]
            input_text = "\n".join(f"{sub.index} || {sub.original_text}" for sub in chunk)

            try:
                response_text = self.llm.generate_text(system_instruction, input_text)
                mapping = self._parse_response(response_text)
            except Exception as e:
                # Deliberately broad: translation is best-effort per chunk, so
                # any LLM or parsing failure falls back to the original text.
                logger.exception("Chunk failed: %s", e)
                mapping = {}

            for sub in chunk:
                sub.translated_text = mapping.get(sub.index, sub.original_text)

            translated_lines.extend(chunk)

        return translated_lines
|
|
# [/DEF:translation_engine]