init commit

This commit is contained in:
2025-12-14 19:00:23 +03:00
commit 0ab7c64fbc
4 changed files with 219 additions and 0 deletions

65
translation_engine.py Normal file
View File

@@ -0,0 +1,65 @@
# [DEF:translation_engine:Module]
import logging
from typing import List
# Local imports - require files to be in the same folder
try:
from domain_models import TranslationContext, SubtitleLine
from llm_core import GeminiProcessor
except ImportError as e:
raise ImportError(f"Failed to import local modules: {e}. Ensure domain_models.py and llm_core.py exist.")
logger = logging.getLogger("Engine")


class SubtitleOrchestrator:
    """Drive a two-pass, LLM-backed subtitle translation.

    Pass one (``pass_one_analysis``) asks the LLM for a ``TranslationContext``
    built from the full text and stores it on the instance.  Pass two
    (``pass_two_translation``) translates subtitle lines chunk by chunk and
    sends a text rendering of that context as the system instruction of
    every request.
    """

    def __init__(self):
        self.llm = GeminiProcessor()
        # Filled in by pass_one_analysis(); pass_two_translation() refuses to
        # run while this is still None.
        self.context: "TranslationContext" = None  # type: ignore

    def pass_one_analysis(self, full_text: str) -> "TranslationContext":
        """Analyse the full text and remember the resulting context.

        Calls ``self.llm.generate_structured`` with a fixed analyst prompt,
        ``full_text`` and the ``TranslationContext`` class, stores whatever it
        returns in ``self.context`` and returns it.

        Args:
            full_text: The text handed to the LLM for analysis.

        Returns:
            The object returned by ``generate_structured``.
        """
        logger.info("[PassOne] Starting Analysis...")
        prompt = "ACT AS: Narrative Analyst. Identify Genre, Tone, Characters, and Plot."
        self.context = self.llm.generate_structured(prompt, full_text, TranslationContext)
        return self.context

    def pass_two_translation(
        self, subtitle_lines: List["SubtitleLine"], chunk_size: int = 20
    ) -> List["SubtitleLine"]:
        """Translate subtitle lines in chunks, using the stored context.

        Each chunk is sent to ``self.llm.generate_text`` as ``'ID || Text'``
        lines.  The reply is parsed back into an ``index -> text`` mapping and
        written to ``translated_text`` of every line in the chunk.  Translation
        is best-effort: if the request or the parsing fails, or the reply has
        no usable entry for a line, that line keeps its ``original_text`` as
        ``translated_text``.

        Args:
            subtitle_lines: Lines to translate.  The objects are updated in
                place (``translated_text`` is assigned).
            chunk_size: Maximum number of lines per LLM request; must be >= 1.

        Returns:
            A new list holding the same line objects, in input order.

        Raises:
            RuntimeError: If ``pass_one_analysis`` has not produced a context.
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        if self.context is None:
            raise RuntimeError("Context must be analyzed before translation.")
        # A negative step would make range() empty and silently drop every
        # subtitle; a zero step would fail with an unrelated range() message.
        if chunk_size < 1:
            raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

        logger.info("[PassTwo] Translating %d lines.", len(subtitle_lines))
        system_instruction = self._build_system_instruction()

        translated_lines = []
        for start in range(0, len(subtitle_lines), chunk_size):
            chunk = subtitle_lines[start : start + chunk_size]
            # NOTE(review): original_text is inserted verbatim, so a subtitle
            # containing a newline spans several input lines; continuation
            # lines in the reply carry no ID and are ignored by the parser.
            input_text = "\n".join(f"{sub.index} || {sub.original_text}" for sub in chunk)

            request_failed = False
            try:
                response_text = self.llm.generate_text(system_instruction, input_text)
                mapping = self._parse_response(response_text)
            except Exception as exc:
                # Deliberate best-effort boundary: one bad chunk must not abort
                # the whole run, but keep the traceback for diagnosis.
                logger.exception("Chunk failed: %s", exc)
                mapping = {}
                request_failed = True

            untranslated = 0
            for sub in chunk:
                translation = mapping.get(sub.index)
                if translation:
                    sub.translated_text = translation
                else:
                    sub.translated_text = sub.original_text  # Fallback
                    untranslated += 1
            if untranslated and not request_failed:
                logger.warning(
                    "[PassTwo] %d of %d lines had no translation in the reply; kept original text.",
                    untranslated,
                    len(chunk),
                )

            translated_lines.extend(chunk)
        return translated_lines

    def _build_system_instruction(self) -> str:
        """Render ``self.context`` into the system prompt used for every chunk."""
        char_desc = "\n".join(
            f"- {c.name}: {c.role}, {c.speech_style}" for c in self.context.characters
        )
        return (
            f"CONTEXT:\nTitle: {self.context.title}\nGenre: {self.context.genre}\n"
            f"Style: {self.context.style_guidelines}\n\nCHARACTERS:\n{char_desc}\n\n"
            "TASK: Translate subtitles to Russian. Format: 'ID || Text'. Maintain timing."
        )

    @staticmethod
    def _parse_response(response_text: str) -> dict:
        """Parse ``'ID || Text'`` reply lines into an ``{int id: text}`` mapping.

        Lines without a ``||`` separator, with a non-numeric ID or with an
        empty text are skipped.  Whitespace around the separator is optional.
        When an ID occurs more than once the last occurrence wins.
        """
        mapping = {}
        for line in response_text.strip().split("\n"):
            raw_id, separator, text = line.partition("||")
            raw_id = raw_id.strip()
            text = text.strip()
            # isdecimal() rather than isdigit(): characters such as "²" pass
            # isdigit() but make int() raise, which would discard the chunk.
            if separator and text and raw_id.isdecimal():
                mapping[int(raw_id)] = text
        return mapping
# [/DEF:translation_engine]