init commit
This commit is contained in:
65
translation_engine.py
Normal file
65
translation_engine.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# [DEF:translation_engine:Module]
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
# Local imports - require files to be in the same folder
|
||||
try:
    from domain_models import TranslationContext, SubtitleLine
    from llm_core import GeminiProcessor
except ImportError as e:
    # Re-raise with a setup hint. `from e` records the original failure as the
    # explicit cause, so the traceback shows which module could not be loaded.
    raise ImportError(
        f"Failed to import local modules: {e}. Ensure domain_models.py and llm_core.py exist."
    ) from e
|
||||
|
||||
# Module-level logger; records are emitted under the fixed name "Engine".
logger = logging.getLogger("Engine")


class SubtitleOrchestrator:
    """Drive a two-pass subtitle translation through ``self.llm``.

    Pass one (``pass_one_analysis``) asks the LLM for a structured
    ``TranslationContext`` and stores it on the instance.  Pass two
    (``pass_two_translation``) sends the subtitles to the LLM in chunks,
    together with that context, and writes the answers back onto the
    subtitle objects.
    """

    # Separator between a subtitle id and its text in LLM replies.  Matching is
    # done on the bare token so replies with irregular spacing still parse.
    _DELIMITER = "||"

    def __init__(self):
        """Create the LLM client; ``context`` stays unset until pass one runs."""
        self.llm = GeminiProcessor()
        self.context: TranslationContext = None  # type: ignore

    def pass_one_analysis(self, full_text: str) -> TranslationContext:
        """Analyze the full text and remember the resulting context.

        Args:
            full_text: Text handed to ``self.llm.generate_structured`` together
                with the analyst prompt and the ``TranslationContext`` type.

        Returns:
            Whatever ``generate_structured`` returned; the same object is also
            stored on ``self.context`` for use by ``pass_two_translation``.
        """
        logger.info("[PassOne] Starting Analysis...")
        prompt = "ACT AS: Narrative Analyst. Identify Genre, Tone, Characters, and Plot."
        self.context = self.llm.generate_structured(prompt, full_text, TranslationContext)
        return self.context

    def _build_system_instruction(self) -> str:
        """Serialize ``self.context`` into the system prompt used for every chunk."""
        char_desc = "\n".join(
            f"- {c.name}: {c.role}, {c.speech_style}" for c in self.context.characters
        )
        return (
            f"CONTEXT:\nTitle: {self.context.title}\nGenre: {self.context.genre}\n"
            f"Style: {self.context.style_guidelines}\n\nCHARACTERS:\n{char_desc}\n\n"
            "TASK: Translate subtitles to Russian. Format: 'ID || Text'. Maintain timing."
        )

    @staticmethod
    def _parse_response(response_text: str) -> dict:
        """Turn an LLM reply made of ``ID || Text`` lines into ``{id: text}``.

        Lines without the delimiter, with a non-numeric id, or with an empty
        text are skipped so the caller falls back to the original subtitle.
        If an id appears more than once, the last occurrence wins.
        """
        mapping = {}
        for raw_line in response_text.strip().split("\n"):
            key, sep, text = raw_line.partition(SubtitleOrchestrator._DELIMITER)
            key = key.strip()
            text = text.strip()
            # isdecimal() rather than isdigit(): isdigit() also accepts
            # characters such as superscript digits that int() rejects, and one
            # such line used to abort the whole chunk with a ValueError.
            if sep and text and key.isdecimal():
                mapping[int(key)] = text
        return mapping

    def pass_two_translation(
        self, subtitle_lines: List[SubtitleLine], chunk_size: int = 20
    ) -> List[SubtitleLine]:
        """Translate subtitles chunk by chunk using the analyzed context.

        Each chunk is sent as ``"<index> || <original_text>"`` lines.  The
        reply is parsed back by id and stored in ``translated_text`` of the
        *same* subtitle objects (the input items are mutated in place).  A
        subtitle keeps its original text when the LLM call fails, when the
        reply cannot be parsed, or when the reply has no usable line for its
        id.

        Args:
            subtitle_lines: Objects exposing ``index``, ``original_text`` and a
                writable ``translated_text``.  ``index`` is looked up against
                integer ids parsed from the reply.
            chunk_size: Number of subtitles per LLM request; must be >= 1.

        Returns:
            A new list holding the input objects in their original order.

        Raises:
            RuntimeError: If ``pass_one_analysis`` has not populated
                ``self.context`` yet.
            ValueError: If ``chunk_size`` is less than 1.
        """
        if self.context is None:
            raise RuntimeError("Context must be analyzed before translation.")
        if chunk_size < 1:
            # A negative step would silently yield no chunks and drop every
            # subtitle; zero would fail with an opaque range() error.
            raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

        logger.info("[PassTwo] Translating %d lines.", len(subtitle_lines))
        system_instruction = self._build_system_instruction()

        translated_lines = []
        for start in range(0, len(subtitle_lines), chunk_size):
            chunk = subtitle_lines[start : start + chunk_size]
            input_text = "\n".join(f"{sub.index} || {sub.original_text}" for sub in chunk)

            try:
                response_text = self.llm.generate_text(system_instruction, input_text)
                mapping = self._parse_response(response_text)
            except Exception as e:
                # Deliberate best-effort boundary: one failed chunk must not
                # abort the whole run, so the chunk keeps its original text.
                logger.exception("Chunk failed: %s", e)
                mapping = {}
            else:
                missing = [sub.index for sub in chunk if sub.index not in mapping]
                if missing:
                    logger.warning(
                        "[PassTwo] No translation returned for line ids %s; keeping original text.",
                        missing,
                    )

            for sub in chunk:
                sub.translated_text = mapping.get(sub.index, sub.original_text)

            translated_lines.extend(chunk)

        return translated_lines
|
||||
# [/DEF:translation_engine]
|
||||
Reference in New Issue
Block a user