File size: 3,099 Bytes
6fd27b2
 
 
5297a41
 
f90b1c7
5297a41
 
 
 
 
 
6fd27b2
 
 
 
 
 
5297a41
6fd27b2
 
 
 
 
 
 
 
5297a41
6fd27b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5297a41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6fd27b2
5297a41
 
6fd27b2
5297a41
 
f90b1c7
5297a41
 
6fd27b2
 
 
 
5297a41
6fd27b2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from dataclasses import dataclass, asdict
from typing import Dict, Any, Optional

from .config import RunConfig, ModelSpec
from .agents import AnalyzerAgent, RefactorAgent, CriticAgent, AgentResult, BaseAgent
from .tasks import TaskContext, build_analyzer_prompt, build_refactor_prompt, build_critic_prompt
from .backends import HFInferenceAPIBackend

# Pillow is an optional dependency: when it is missing, ``Image`` is left as
# ``None`` so image-related code paths can cheaply detect that image input
# is unavailable instead of crashing at import time.
try:
    from PIL import Image
except Exception:
    Image = None


@dataclass
class RunTrace:
    """Complete record of a single orchestrated run.

    Holds the raw inputs (requirements text and extracted diagram context)
    together with the result produced by each of the three pipeline stages.
    """

    task: str
    input_requirements: str
    diagram_context: str
    analyzer: AgentResult
    refactor: AgentResult
    critic: AgentResult

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the trace to plain built-in types (e.g. for JSON dumps)."""
        serialized: Dict[str, Any] = {
            "task": self.task,
            "input_requirements": self.input_requirements,
            "diagram_context": self.diagram_context,
        }
        # Stage results are dataclasses themselves; asdict flattens them.
        for stage in ("analyzer", "refactor", "critic"):
            serialized[stage] = asdict(getattr(self, stage))
        return serialized


class Orchestrator:
    """Coordinates the analyzer -> refactor -> critic agent pipeline.

    Optionally enriches every stage's prompt with textual context extracted
    from a diagram image via OCR and image-captioning backends.
    """

    # Default free/open diagram-extraction models on the HF Inference API.
    DEFAULT_OCR_MODEL = "microsoft/trocr-base-printed"
    DEFAULT_CAPTION_MODEL = "Salesforce/blip-image-captioning-large"

    def __init__(
        self,
        cfg: RunConfig,
        ctx: Optional[TaskContext] = None,
        *,
        ocr_model: str = DEFAULT_OCR_MODEL,
        caption_model: str = DEFAULT_CAPTION_MODEL,
    ):
        """Build the three stage agents and the diagram-extraction backends.

        Args:
            cfg: Per-stage model configuration for the three agents.
            ctx: Task context; a default ``TaskContext`` is used when omitted.
            ocr_model: HF model ID used for OCR on diagram images
                (keyword-only; defaults to TrOCR).
            caption_model: HF model ID used for image captioning
                (keyword-only; defaults to BLIP).
        """
        self.cfg = cfg
        self.ctx = ctx or TaskContext()

        self.analyzer = AnalyzerAgent(cfg.analyzer)
        self.refactor = RefactorAgent(cfg.refactor)
        self.critic = CriticAgent(cfg.critic)

        # Dedicated models for diagram extraction (free/open on HF Inference API).
        self.ocr_backend = HFInferenceAPIBackend(ocr_model)
        self.caption_backend = HFInferenceAPIBackend(caption_model)

    def _extract_diagram_context(self, image: Optional["Image.Image"]) -> str:
        """Return OCR text and a caption for *image*, joined into one string.

        Extraction is best-effort: each backend failure is recorded inline as
        a short error marker instead of being raised, so a broken remote
        service degrades to partial (or empty) context. Returns ``""`` when
        *image* is ``None``.
        """
        if image is None:
            return ""

        parts = []
        try:
            ocr = self.ocr_backend.image_to_text(image)
            if ocr and ocr.strip():
                parts.append("OCR (текст на изображении):\n" + ocr.strip())
        except Exception as e:  # best-effort: record the failure, keep going
            parts.append(f"OCR: ошибка ({type(e).__name__})")

        try:
            cap = self.caption_backend.image_to_text(image)
            if cap and cap.strip():
                parts.append("Описание изображения:\n" + cap.strip())
        except Exception as e:  # best-effort: record the failure, keep going
            parts.append(f"Caption: ошибка ({type(e).__name__})")

        return "\n\n".join(parts).strip()

    def run(self, requirements_text: str, image: Optional["Image.Image"] = None) -> RunTrace:
        """Run the three-stage pipeline and return the complete trace.

        The stages are strictly sequential: the refactor prompt embeds the
        analyzer output, and the critic prompt embeds the refactor output.
        """
        diagram_context = self._extract_diagram_context(image)

        p1 = build_analyzer_prompt(requirements_text, diagram_context, self.ctx)
        r1 = self.analyzer.run(p1)

        p2 = build_refactor_prompt(requirements_text, r1.output, diagram_context, self.ctx)
        r2 = self.refactor.run(p2)

        p3 = build_critic_prompt(requirements_text, r2.output, diagram_context, self.ctx)
        r3 = self.critic.run(p3)

        return RunTrace(
            task=self.ctx.name,
            input_requirements=requirements_text,
            diagram_context=diagram_context,
            analyzer=r1,
            refactor=r2,
            critic=r3,
        )