File size: 2,581 Bytes
24f95f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import base64
import os
import tempfile
from typing import Dict, Any, Optional
from urllib.parse import urlsplit

from app.services.guardian_sensory import guardian_sensory
from app.services.mmsa_engine import mmsa_engine

class InputProcessor:
    """Normalize a request carrying text, a URL and/or a base64 image.

    The processor reads ``source``, ``url``, ``image_base64`` and ``text``
    from the request object and folds them into a single dictionary that
    downstream analysis can consume.
    """

    # Hosts (and their sub-domains) that trigger the MMSA video deep scan.
    _VIDEO_HOSTS = ("youtube.com", "youtu.be")

    async def process(self, request: Any) -> Dict[str, Any]:
        """
        Normalize input from text, URL, or Image.

        Args:
            request: Object exposing ``source``, ``url``, ``image_base64``
                and ``text`` attributes. Falsy values are treated as absent.

        Returns:
            A dictionary with the keys ``'text'`` (normalized text),
            ``'source'`` (copied from ``request.source``) and ``'metadata'``
            (may contain ``'url_analysis'``, ``'mmsa'`` and ``'is_ocr'``).
        """
        normalized: Dict[str, Any] = {
            "text": "",
            "source": request.source,
            "metadata": {},
        }

        if request.url:
            # Depth: Heuristic logic from Guardian + Multi-modal Dissonance from MMSA
            url_analysis = guardian_sensory.analyze_url(request.url)
            normalized["text"] = f"URL: {request.url}"
            normalized["metadata"]["url_analysis"] = url_analysis

            # Deep Scan: only when the URL's *host* is a known video site.
            # A plain substring test would also fire on unrelated URLs that
            # merely mention the domain in their path or query string.
            if self._is_video_url(request.url):
                # MMSA needs a transcript; when the request has no text a
                # placeholder is passed instead (a real transcript is not
                # fetched here).
                mmsa_report = mmsa_engine.analyze_url(
                    request.url, request.text or "Analyzing video signals."
                )
                normalized["metadata"]["mmsa"] = mmsa_report
                if "error" not in mmsa_report:
                    normalized["text"] += f"\nVideo Analysis: {mmsa_report.get('analysis_tags', [])}"

        if request.image_base64:
            # NOTE(review): OCR text replaces (rather than extends) any
            # URL-derived text set above; the URL findings stay available in
            # metadata. Confirm this precedence is intended.
            normalized["text"] = await self._process_image(request.image_base64)
            normalized["metadata"]["is_ocr"] = True

        if request.text:
            # Combine with what was derived so far, or use the explicit text alone.
            if normalized["text"]:
                normalized["text"] += f"\nMeta-Text: {request.text}"
            else:
                normalized["text"] = request.text

        return normalized

    @staticmethod
    def _is_video_url(url: Any) -> bool:
        """Return True when the host of *url* is a supported video site."""
        candidate = str(url).strip()
        if "://" not in candidate:
            # Scheme-less input ("youtube.com/watch?v=..."): prefix "//" so
            # urlsplit treats the leading component as the network location.
            candidate = "//" + candidate.lstrip("/")
        try:
            host = urlsplit(candidate).hostname or ""
        except ValueError:
            # urlsplit rejects malformed netlocs (e.g. broken IPv6 brackets).
            return False
        host = host.rstrip(".")
        return any(
            host == known or host.endswith("." + known)
            for known in InputProcessor._VIDEO_HOSTS
        )

    async def _process_image(self, base64_str: str) -> str:
        """Decode a base64 image, run screenshot analysis, return its text.

        Args:
            base64_str: Raw base64 data, optionally prefixed with a data-URI
                header such as ``data:image/png;base64,``.

        Returns:
            The ``"text"`` entry of the screenshot analysis, or ``""`` when
            the payload decodes to no bytes or the analysis yields no text.

        Raises:
            binascii.Error: If the payload is not valid base64.
        """
        # Remove header if present: keep everything after the first comma.
        header, comma, payload = base64_str.partition(",")
        if not comma:
            payload = header

        img_data = base64.b64decode(payload)
        if not img_data:
            # Nothing to analyze; avoid handing an empty file to the OCR step.
            return ""

        tmp_path: Optional[str] = None
        try:
            # delete=False because the analyzer re-opens the file by path.
            # Creation happens inside the try so a failed write cannot leak
            # the temporary file.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
                tmp_path = tmp.name
                tmp.write(img_data)

            # NOTE(review): this call is synchronous and runs on the event
            # loop; consider offloading it if it proves slow (confirm the
            # analyzer is thread-safe first).
            results = guardian_sensory.analyze_screenshot(tmp_path)
            return results.get("text") or ""
        finally:
            if tmp_path is not None and os.path.exists(tmp_path):
                os.remove(tmp_path)

# Module-level instance created at import time; InputProcessor defines no
# per-instance state in this file.
input_processor = InputProcessor()