File size: 10,560 Bytes
2f4af3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
from processors.egyptian_processor import EgyptianProcessor
from processors.greek_processor import GreekProcessor
from processors.latin_processor import LatinProcessor
from processors.cuneiform_processor import CuneiformProcessor
from .groq_vision_classifier import GroqVisionScriptClassifier


class ScriptDetectionService:
    def __init__(self, groq_client, references, clip_classifier, translator_pipe, cuneiform_processor=None):
        """Build the per-script processors and the script classifier.

        Args:
            groq_client: Client handed to every processor. It is also
                inspected for an ``api_key`` (directly, or on its ``client``
                attribute) to configure the Groq Vision classifier.
            references: Passed through unchanged to every processor.
            clip_classifier: Passed to every processor and stored on the
                service as ``self.clip_classifier``.
            translator_pipe: Passed to the Egyptian processor only.
            cuneiform_processor: Optional pre-built cuneiform processor to
                reuse. When falsy, a new ``CuneiformProcessor`` is created.

        Attributes set:
            egyptian_processor, greek_processor, latin_processor,
            cuneiform_processor (``None`` if construction failed),
            vision_classifier (``None`` if no key or construction failed),
            clip_classifier, processors (script name -> processor).
        """
        self.egyptian_processor = EgyptianProcessor(groq_client, references, clip_classifier, translator_pipe)
        self.greek_processor = GreekProcessor(groq_client, references, clip_classifier)
        self.latin_processor = LatinProcessor(groq_client, references, clip_classifier)

        # Reuse the caller's cuneiform processor when one is supplied;
        # otherwise build one here. A construction failure only disables
        # cuneiform support instead of failing the whole service.
        if cuneiform_processor:
            self.cuneiform_processor = cuneiform_processor
            print("[INFO] Cuneiform processor shared from global app instance")
        else:
            try:
                print("[INFO] Initializing cuneiform processor in detection service...")
                self.cuneiform_processor = CuneiformProcessor(groq_client, references, clip_classifier)
                print("[INFO] Cuneiform processor initialized successfully")
            except Exception as e:
                print(f"[WARN] Failed to initialize cuneiform processor: {e}")
                self.cuneiform_processor = None

        # Resolve the API key by walking the sources in order and stopping at
        # the first non-empty value. An attribute that exists but is empty
        # must not short-circuit the remaining fallbacks.
        api_key = getattr(groq_client, 'api_key', None)
        if not api_key:
            api_key = getattr(getattr(groq_client, 'client', None), 'api_key', None)
        if not api_key:
            try:
                from config import Config
                api_key = Config().GROQ_API_KEY
            except Exception:
                # Config is optional: a missing module or attribute simply
                # falls through to the environment variable below.
                api_key = None
        if not api_key:
            import os
            api_key = os.getenv('GROQ_API_KEY')

        # Initialize Groq Vision script classifier if API key is present
        if api_key:
            try:
                self.vision_classifier = GroqVisionScriptClassifier(api_key)
                print("[INFO] Groq Vision Script Detection Service initialized")
            except Exception as e:
                print(f"[WARN] Failed to initialize Groq Vision script classifier: {e}")
                self.vision_classifier = None
        else:
            print("[WARN] GROQ_API_KEY not found! Groq Vision classifier disabled. Falling back to zero-shot CLIP classifier.")
            self.vision_classifier = None

        # Kept so detect_and_process can fall back to CLIP classification.
        self.clip_classifier = clip_classifier

        # Script name -> processor; the cuneiform entry may be None.
        self.processors = {
            'egyptian': self.egyptian_processor,
            'greek': self.greek_processor,
            'latin': self.latin_processor,
            'cuneiform': self.cuneiform_processor
        }

        if self.cuneiform_processor:
            print("[INFO] Cuneiform support: ENABLED (praeclarum/cuneiform model)")
        else:
            print("[WARN] Cuneiform support: DISABLED (processor initialization failed)")
    
    def detect_and_process(self, image_path):
        """Enhanced detection with cuneiform support - uses Groq Vision with CLIP fallback"""
        try:
            # Step 1: Get script classification from Groq Vision or CLIP
            script_type = "unknown"
            classification_method = "unknown"
            classification_confidence = 0.0
            
            if self.vision_classifier:
                try:
                    script_type = self.vision_classifier.classify_script(image_path)
                    classification_method = 'groq_vision'
                    classification_confidence = 0.95
                except Exception as e:
                    print(f"[WARN] Groq Vision classification failed: {e}. Falling back to CLIP.")
            
            if script_type == "unknown" or not self.vision_classifier:
                from PIL import Image
                try:
                    img = Image.open(image_path)
                    script_type, classification_confidence = self.clip_classifier.classify_script_type(img)
                    classification_method = 'clip_zero_shot'
                    print(f"[INFO] CLIP fallback classification: {script_type} (conf={classification_confidence:.3f})")
                except Exception as ce:
                    print(f"[ERROR] CLIP fallback classification failed: {ce}")
                    script_type = "egyptian"  # default fallback
                    classification_method = "default_fallback"
                    classification_confidence = 0.5
            
            print(f"[INFO] Final classification routed: {script_type} via {classification_method}")
            
            # Step 2: Route to appropriate processor including cuneiform
            if script_type == "egyptian":
                print("[INFO] Routing to Egyptian processor...")
                result = self.egyptian_processor.process_image(image_path)
                
            elif script_type == "greek":
                print("[INFO] Routing to Greek processor...")
                result = self.greek_processor.process_image(image_path)
                
            elif script_type == "latin":
                print("[INFO] Routing to Latin processor...")
                result = self.latin_processor.process_image(image_path)
                
            elif script_type == "cuneiform":
                print("[INFO] Routing to Cuneiform processor...")
                if self.cuneiform_processor and self.cuneiform_processor.cuneiform_available:
                    result = self.cuneiform_processor.process_image(image_path)
                else:
                    print("[ERROR] Cuneiform processor not available!")
                    # Create error result
                    result = {
                        'script_type': 'cuneiform',
                        'confidence': 0.0,
                        'processed_result': {
                            'text': 'Cuneiform processor unavailable',
                            'validation': {'quality_score': 0.0, 'error': 'Model not loaded'}
                        },
                        'historical_context': {},
                        'creative_story': 'Cuneiform processing failed - model not available'
                    }
                
            else:  # unknown
                print(f"[INFO] Unknown classification '{script_type}', defaulting to Egyptian...")
                result = self.egyptian_processor.process_image(image_path)
            
            # Step 3: Return result with classification metadata
            if result:
                result['vision_classification'] = script_type
                result['classification_method'] = classification_method
                result['classification_confidence'] = classification_confidence
                print(f"[INFO] {script_type.title()} processing completed successfully")
                return result
            else:
                print(f"[ERROR] {script_type.title()} processor returned None")
                return None
            
        except Exception as e:
            print(f"[ERROR] Classification and processing failed: {e}")
            import traceback
            traceback.print_exc()
            return None

    def get_processor_by_type(self, script_type):
        """Get processor by script type - now includes cuneiform"""
        processor = self.processors.get(script_type.lower())
        
        if script_type.lower() == 'cuneiform' and processor and not processor.cuneiform_available:
            print(f"[WARN] Cuneiform processor exists but model not available")
            return None
            
        return processor
    
    def get_supported_scripts(self):
        """Get list of supported script types"""
        scripts = ['egyptian', 'greek', 'latin']
        
        if self.cuneiform_processor and self.cuneiform_processor.cuneiform_available:
            scripts.append('cuneiform')
            
        return scripts
    
    def get_processor_status(self):
        """Get status of all processors"""
        status = {
            'egyptian': self.egyptian_processor is not None,
            'greek': self.greek_processor is not None,
            'latin': self.latin_processor is not None,
            'cuneiform': self.cuneiform_processor is not None and getattr(self.cuneiform_processor, 'cuneiform_available', False)
        }
        
        return status

    def validate_script_detection(self, script_type, processed_result):
        """Validate script detection results - enhanced for cuneiform"""
        try:
            validation = processed_result.get('validation', {})
            quality_score = validation.get('quality_score', 0.0)
            
            # Script-specific validation thresholds
            thresholds = {
                'egyptian': 0.3,
                'greek': 0.4, 
                'latin': 0.4,
                'cuneiform': 0.2  # Lower threshold due to OCR challenges
            }
            
            threshold = thresholds.get(script_type, 0.3)
            
            # Additional cuneiform validation
            if script_type == 'cuneiform':
                cuneiform_ratio = validation.get('cuneiform_ratio', 0.0)
                atf_ratio = validation.get('atf_ratio', 0.0)
                
                # Accept if either Unicode cuneiform or ATF format detected
                if cuneiform_ratio > 0.1 or atf_ratio > 0.3:
                    print(f"[INFO] Cuneiform validation passed: cuneiform_ratio={cuneiform_ratio:.3f}, atf_ratio={atf_ratio:.3f}")
                    return True
                    
            # Standard quality validation
            is_valid = quality_score >= threshold
            
            if is_valid:
                print(f"[INFO] {script_type.title()} validation passed: quality={quality_score:.3f} >= {threshold}")
            else:
                print(f"[WARN] {script_type.title()} validation failed: quality={quality_score:.3f} < {threshold}")
                
            return is_valid
            
        except Exception as e:
            print(f"[ERROR] Validation failed: {e}")
            return False