# File size: 10,331 Bytes
# 2f4af3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
import base64
import json
import os
import zlib
from io import BytesIO

from groq import Groq
from PIL import Image


class GroqVisionScriptClassifier:
    """Classify images of ancient writing with a Groq-hosted vision model.

    The classifier encodes an image as a base64 JPEG, asks the model to pick
    one of the supported script families and normalises the answer to a
    lower-case label. Every public classification method returns one of the
    labels in ``SUPPORTED_SCRIPTS`` or the string ``"unknown"``; failures are
    logged with ``print`` and never raised to the caller.
    """

    # Canonical labels, in the order reported by get_supported_scripts().
    SUPPORTED_SCRIPTS = ("egyptian", "greek", "latin", "cuneiform")

    # Free-text fallback used when the model does not return valid JSON.
    # Order matters: cuneiform keywords are checked first (most specific).
    _FALLBACK_KEYWORDS = (
        (
            "cuneiform",
            (
                "CUNEIFORM",
                "WEDGE",
                "CLAY",
                "MESOPOTAMIAN",
                "AKKADIAN",
                "SUMERIAN",
                "BABYLONIAN",
            ),
        ),
        ("egyptian", ("EGYPTIAN", "HIEROGLYPH")),
        ("greek", ("GREEK",)),
        ("latin", ("LATIN", "ROMAN")),
    )

    # Longest image side (pixels) sent to the API; larger images are shrunk.
    _MAX_IMAGE_SIDE = 1200
    # Upper bound on the base64 payload length (the code targets a 4MB limit).
    _MAX_BASE64_CHARS = 4 * 1024 * 1024
    # JPEG qualities tried in order until the payload fits the limit.
    _JPEG_QUALITIES = (90, 70, 50)

    def __init__(self, groq_api_key):
        """Create the Groq client and select the vision model.

        Args:
            groq_api_key: API key passed straight to ``Groq(api_key=...)``.
        """
        self.groq_client = Groq(api_key=groq_api_key)
        self.vision_model = "meta-llama/llama-4-scout-17b-16e-instruct"
        print(f"[INFO] Groq Vision Classifier initialized with {self.vision_model}")

    def classify_script(self, image_path):
        """Classify the script shown in the image at ``image_path``.

        Args:
            image_path: Path (or file object) accepted by ``PIL.Image.open``.

        Returns:
            A lower-case script label, or ``"unknown"`` when the image cannot
            be encoded, the API call fails or the answer is not recognised.
        """
        try:
            base64_image = self._image_to_base64(image_path)
            if not base64_image:
                return "unknown"

            response = self._query_groq_vision(base64_image)
            script_type = self._parse_classification_response(response)

            print(f"[INFO] Llama Vision classified script as: {script_type}")
            return script_type.lower()

        except Exception as e:
            # Boundary of the public API: report and degrade to "unknown".
            print(f"[ERROR] Groq Vision script classification failed: {e}")
            return "unknown"

    def _image_to_base64(self, image_path):
        """Load an image and return it as a base64-encoded JPEG string.

        The image is shrunk so its longest side is at most
        ``_MAX_IMAGE_SIDE`` pixels, converted to a JPEG-compatible mode and
        re-encoded at decreasing quality until the base64 text fits
        ``_MAX_BASE64_CHARS``.

        Args:
            image_path: Path (or file object) accepted by ``PIL.Image.open``.

        Returns:
            The base64 text, or ``None`` if the image could not be read or
            encoded. If no quality setting fits the limit, the smallest
            encoding produced is returned as a best effort.
        """
        try:
            # The context manager closes the underlying file handle.
            with Image.open(image_path) as source:
                # JPEG cannot store alpha or palette data; saving an
                # RGBA/P/LA image directly raises an error in Pillow.
                if source.mode in ("RGB", "L"):
                    image = source
                else:
                    image = source.convert("RGB")

                if max(image.size) > self._MAX_IMAGE_SIDE:
                    image.thumbnail(
                        (self._MAX_IMAGE_SIDE, self._MAX_IMAGE_SIDE),
                        Image.Resampling.LANCZOS,
                    )

                image_b64 = None
                for quality in self._JPEG_QUALITIES:
                    buffer = BytesIO()
                    image.save(buffer, format="JPEG", quality=quality)
                    image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
                    if len(image_b64) <= self._MAX_BASE64_CHARS:
                        return image_b64

            print("[WARN] Encoded image still exceeds the size limit at lowest quality")
            return image_b64

        except Exception as e:
            print(f"[ERROR] Image to base64 conversion failed: {e}")
            return None

    def _query_groq_vision(self, base64_image):
        """Send the encoded image to the vision model and return its reply.

        Args:
            base64_image: Base64 JPEG text from ``_image_to_base64``.

        Returns:
            The raw message content returned by the model (requested as a
            JSON object), or ``None`` if there is no image or the call fails.
        """
        if not base64_image:
            # Without this guard the literal text "None" would be sent as data.
            return None

        try:
            # The prompt is kept short; the reply is capped at 100 tokens below.
            prompt = """Analyze this image of ancient text/script as an expert paleographer.

Classify it as ONE of these ancient script types:

- EGYPTIAN: Hieroglyphic symbols (birds, eyes, human figures, cartouches)
- GREEK: Ancient/medieval Greek alphabet (α,β,γ,δ,ε,ζ,η,θ) with diacritics
- LATIN: Latin alphabet letters, Roman inscriptions, medieval manuscripts
- CUNEIFORM: Wedge-shaped impressions on clay tablets (triangular marks)

IMPORTANT: Cuneiform has geometric wedge patterns, NOT pictures like hieroglyphs.

Respond ONLY with JSON:
{"classification": "EGYPTIAN" or "GREEK" or "LATIN" or "CUNEIFORM", "confidence": 0.0-1.0}"""

            completion = self.groq_client.chat.completions.create(
                model=self.vision_model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ],
                temperature=0.1,  # Low temperature for consistent classification
                max_completion_tokens=100,  # The expected reply is a tiny JSON object
                top_p=0.9,
                stream=False,
                response_format={"type": "json_object"}
            )

            return completion.choices[0].message.content

        except Exception as e:
            print(f"[ERROR] Groq Vision API call failed: {e}")
            return None

    @staticmethod
    def _coerce_confidence(value):
        """Return ``value`` as a float, or 0.0 when it is not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    def _classify_from_text(self, response):
        """Guess the script from free text by keyword, in priority order."""
        response_upper = str(response).strip().upper()
        for script, keywords in self._FALLBACK_KEYWORDS:
            if any(keyword in response_upper for keyword in keywords):
                return script
        return "unknown"

    def _parse_classification_response(self, response):
        """Turn the model's reply into a script label.

        A JSON object with a ``classification`` field is the expected shape.
        The ``confidence`` field is only logged, so a missing or non-numeric
        value never affects the returned label. Replies that are not valid
        JSON fall back to keyword matching on the raw text.

        Args:
            response: Raw reply text from the model, or ``None``.

        Returns:
            One of ``SUPPORTED_SCRIPTS`` or ``"unknown"``.
        """
        if not response:
            return "unknown"

        try:
            data = json.loads(response)
        except json.JSONDecodeError:
            print(f"[WARN] Failed to parse JSON response, trying text parsing: {response}")
            return self._classify_from_text(response)
        except (TypeError, ValueError) as e:
            print(f"[ERROR] Response parsing failed: {e}")
            return "unknown"

        if not isinstance(data, dict):
            # Valid JSON but not an object (e.g. a list or a number).
            print(f"[WARN] Unknown classification: {data}")
            return "unknown"

        confidence = self._coerce_confidence(data.get('confidence', 0.0))
        print(f"[INFO] Vision model confidence: {confidence:.3f}")

        raw_label = data.get('classification')
        classification = raw_label.strip().upper() if isinstance(raw_label, str) else ""
        script = classification.lower()
        if script in self.SUPPORTED_SCRIPTS:
            return script

        print(f"[WARN] Unknown classification: {classification or raw_label}")
        return "unknown"

    def classify_with_fallback(self, image_path, max_retries=2):
        """Classify an image, retrying while the result is ``"unknown"``.

        Args:
            image_path: Path (or file object) accepted by ``PIL.Image.open``.
            max_retries: Extra attempts after the first one; the image is
                classified at most ``max_retries + 1`` times.

        Returns:
            The first recognised script label, or ``"unknown"`` if every
            attempt fails.
        """
        for attempt in range(max_retries + 1):
            is_last_attempt = attempt >= max_retries
            try:
                result = self.classify_script(image_path)
            except Exception as e:
                if is_last_attempt:
                    print(f"[ERROR] All classification attempts failed: {e}")
                    return "unknown"
                print(f"[WARN] Classification attempt {attempt + 1} failed: {e}, retrying...")
                continue

            if result != "unknown":
                return result
            if is_last_attempt:
                print("[WARN] All classification attempts returned unknown")
                return "unknown"
            print(f"[INFO] Classification attempt {attempt + 1} returned unknown, retrying...")

        return "unknown"

    def get_supported_scripts(self):
        """Return a new list of the script labels this classifier can emit."""
        return list(self.SUPPORTED_SCRIPTS)

    def validate_classification(self, script_type, confidence_threshold=0.7):
        """Check that ``script_type`` is one of the supported labels.

        Args:
            script_type: Label to validate, e.g. a ``classify_script`` result.
            confidence_threshold: Accepted for interface compatibility but
                not used; no confidence value is available at this point.

        Returns:
            ``True`` for a supported label, otherwise ``False``.
        """
        if script_type not in self.get_supported_scripts():
            print(f"[WARN] Unsupported script type: {script_type}")
            return False

        # Any supported label coming from the vision model is accepted as-is.
        return True

    def get_model_info(self):
        """Return a dict describing the model, provider and capabilities."""
        return {
            "model": self.vision_model,
            "provider": "Groq",
            "supported_scripts": self.get_supported_scripts(),
            "features": [
                "Ancient script classification",
                "Multi-script support",
                "Cuneiform wedge detection",
                "Clay tablet recognition",
                "High-resolution image processing"
            ]
        }

    def _save_debug_info(self, debug_info, image_path, result):
        """Write ``debug_info`` as JSON to the working directory."""
        # crc32 gives the same suffix on every run; the built-in hash() of a
        # string is salted per process, so it cannot name files reproducibly.
        suffix = zlib.crc32(str(image_path).encode("utf-8")) % 10000
        debug_file = f"debug_classification_{result}_{suffix}.json"
        with open(debug_file, 'w', encoding="utf-8") as f:
            json.dump(debug_info, f, indent=2)
        print(f"[DEBUG] Debug info saved to: {debug_file}")

    def debug_classification(self, image_path, save_debug_info=False):
        """Classify an image while printing each intermediate step.

        Args:
            image_path: Path (or file object) accepted by ``PIL.Image.open``.
            save_debug_info: When true, also write the collected details to a
                ``debug_classification_<label>_<n>.json`` file in the current
                working directory.

        Returns:
            The script label, or ``"unknown"`` on any failure. A failure to
            write the debug file is reported but does not change the result.
        """
        try:
            print(f"[DEBUG] Starting classification for: {image_path}")

            # Read the properties, then close the file handle straight away.
            with Image.open(image_path) as image:
                image_size = image.size
                image_mode = image.mode
            print(f"[DEBUG] Image size: {image_size}")
            print(f"[DEBUG] Image mode: {image_mode}")

            base64_image = self._image_to_base64(image_path)
            response = None
            if base64_image:
                print(f"[DEBUG] Base64 size: {len(base64_image)} characters")
                # Only call the API when there is an image to send.
                response = self._query_groq_vision(base64_image)
            print(f"[DEBUG] Raw API response: {response}")

            result = self._parse_classification_response(response)
            print(f"[DEBUG] Final classification: {result}")

            if save_debug_info:
                debug_info = {
                    "image_path": str(image_path),
                    "image_size": list(image_size),
                    "base64_length": len(base64_image) if base64_image else 0,
                    "raw_response": response,
                    "classification": result
                }
                try:
                    self._save_debug_info(debug_info, image_path, result)
                except (OSError, TypeError, ValueError) as e:
                    # The classification is already known; keep it.
                    print(f"[WARN] Could not save debug info: {e}")

            return result

        except Exception as e:
            print(f"[ERROR] Debug classification failed: {e}")
            return "unknown"