# Scraped page header (not part of the program): Spaces: Sleeping; File size: 10,331 Bytes; 2f4af3f (presumably the commit id).
import base64
import json
import os
from io import BytesIO
from PIL import Image
from groq import Groq
class GroqVisionScriptClassifier:
    """Classify images of ancient scripts with a Groq-hosted Llama vision model.

    The classifier JPEG-encodes an image, sends it together with a fixed
    prompt to the Groq chat-completions endpoint and maps the JSON answer
    to one of ``"egyptian"``, ``"greek"``, ``"latin"``, ``"cuneiform"`` or
    ``"unknown"``.  Every public method reports problems with ``print`` and
    returns ``"unknown"`` instead of raising.
    """

    # Labels the prompt asks the model to emit -> names returned to callers.
    _LABELS = {
        "EGYPTIAN": "egyptian",
        "GREEK": "greek",
        "LATIN": "latin",
        "CUNEIFORM": "cuneiform",
    }

    # Free-text fallback, checked in this order (cuneiform terms first
    # because they are the most specific).
    _FALLBACK_KEYWORDS = (
        ("cuneiform", ("CUNEIFORM", "WEDGE", "CLAY", "MESOPOTAMIAN",
                       "AKKADIAN", "SUMERIAN", "BABYLONIAN")),
        ("egyptian", ("EGYPTIAN", "HIEROGLYPH")),
        ("greek", ("GREEK",)),
        ("latin", ("LATIN", "ROMAN")),
    )

    _MAX_DIMENSION = 1200                  # longest image side, in pixels
    _MAX_BASE64_LENGTH = 4 * 1024 * 1024   # 4MB limit on the base64 payload
    _JPEG_QUALITIES = (90, 70, 50, 30)     # tried in order until the payload fits
    _JPEG_MODES = ("RGB", "L")             # modes saved as-is; others become RGB

    _PROMPT = (
        "Analyze this image of ancient text/script as an expert paleographer.\n"
        "Classify it as ONE of these ancient script types:\n"
        "- EGYPTIAN: Hieroglyphic symbols (birds, eyes, human figures, cartouches)\n"
        "- GREEK: Ancient/medieval Greek alphabet (α,β,γ,δ,ε,ζ,η,θ) with diacritics\n"
        "- LATIN: Latin alphabet letters, Roman inscriptions, medieval manuscripts\n"
        "- CUNEIFORM: Wedge-shaped impressions on clay tablets (triangular marks)\n"
        "IMPORTANT: Cuneiform has geometric wedge patterns, NOT pictures like hieroglyphs.\n"
        "Respond ONLY with JSON:\n"
        '{"classification": "EGYPTIAN" or "GREEK" or "LATIN" or "CUNEIFORM", "confidence": 0.0-1.0}'
    )

    def __init__(self, groq_api_key):
        """Create the Groq client and select the vision model.

        Args:
            groq_api_key: API key passed to ``Groq(api_key=...)``.
        """
        self.groq_client = Groq(api_key=groq_api_key)
        self.vision_model = "meta-llama/llama-4-scout-17b-16e-instruct"
        print(f"[INFO] Groq Vision Classifier initialized with {self.vision_model}")

    def classify_script(self, image_path):
        """Classify the script shown in the image at ``image_path``.

        Args:
            image_path: Path (or file object) accepted by ``PIL.Image.open``.

        Returns:
            A lower-case script name, or ``"unknown"`` when the image cannot
            be encoded, the API call fails, or the answer is not recognised.
        """
        try:
            base64_image = self._image_to_base64(image_path)
            if not base64_image:
                return "unknown"

            response = self._query_groq_vision(base64_image)
            script_type = self._parse_classification_response(response)
            print(f"[INFO] Llama Vision classified script as: {script_type}")
            return script_type.lower()
        except Exception as e:
            print(f"[ERROR] Groq Vision script classification failed: {e}")
            return "unknown"

    def _image_to_base64(self, image_path):
        """Return the image as a base64-encoded JPEG string, or ``None`` on error.

        The image is converted to RGB when its mode is neither RGB nor L,
        because the JPEG writer rejects alpha/palette modes (e.g. RGBA PNGs).
        It is then shrunk so its longest side is at most ``_MAX_DIMENSION``
        and encoded at decreasing quality until the base64 text fits within
        ``_MAX_BASE64_LENGTH``.  If no quality level fits, the smallest
        attempt is returned as a best effort.
        """
        try:
            # The context manager closes the underlying file once encoding is done.
            with Image.open(image_path) as source:
                if source.mode in self._JPEG_MODES:
                    image = source
                else:
                    # Convert before resizing so the high-quality filter is
                    # applied to real pixel data rather than palette indices.
                    image = source.convert("RGB")

                if max(image.size) > self._MAX_DIMENSION:
                    image.thumbnail(
                        (self._MAX_DIMENSION, self._MAX_DIMENSION),
                        Image.Resampling.LANCZOS,
                    )

                image_b64 = None
                for quality in self._JPEG_QUALITIES:
                    buffer = BytesIO()
                    image.save(buffer, format="JPEG", quality=quality)
                    image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
                    if len(image_b64) <= self._MAX_BASE64_LENGTH:
                        break
                return image_b64
        except Exception as e:
            print(f"[ERROR] Image to base64 conversion failed: {e}")
            return None

    def _query_groq_vision(self, base64_image):
        """Send the prompt and image to the Groq chat-completions API.

        Args:
            base64_image: Base64 text of a JPEG image.

        Returns:
            The content string of the first completion choice, or ``None``
            when the API call raises.
        """
        try:
            completion = self.groq_client.chat.completions.create(
                model=self.vision_model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": self._PROMPT},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ],
                temperature=0.1,  # Low temperature for consistent classification
                max_completion_tokens=100,  # The expected answer is a tiny JSON object
                top_p=0.9,
                stream=False,
                response_format={"type": "json_object"}
            )
            return completion.choices[0].message.content
        except Exception as e:
            print(f"[ERROR] Groq Vision API call failed: {e}")
            return None

    def _parse_classification_response(self, response):
        """Map a raw model answer to a script name.

        JSON answers are read from their ``classification`` field; answers
        that are not valid JSON are scanned for script keywords instead.

        Args:
            response: Text returned by the model, or ``None``.

        Returns:
            ``"egyptian"``, ``"greek"``, ``"latin"``, ``"cuneiform"`` or
            ``"unknown"``.  Never returns ``None``.
        """
        if not response:
            return "unknown"

        try:
            data = json.loads(response)
        except json.JSONDecodeError:
            print(f"[WARN] Failed to parse JSON response, trying text parsing: {response}")
            return self._classify_from_text(response)
        except Exception as e:
            print(f"[ERROR] Response parsing failed: {e}")
            return "unknown"

        if not isinstance(data, dict):
            print(f"[WARN] Unexpected JSON payload (expected an object): {data}")
            return "unknown"

        raw_label = data.get('classification')
        classification = raw_label.strip().upper() if isinstance(raw_label, str) else ""

        # Logging the confidence must never discard a valid classification,
        # so a non-numeric value (e.g. "0.9" or null) is printed as-is.
        confidence = data.get('confidence', 0.0)
        try:
            print(f"[INFO] Vision model confidence: {float(confidence):.3f}")
        except (TypeError, ValueError):
            print(f"[INFO] Vision model confidence: {confidence}")

        script = self._LABELS.get(classification)
        if script is None:
            print(f"[WARN] Unknown classification: {classification}")
            return "unknown"
        return script

    def _classify_from_text(self, response):
        """Guess the script from keywords in a non-JSON answer; ``"unknown"`` if none match."""
        response_upper = str(response).strip().upper()
        for script, keywords in self._FALLBACK_KEYWORDS:
            if any(keyword in response_upper for keyword in keywords):
                return script
        return "unknown"

    def classify_with_fallback(self, image_path, max_retries=2):
        """Run ``classify_script`` until it yields a known script.

        Args:
            image_path: Image to classify.
            max_retries: Number of additional attempts after the first one.

        Returns:
            The first result that is not ``"unknown"``, otherwise ``"unknown"``.
        """
        for attempt in range(max_retries + 1):
            try:
                result = self.classify_script(image_path)
            except Exception as e:
                if attempt < max_retries:
                    print(f"[WARN] Classification attempt {attempt + 1} failed: {e}, retrying...")
                    continue
                print(f"[ERROR] All classification attempts failed: {e}")
                return "unknown"

            if result != "unknown":
                return result
            if attempt < max_retries:
                print(f"[INFO] Classification attempt {attempt + 1} returned unknown, retrying...")
            else:
                print("[WARN] All classification attempts returned unknown")
                return "unknown"

        # Reached only when max_retries is negative and no attempt was made.
        return "unknown"

    def get_supported_scripts(self):
        """Return a new list with the script names this classifier can produce."""
        return list(self._LABELS.values())

    def validate_classification(self, script_type, confidence_threshold=0.7):
        """Return ``True`` when ``script_type`` is one of the supported scripts.

        Args:
            script_type: Script name to check.
            confidence_threshold: Accepted for interface compatibility but
                currently unused; no confidence value reaches this method.
        """
        if script_type not in self.get_supported_scripts():
            print(f"[WARN] Unsupported script type: {script_type}")
            return False
        # All classifications from Llama Vision are considered valid
        return True

    def get_model_info(self):
        """Return a dict describing the model, provider, scripts and features."""
        return {
            "model": self.vision_model,
            "provider": "Groq",
            "supported_scripts": self.get_supported_scripts(),
            "features": [
                "Ancient script classification",
                "Multi-script support",
                "Cuneiform wedge detection",
                "Clay tablet recognition",
                "High-resolution image processing"
            ]
        }

    def debug_classification(self, image_path, save_debug_info=False):
        """Classify an image while printing every intermediate value.

        Args:
            image_path: Image to classify.
            save_debug_info: When true, also write the collected details to
                ``debug_classification_<result>_<digest>.json`` in the
                current working directory.

        Returns:
            The script name, or ``"unknown"`` on any failure.
        """
        import zlib  # local import: only needed to name the debug file

        try:
            print(f"[DEBUG] Starting classification for: {image_path}")

            # Read the properties and release the file handle immediately.
            with Image.open(image_path) as image:
                image_size = image.size
                image_mode = image.mode
            print(f"[DEBUG] Image size: {image_size}")
            print(f"[DEBUG] Image mode: {image_mode}")

            base64_image = self._image_to_base64(image_path)
            if base64_image:
                print(f"[DEBUG] Base64 size: {len(base64_image)} characters")
                response = self._query_groq_vision(base64_image)
            else:
                # Nothing to send; skip the API call instead of posting "None".
                response = None
            print(f"[DEBUG] Raw API response: {response}")

            result = self._parse_classification_response(response)
            print(f"[DEBUG] Final classification: {result}")

            if save_debug_info:
                debug_info = {
                    "image_path": str(image_path),
                    "image_size": image_size,
                    "base64_length": len(base64_image) if base64_image else 0,
                    "raw_response": response,
                    "classification": result
                }
                # crc32 is stable across runs, unlike the built-in hash() of
                # a str, so the same image always maps to the same file name.
                path_digest = zlib.crc32(
                    str(image_path).encode("utf-8", "backslashreplace")
                ) % 10000
                debug_file = f"debug_classification_{result}_{path_digest}.json"
                with open(debug_file, 'w', encoding="utf-8") as f:
                    json.dump(debug_info, f, indent=2)
                print(f"[DEBUG] Debug info saved to: {debug_file}")

            return result
        except Exception as e:
            print(f"[ERROR] Debug classification failed: {e}")
            return "unknown"
|