Spaces:
Sleeping
Sleeping
File size: 10,560 Bytes
from processors.egyptian_processor import EgyptianProcessor
from processors.greek_processor import GreekProcessor
from processors.latin_processor import LatinProcessor
from processors.cuneiform_processor import CuneiformProcessor
from .groq_vision_classifier import GroqVisionScriptClassifier
class ScriptDetectionService:
    """Classify the script shown in an image and hand it to a matching processor.

    Classification is attempted with the Groq Vision classifier first (when one
    could be created) and falls back to the CLIP classifier, and finally to a
    fixed "egyptian" default. Supported routing targets are the Egyptian,
    Greek, Latin and (optionally) Cuneiform processors.
    """

    def __init__(self, groq_client, references, clip_classifier, translator_pipe, cuneiform_processor=None):
        """Build the per-script processors and the optional vision classifier.

        Args:
            groq_client: Client object forwarded to every processor; also
                inspected for an ``api_key`` (directly or on ``.client``).
            references: Reference data forwarded to every processor.
            clip_classifier: Classifier forwarded to every processor and used
                as the classification fallback.
            translator_pipe: Forwarded to the Egyptian processor only.
            cuneiform_processor: Optional pre-built cuneiform processor to
                share instead of constructing a new one.
        """
        self.egyptian_processor = EgyptianProcessor(groq_client, references, clip_classifier, translator_pipe)
        self.greek_processor = GreekProcessor(groq_client, references, clip_classifier)
        self.latin_processor = LatinProcessor(groq_client, references, clip_classifier)

        # Reuse the shared instance when one is supplied. Compare against None
        # explicitly so a supplied processor is never discarded for being falsy.
        if cuneiform_processor is not None:
            self.cuneiform_processor = cuneiform_processor
            print("[INFO] Cuneiform processor shared from global app instance")
        else:
            try:
                print("[INFO] Initializing cuneiform processor in detection service...")
                self.cuneiform_processor = CuneiformProcessor(groq_client, references, clip_classifier)
                print("[INFO] Cuneiform processor initialized successfully")
            except Exception as e:
                # Cuneiform support is optional; keep the service usable without it.
                print(f"[WARN] Failed to initialize cuneiform processor: {e}")
                self.cuneiform_processor = None

        # The vision classifier is only created when an API key can be found.
        api_key = self._resolve_api_key(groq_client)
        if api_key:
            try:
                self.vision_classifier = GroqVisionScriptClassifier(api_key)
                print("[INFO] Groq Vision Script Detection Service initialized")
            except Exception as e:
                print(f"[WARN] Failed to initialize Groq Vision script classifier: {e}")
                self.vision_classifier = None
        else:
            print("[WARN] GROQ_API_KEY not found! Groq Vision classifier disabled. Falling back to zero-shot CLIP classifier.")
            self.vision_classifier = None

        # Kept for the CLIP fallback in detect_and_process.
        self.clip_classifier = clip_classifier

        # Lookup table used by get_processor_by_type.
        self.processors = {
            'egyptian': self.egyptian_processor,
            'greek': self.greek_processor,
            'latin': self.latin_processor,
            'cuneiform': self.cuneiform_processor,
        }

        if self.cuneiform_processor is not None:
            print("[INFO] Cuneiform support: ENABLED (praeclarum/cuneiform model)")
        else:
            print("[WARN] Cuneiform support: DISABLED (processor initialization failed)")

    @staticmethod
    def _resolve_api_key(groq_client):
        """Return the first non-empty Groq API key found, or None.

        Sources are tried in order: ``groq_client.api_key``,
        ``groq_client.client.api_key``, ``config.Config().GROQ_API_KEY`` and
        the ``GROQ_API_KEY`` environment variable. An attribute that exists
        but is empty does not stop the search.
        """
        api_key = getattr(groq_client, 'api_key', None)
        if not api_key:
            api_key = getattr(getattr(groq_client, 'client', None), 'api_key', None)
        if not api_key:
            try:
                from config import Config
                api_key = Config().GROQ_API_KEY
            except Exception:
                # Config is optional; fall through to the environment.
                api_key = None
        if not api_key:
            import os
            api_key = os.getenv('GROQ_API_KEY')
        return api_key

    def _cuneiform_ready(self):
        """Return True when a cuneiform processor exists and reports itself available."""
        processor = self.cuneiform_processor
        # getattr guards against shared processors that lack the flag.
        return processor is not None and bool(getattr(processor, 'cuneiform_available', False))

    def _classify_script(self, image_path):
        """Classify the image and return ``(script_type, method, confidence)``.

        Tries the vision classifier, then the CLIP classifier, then falls back
        to ``("egyptian", "default_fallback", 0.5)``.
        """
        script_type = "unknown"
        classification_method = "unknown"
        classification_confidence = 0.0

        if self.vision_classifier:
            try:
                label = self.vision_classifier.classify_script(image_path)
                # Normalise so routing comparisons are not defeated by casing
                # or whitespace; empty / non-string labels count as unknown.
                if isinstance(label, str) and label.strip():
                    script_type = label.strip().lower()
                else:
                    script_type = "unknown"
                classification_method = 'groq_vision'
                # Fixed confidence assigned to vision results by this service.
                classification_confidence = 0.95
            except Exception as e:
                print(f"[WARN] Groq Vision classification failed: {e}. Falling back to CLIP.")

        if script_type == "unknown" or not self.vision_classifier:
            try:
                # Imported lazily and inside the try so a missing Pillow
                # install degrades to the default instead of aborting.
                from PIL import Image
                # Context manager closes the underlying file handle.
                with Image.open(image_path) as img:
                    script_type, classification_confidence = self.clip_classifier.classify_script_type(img)
                classification_method = 'clip_zero_shot'
                print(f"[INFO] CLIP fallback classification: {script_type} (conf={classification_confidence:.3f})")
            except Exception as ce:
                print(f"[ERROR] CLIP fallback classification failed: {ce}")
                script_type = "egyptian"  # default fallback
                classification_method = "default_fallback"
                classification_confidence = 0.5

        return script_type, classification_method, classification_confidence

    def _route_to_processor(self, script_type, image_path):
        """Run the processor matching ``script_type`` and return its result."""
        if script_type == "egyptian":
            print("[INFO] Routing to Egyptian processor...")
            return self.egyptian_processor.process_image(image_path)
        if script_type == "greek":
            print("[INFO] Routing to Greek processor...")
            return self.greek_processor.process_image(image_path)
        if script_type == "latin":
            print("[INFO] Routing to Latin processor...")
            return self.latin_processor.process_image(image_path)
        if script_type == "cuneiform":
            print("[INFO] Routing to Cuneiform processor...")
            if self._cuneiform_ready():
                return self.cuneiform_processor.process_image(image_path)
            print("[ERROR] Cuneiform processor not available!")
            # Placeholder result so callers still receive a dict.
            return {
                'script_type': 'cuneiform',
                'confidence': 0.0,
                'processed_result': {
                    'text': 'Cuneiform processor unavailable',
                    'validation': {'quality_score': 0.0, 'error': 'Model not loaded'}
                },
                'historical_context': {},
                'creative_story': 'Cuneiform processing failed - model not available'
            }
        # Anything unrecognised is treated as Egyptian.
        print(f"[INFO] Unknown classification '{script_type}', defaulting to Egyptian...")
        return self.egyptian_processor.process_image(image_path)

    def detect_and_process(self, image_path):
        """Classify the image at ``image_path`` and process it.

        Args:
            image_path: Path of the image to classify and process.

        Returns:
            The processor's result with ``vision_classification``,
            ``classification_method`` and ``classification_confidence`` keys
            added, or None when the processor returned a falsy result or any
            step raised.
        """
        try:
            # Step 1: classification (vision, then CLIP, then default).
            script_type, classification_method, classification_confidence = self._classify_script(image_path)
            print(f"[INFO] Final classification routed: {script_type} via {classification_method}")

            # Step 2: dispatch to the matching processor.
            result = self._route_to_processor(script_type, image_path)

            # Step 3: attach classification metadata.
            if result:
                result['vision_classification'] = script_type
                result['classification_method'] = classification_method
                result['classification_confidence'] = classification_confidence
                print(f"[INFO] {script_type.title()} processing completed successfully")
                return result
            print(f"[ERROR] {script_type.title()} processor returned None")
            return None
        except Exception as e:
            print(f"[ERROR] Classification and processing failed: {e}")
            import traceback
            traceback.print_exc()
            return None

    def get_processor_by_type(self, script_type):
        """Return the processor registered for ``script_type``, or None.

        Lookup is case-insensitive. None is returned for non-string input,
        for unregistered types, and for a cuneiform processor that is not
        reporting itself available.
        """
        if not isinstance(script_type, str):
            return None
        key = script_type.lower()
        processor = self.processors.get(key)
        if key == 'cuneiform' and processor is not None and not getattr(processor, 'cuneiform_available', False):
            print("[WARN] Cuneiform processor exists but model not available")
            return None
        return processor

    def get_supported_scripts(self):
        """Return the list of script types that can currently be processed."""
        scripts = ['egyptian', 'greek', 'latin']
        if self._cuneiform_ready():
            scripts.append('cuneiform')
        return scripts

    def get_processor_status(self):
        """Return a dict mapping each script type to whether its processor is usable."""
        return {
            'egyptian': self.egyptian_processor is not None,
            'greek': self.greek_processor is not None,
            'latin': self.latin_processor is not None,
            'cuneiform': self._cuneiform_ready(),
        }

    def validate_script_detection(self, script_type, processed_result):
        """Check a processed result against the quality threshold for its script.

        Args:
            script_type: Script name; matched case-insensitively.
            processed_result: Dict whose ``validation`` entry holds
                ``quality_score`` and, for cuneiform, optionally
                ``cuneiform_ratio`` / ``atf_ratio``.

        Returns:
            True when the result passes validation, False otherwise
            (including when the inputs are malformed).
        """
        try:
            # Normalise case so lookups agree with get_processor_by_type.
            script_key = script_type.lower()
            validation = processed_result.get('validation') or {}
            quality_score = validation.get('quality_score', 0.0)

            # Script-specific validation thresholds.
            thresholds = {
                'egyptian': 0.3,
                'greek': 0.4,
                'latin': 0.4,
                'cuneiform': 0.2,  # Lower threshold due to OCR challenges
            }
            threshold = thresholds.get(script_key, 0.3)

            if script_key == 'cuneiform':
                cuneiform_ratio = validation.get('cuneiform_ratio', 0.0)
                atf_ratio = validation.get('atf_ratio', 0.0)
                # Accept if either Unicode cuneiform or ATF format detected.
                if cuneiform_ratio > 0.1 or atf_ratio > 0.3:
                    print(f"[INFO] Cuneiform validation passed: cuneiform_ratio={cuneiform_ratio:.3f}, atf_ratio={atf_ratio:.3f}")
                    return True

            # Standard quality validation.
            is_valid = quality_score >= threshold
            if is_valid:
                print(f"[INFO] {script_key.title()} validation passed: quality={quality_score:.3f} >= {threshold}")
            else:
                print(f"[WARN] {script_key.title()} validation failed: quality={quality_score:.3f} < {threshold}")
            return is_valid
        except Exception as e:
            print(f"[ERROR] Validation failed: {e}")
            return False
|