Spaces:

Akshay30
/

decipherai-api

Sleeping

App Files Files Community

decipherai-api / services /groq_vision_classifier.py

Akshay30

Initial DecipherAI backend deployment

2f4af3f 4 days ago

raw

history blame contribute delete

10.3 kB

	import base64
	import json
	import os
	from io import BytesIO
	from PIL import Image
	from groq import Groq


	class GroqVisionScriptClassifier:
	def __init__(self, groq_api_key):
	self.groq_client = Groq(api_key=groq_api_key)
	# FIXED: Use the correct stable model name
	self.vision_model = "meta-llama/llama-4-scout-17b-16e-instruct"
	print(f"[INFO] Groq Vision Classifier initialized with {self.vision_model}")

	def classify_script(self, image_path):
	"""Enhanced script classification including cuneiform using Groq's Llama Vision model"""
	try:
	# Convert image to base64
	base64_image = self._image_to_base64(image_path)
	if not base64_image:
	return "unknown"

	# Query Groq Vision API
	response = self._query_groq_vision(base64_image)

	# Parse the response
	script_type = self._parse_classification_response(response)

	print(f"[INFO] Llama Vision classified script as: {script_type}")
	return script_type.lower()

	except Exception as e:
	print(f"[ERROR] Groq Vision script classification failed: {e}")
	return "unknown"

	def _image_to_base64(self, image_path):
	"""Convert image to base64 for Groq Vision API (4MB limit)"""
	try:
	image = Image.open(image_path)

	# Resize if too large (keep under 4MB base64 limit)
	if max(image.size) > 1200:
	image.thumbnail((1200, 1200), Image.Resampling.LANCZOS)

	# Convert to base64 JPEG (smaller than PNG)
	buffer = BytesIO()
	image.save(buffer, format="JPEG", quality=90)
	image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

	# Check size (base64 should be < 4MB)
	if len(image_b64) > 4 * 1024 * 1024: # 4MB limit
	# Reduce quality and try again
	buffer = BytesIO()
	image.save(buffer, format="JPEG", quality=70)
	image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

	return image_b64

	except Exception as e:
	print(f"[ERROR] Image to base64 conversion failed: {e}")
	return None

	def _query_groq_vision(self, base64_image):
	"""Enhanced query for Groq Llama Vision API including cuneiform"""
	try:
	# FIXED: Simplified prompt to avoid token limit issues
	prompt = """Analyze this image of ancient text/script as an expert paleographer.

	Classify it as ONE of these ancient script types:

	- EGYPTIAN: Hieroglyphic symbols (birds, eyes, human figures, cartouches)
	- GREEK: Ancient/medieval Greek alphabet (α,β,γ,δ,ε,ζ,η,θ) with diacritics
	- LATIN: Latin alphabet letters, Roman inscriptions, medieval manuscripts
	- CUNEIFORM: Wedge-shaped impressions on clay tablets (triangular marks)

	IMPORTANT: Cuneiform has geometric wedge patterns, NOT pictures like hieroglyphs.

	Respond ONLY with JSON:
	{"classification": "EGYPTIAN" or "GREEK" or "LATIN" or "CUNEIFORM", "confidence": 0.0-1.0}"""

	completion = self.groq_client.chat.completions.create(
	model=self.vision_model,
	messages=[
	{
	"role": "user",
	"content": [
	{"type": "text", "text": prompt},
	{
	"type": "image_url",
	"image_url": {
	"url": f"data:image/jpeg;base64,{base64_image}"
	}
	}
	]
	}
	],
	temperature=0.1, # Low temperature for consistent classification
	max_completion_tokens=100, # FIXED: Reduced to avoid token errors
	top_p=0.9,
	stream=False,
	response_format={"type": "json_object"}
	)

	return completion.choices[0].message.content

	except Exception as e:
	print(f"[ERROR] Groq Vision API call failed: {e}")
	return None

	def _parse_classification_response(self, response):
	"""Enhanced parsing for JSON response including cuneiform"""
	if not response:
	return "unknown"

	try:
	# Parse JSON response
	data = json.loads(response)
	classification = data.get('classification', '').upper()
	confidence = data.get('confidence', 0.0)

	print(f"[INFO] Vision model confidence: {confidence:.3f}")

	# Enhanced classification mapping including cuneiform
	if classification == "EGYPTIAN":
	return "egyptian"
	elif classification == "GREEK":
	return "greek"
	elif classification == "LATIN":
	return "latin"
	elif classification == "CUNEIFORM":
	return "cuneiform"
	else:
	print(f"[WARN] Unknown classification: {classification}")
	return "unknown"

	except json.JSONDecodeError:
	print(f"[WARN] Failed to parse JSON response, trying text parsing: {response}")
	# Enhanced fallback to text parsing
	response_upper = response.strip().upper()

	# Priority order: cuneiform keywords first (most specific)
	cuneiform_keywords = ["CUNEIFORM", "WEDGE", "CLAY", "MESOPOTAMIAN", "AKKADIAN", "SUMERIAN", "BABYLONIAN"]
	if any(keyword in response_upper for keyword in cuneiform_keywords):
	return "cuneiform"
	elif "EGYPTIAN" in response_upper or "HIEROGLYPH" in response_upper:
	return "egyptian"
	elif "GREEK" in response_upper:
	return "greek"
	elif "LATIN" in response_upper or "ROMAN" in response_upper:
	return "latin"

	except Exception as e:
	print(f"[ERROR] Response parsing failed: {e}")

	return "unknown"

	def classify_with_fallback(self, image_path, max_retries=2):
	"""Enhanced classification with retry logic"""
	for attempt in range(max_retries + 1):
	try:
	result = self.classify_script(image_path)

	if result != "unknown":
	return result
	elif attempt < max_retries:
	print(f"[INFO] Classification attempt {attempt + 1} returned unknown, retrying...")
	continue
	else:
	print(f"[WARN] All classification attempts returned unknown")
	return "unknown"

	except Exception as e:
	if attempt < max_retries:
	print(f"[WARN] Classification attempt {attempt + 1} failed: {e}, retrying...")
	continue
	else:
	print(f"[ERROR] All classification attempts failed: {e}")
	return "unknown"

	return "unknown"

	def get_supported_scripts(self):
	"""Get list of supported script types"""
	return ["egyptian", "greek", "latin", "cuneiform"]

	def validate_classification(self, script_type, confidence_threshold=0.7):
	"""Validate classification result"""
	supported_scripts = self.get_supported_scripts()

	if script_type not in supported_scripts:
	print(f"[WARN] Unsupported script type: {script_type}")
	return False

	# All classifications from Llama Vision are considered valid
	return True

	def get_model_info(self):
	"""Get information about the vision model being used"""
	return {
	"model": self.vision_model,
	"provider": "Groq",
	"supported_scripts": self.get_supported_scripts(),
	"features": [
	"Ancient script classification",
	"Multi-script support",
	"Cuneiform wedge detection",
	"Clay tablet recognition",
	"High-resolution image processing"
	]
	}

	def debug_classification(self, image_path, save_debug_info=False):
	"""Debug classification with detailed information"""
	try:
	print(f"[DEBUG] Starting classification for: {image_path}")

	# Check image properties
	image = Image.open(image_path)
	print(f"[DEBUG] Image size: {image.size}")
	print(f"[DEBUG] Image mode: {image.mode}")

	# Get base64 size
	base64_image = self._image_to_base64(image_path)
	if base64_image:
	print(f"[DEBUG] Base64 size: {len(base64_image)} characters")

	# Get raw response
	response = self._query_groq_vision(base64_image)
	print(f"[DEBUG] Raw API response: {response}")

	# Parse and return
	result = self._parse_classification_response(response)
	print(f"[DEBUG] Final classification: {result}")

	if save_debug_info:
	debug_info = {
	"image_path": image_path,
	"image_size": image.size,
	"base64_length": len(base64_image) if base64_image else 0,
	"raw_response": response,
	"classification": result
	}

	debug_file = f"debug_classification_{result}_{hash(image_path) % 10000}.json"
	with open(debug_file, 'w') as f:
	json.dump(debug_info, f, indent=2)
	print(f"[DEBUG] Debug info saved to: {debug_file}")

	return result

	except Exception as e:
	print(f"[ERROR] Debug classification failed: {e}")
	return "unknown"