# Source: Aoun-Ai / scripts/diagnose_ocr.py
# Last commit by: MuhammadMahmoud
# Commit message: feat: clean deployment with bug fixes and stability improvements
# Revision: 18b8b90
"""
Comprehensive OCR Provider Diagnostic Script
Tests each provider individually to identify exact failure reasons.
"""
import asyncio
import sys
import os
import traceback
# Ensure imports work
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from PIL import Image
# Create a small test image (white square with some text-like pattern)
def create_test_image():
    """Build a tiny synthetic image to feed to the OCR providers.

    Returns a 200x100 white RGB image containing a speckled band: within
    x in [20, 180) and y in [40, 60), every pixel whose (x + y) is a
    multiple of 3 is painted black.
    """
    white = (255, 255, 255)
    black = (0, 0, 0)
    canvas = Image.new("RGB", (200, 100), color=white)
    for row in range(40, 60):
        for col in range(20, 180):
            # Skip pixels that are not on the diagonal stripe pattern.
            if (col + row) % 3:
                continue
            canvas.putpixel((col, row), black)
    return canvas
def separator(title: str):
    """Print a section banner: a blank line, a 60-char '=' rule, the title, and a closing rule."""
    rule = "=" * 60
    print(f"\n{rule}")
    print(f" {title}")
    print(rule)
async def test_groq():
    """Run a staged diagnostic of the Groq OCR provider.

    Stages, each reported on stdout: API-key presence, ``groq`` package
    import, client construction, a ``models.list()`` call to validate the
    key, and finally an end-to-end ``GroqOCRProvider.extract`` call on a
    synthetic image.

    Returns:
        True if every stage completed; False if any stage raised (the
        exception and its traceback are printed).
    """
    separator("TESTING GROQ PROVIDER")
    try:
        # Imports are local so that a missing package or broken module is
        # reported as this provider's failure rather than aborting the script.
        from app.core.config import settings

        print(f" GROQ_API_KEY present: {bool(settings.GROQ_API_KEY)}")
        # NOTE(review): this echoes the first 10 characters of the secret;
        # be careful when sharing the script output.
        print(
            f" GROQ_API_KEY prefix: {settings.GROQ_API_KEY[:10]}..."
            if settings.GROQ_API_KEY
            else " NO KEY"
        )

        from groq import AsyncGroq

        print(" ✅ groq package imported successfully")

        # The context manager closes the underlying HTTP client once the
        # key-validation request is done.
        async with AsyncGroq(api_key=settings.GROQ_API_KEY) as client:
            print(" ✅ AsyncGroq client created")
            # Quick test: just list models to verify the API key works.
            models = await client.models.list()
        vision_models = [m.id for m in models.data if "vision" in m.id.lower()]
        print(f" ✅ API key valid! Available vision models: {vision_models}")

        # Now test actual OCR through the application's provider wrapper.
        from app.services.ocr.groq_provider import GroqOCRProvider
        from app.schemas import DocumentType

        provider = GroqOCRProvider()
        print(f" ✅ GroqOCRProvider instantiated, available: {provider.is_available()}")
        img = create_test_image()
        prompt = "Extract text from this image. Return JSON: {\"text\": \"...\"}"
        result = await provider.extract(img, DocumentType.ID_CARD, prompt)
        print(f" ✅ OCR SUCCESS! Result: {result}")
        return True
    except Exception as e:
        # Top-level diagnostic boundary: report any failure and keep going.
        print(f" ❌ FAILED: {type(e).__name__}: {e}")
        traceback.print_exc()
        return False
async def test_gemini():
    """Run a staged diagnostic of the Gemini OCR provider.

    Stages, each reported on stdout: API-key presence, ``google.genai``
    import, client construction, and an end-to-end
    ``GeminiOCRProvider.extract`` call on a synthetic image.

    Returns:
        True if every stage completed; False if any stage raised (the
        exception and its traceback are printed).
    """
    separator("TESTING GEMINI PROVIDER")
    try:
        # Imports are local so that a missing package or broken module is
        # reported as this provider's failure rather than aborting the script.
        from app.core.config import settings

        print(f" GEMINI_API_KEY present: {bool(settings.GEMINI_API_KEY)}")
        # NOTE(review): this echoes the first 10 characters of the secret;
        # be careful when sharing the script output.
        print(
            f" GEMINI_API_KEY prefix: {settings.GEMINI_API_KEY[:10]}..."
            if settings.GEMINI_API_KEY
            else " NO KEY"
        )

        from google import genai

        print(" ✅ google-genai package imported successfully")

        # Constructed only to confirm that client creation does not raise;
        # the OCR call below goes through the application's provider.
        genai.Client(api_key=settings.GEMINI_API_KEY)
        print(" ✅ genai.Client created")

        # Test actual OCR through the application's provider wrapper.
        from app.services.ocr.gemini_provider import GeminiOCRProvider
        from app.schemas import DocumentType

        provider = GeminiOCRProvider()
        print(f" ✅ GeminiOCRProvider instantiated, available: {provider.is_available()}")
        img = create_test_image()
        prompt = "Extract text from this image. Return JSON: {\"text\": \"...\"}"
        result = await provider.extract(img, DocumentType.ID_CARD, prompt)
        print(f" ✅ OCR SUCCESS! Result: {result}")
        return True
    except Exception as e:
        # Top-level diagnostic boundary: report any failure and keep going.
        print(f" ❌ FAILED: {type(e).__name__}: {e}")
        traceback.print_exc()
        return False
async def test_openai():
    """Run a staged diagnostic of the OpenAI OCR provider.

    Stages, each reported on stdout: API-key presence, ``openai`` import,
    client construction, and an end-to-end ``OpenAIOCRProvider.extract``
    call on a synthetic image.

    Returns:
        True if every stage completed; False if any stage raised (the
        exception and its traceback are printed).
    """
    separator("TESTING OPENAI PROVIDER")
    try:
        # Imports are local so that a missing package or broken module is
        # reported as this provider's failure rather than aborting the script.
        from app.core.config import settings

        print(f" OPENAI_API_KEY present: {bool(settings.OPENAI_API_KEY)}")
        # NOTE(review): this echoes the first 15 characters of the secret;
        # be careful when sharing the script output.
        print(
            f" OPENAI_API_KEY prefix: {settings.OPENAI_API_KEY[:15]}..."
            if settings.OPENAI_API_KEY
            else " NO KEY"
        )

        from openai import AsyncOpenAI

        print(" ✅ openai package imported successfully")

        # Constructed only to confirm that client creation does not raise;
        # the OCR call below goes through the application's provider.
        AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
        print(" ✅ AsyncOpenAI client created")

        # Test actual OCR through the application's provider wrapper.
        from app.services.ocr.openai_provider import OpenAIOCRProvider
        from app.schemas import DocumentType

        provider = OpenAIOCRProvider()
        print(f" ✅ OpenAIOCRProvider instantiated, available: {provider.is_available()}")
        img = create_test_image()
        prompt = "Extract text from this image. Return JSON: {\"text\": \"...\"}"
        result = await provider.extract(img, DocumentType.ID_CARD, prompt)
        print(f" ✅ OCR SUCCESS! Result: {result}")
        return True
    except Exception as e:
        # Top-level diagnostic boundary: report any failure and keep going.
        print(f" ❌ FAILED: {type(e).__name__}: {e}")
        traceback.print_exc()
        return False
async def test_huggingface():
    """Run a staged diagnostic of the HuggingFace OCR provider.

    Stages, each reported on stdout: API-key presence, ``huggingface_hub``
    import, the configured model name, ``InferenceClient`` construction,
    and an end-to-end ``HuggingFaceOCRProvider.extract`` call on a
    synthetic image.

    Returns:
        True if every stage completed; False if any stage raised (the
        exception and its traceback are printed).
    """
    separator("TESTING HUGGINGFACE PROVIDER")
    try:
        # Imports are local so that a missing package or broken module is
        # reported as this provider's failure rather than aborting the script.
        from app.core.config import settings

        print(f" HUGGINGFACE_API_KEY present: {bool(settings.HUGGINGFACE_API_KEY)}")
        # NOTE(review): this echoes the first 10 characters of the secret;
        # be careful when sharing the script output.
        print(
            f" HUGGINGFACE_API_KEY prefix: {settings.HUGGINGFACE_API_KEY[:10]}..."
            if settings.HUGGINGFACE_API_KEY
            else " NO KEY"
        )

        from huggingface_hub import InferenceClient

        print(" ✅ huggingface_hub package imported successfully")

        from app.services.ocr.huggingface_provider import _HF_MODEL

        print(f" HF Model: {_HF_MODEL}")

        # Constructed only to confirm that client creation does not raise;
        # the OCR call below goes through the application's provider.
        InferenceClient(provider="hf-inference", api_key=settings.HUGGINGFACE_API_KEY)
        print(" ✅ InferenceClient created (hf-inference provider)")

        # Test actual OCR through the application's provider wrapper.
        from app.services.ocr.huggingface_provider import HuggingFaceOCRProvider
        from app.schemas import DocumentType

        provider = HuggingFaceOCRProvider()
        print(f" ✅ HuggingFaceOCRProvider instantiated, available: {provider.is_available()}")
        img = create_test_image()
        prompt = "Extract text from this image. Return JSON: {\"text\": \"...\"}"
        result = await provider.extract(img, DocumentType.ID_CARD, prompt)
        print(f" ✅ OCR SUCCESS! Result: {result}")
        return True
    except Exception as e:
        # Top-level diagnostic boundary: report any failure and keep going.
        print(f" ❌ FAILED: {type(e).__name__}: {e}")
        traceback.print_exc()
        return False
async def main():
    """Run the full OCR diagnostic and print a per-provider summary.

    Steps: print interpreter and working-directory info, probe that the
    expected packages import, confirm that settings load and which API
    keys are set, run each provider test in sequence, then print a
    summary. Returns early (after printing the error) if the settings
    module cannot be loaded.
    """
    import importlib

    separator("OCR PROVIDER DIAGNOSTIC")
    print(f" Python: {sys.version}")
    print(f" CWD: {os.getcwd()}")

    # Check installed packages
    print("\n Checking installed packages...")
    packages = [
        "groq",
        "openai",
        "google.generativeai",
        "google.genai",
        "PIL",
        "json_repair",
        "pydantic_settings",
    ]
    for pkg in packages:
        try:
            importlib.import_module(pkg)
        except ImportError:
            print(f" ❌ {pkg} NOT INSTALLED")
        except Exception as e:
            # A package can be installed yet fail while importing (e.g. a
            # broken dependency); report it and keep probing the rest.
            print(f" ❌ {pkg} IMPORT ERROR: {type(e).__name__}: {e}")
        else:
            print(f" ✅ {pkg}")

    # Check settings
    print("\n Checking settings...")
    try:
        from app.core.config import settings

        print(f" GROQ_API_KEY: {'✅ set' if settings.GROQ_API_KEY else '❌ empty'}")
        print(f" GEMINI_API_KEY: {'✅ set' if settings.GEMINI_API_KEY else '❌ empty'}")
        print(f" OPENAI_API_KEY: {'✅ set' if settings.OPENAI_API_KEY else '❌ empty'}")
        print(f" HUGGINGFACE_API_KEY:{'✅ set' if settings.HUGGINGFACE_API_KEY else '❌ empty'}")
    except Exception as e:
        # Without settings none of the provider tests can run.
        print(f" ❌ Settings load failed: {e}")
        traceback.print_exc()
        return

    # Test each provider one at a time so their output does not interleave.
    results = {}
    results["groq"] = await test_groq()
    results["gemini"] = await test_gemini()
    results["openai"] = await test_openai()
    results["huggingface"] = await test_huggingface()

    # Summary
    separator("SUMMARY")
    for name, success in results.items():
        status = "✅ WORKING" if success else "❌ BROKEN"
        print(f" {name:15s} → {status}")
    working = sum(1 for ok in results.values() if ok)
    # Derive the denominator from the results so it stays correct when
    # providers are added or removed.
    print(f"\n {working}/{len(results)} providers working")
    if working == 0:
        print(" ⚠️ NO PROVIDERS WORKING — OCR service will fail!")
    sys.stdout.flush()
    sys.stderr.flush()
if __name__ == "__main__":
    # Script entry point: drive the async diagnostic to completion.
    asyncio.run(main())