Spaces:
Sleeping
Sleeping
| import os | |
| import base64 | |
| import json | |
| import gradio as gr | |
| from openai import OpenAI | |
| from dotenv import load_dotenv | |
# Configuration: read credentials from the process environment, after
# python-dotenv has had a chance to populate it.
load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN")
OPENAI_TOKEN = os.getenv("OPENAI_API_KEY")

# Fail fast at import time: both API clients below need a credential.
if not (HF_TOKEN and OPENAI_TOKEN):
    raise EnvironmentError("HF_TOKEN and OPENAI_API_KEY must be set in the .env file")

# Two OpenAI-compatible clients: one pointed at the Hugging Face router
# (used for the Qwen vision model), one at the default OpenAI endpoint.
client_qwen = OpenAI(
    api_key=HF_TOKEN,
    base_url="https://router.huggingface.co/v1",
)
client_openai = OpenAI(api_key=OPENAI_TOKEN)
def encode_image_to_base64(image_path: str) -> str:
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode in one go; the base64 bytes are decoded
    as UTF-8 so the result can be embedded directly in text (e.g. a data URL).
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def extract_text_with_qwen(base64_image: str) -> str:
    """Ask the Qwen vision model to transcribe the text in an image.

    Args:
        base64_image: Base64-encoded image bytes. They are embedded in a
            ``data:image/jpeg;base64,...`` URL, so the payload is declared
            to the model as JPEG.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string when the reply carries no text content.
    """
    prompt = (
        "Extract all visible printed or handwritten text from this image as accurately and cleanly as possible. "
        "Do not summarize or explain. Just return the extracted text clearly."
    )
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
    }
    response = client_qwen.chat.completions.create(
        model="Qwen/Qwen2.5-VL-7B-Instruct:hyperbolic",
        messages=[
            {
                "role": "user",
                "content": [{"type": "text", "text": prompt}, image_part],
            }
        ],
    )
    # ``message.content`` is optional in the chat-completions schema; calling
    # .strip() on None would raise AttributeError, so fall back to "".
    content = response.choices[0].message.content
    return (content or "").strip()
def analyze_text_with_openai(ocr_text: str) -> str:
    """Send OCR output to the OpenAI model and return its analysis.

    The prompt asks for an answer in Arabic, formatted as structured JSON
    covering a summary, insights/risks, topics, and language/tone. The reply
    is returned as raw text; it is not parsed or validated as JSON here.

    Args:
        ocr_text: Text previously extracted from an image.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string when the reply carries no text content.
    """
    prompt = (
        "answer in arabic. "
        "You are an expert assistant. The following is text extracted via OCR from an image. "
        "Please analyze it carefully. Return a structured JSON output that includes:\n"
        "- A concise summary of the content\n"
        "- Any critical insights, risks, or anomalies\n"
        "- Detected topics or categories\n"
        "- Language and tone characteristics (if relevant)\n\n"
        f"OCR TEXT:\n{ocr_text}"
    )
    response = client_openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[{"role": "user", "content": prompt}],
    )
    # ``message.content`` is optional in the chat-completions schema; calling
    # .strip() on None would raise AttributeError, so fall back to "".
    content = response.choices[0].message.content
    return (content or "").strip()
def process_image(image) -> str:
    """Run OCR and analysis on an uploaded image and return a text report.

    Steps: write the image to a temporary JPEG, base64-encode it, extract its
    text with ``extract_text_with_qwen``, analyze that text with
    ``analyze_text_with_openai``, and persist both results.

    Args:
        image: A PIL image (the UI input is declared with ``type="pil"``),
            or ``None`` when the function is invoked without an upload.

    Returns:
        A string containing the OCR text followed by the analysis, or a short
        notice when no image was supplied.

    Side effects:
        Overwrites ``ocr_summary_output.json`` in the current working
        directory with the latest result on every successful call.
    """
    import tempfile  # local import: only this function needs it

    # Without an upload there is nothing to save; avoid AttributeError on None.
    if image is None:
        return "No image provided. Please upload an image first."

    # JPEG has no alpha/palette support, so saving e.g. an RGBA image as .jpg
    # raises; normalise anything unusual to RGB first.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    # Use a unique temp file rather than one fixed name so overlapping
    # requests cannot overwrite each other's upload, and always clean it up.
    fd, image_path = tempfile.mkstemp(suffix=".jpg")
    os.close(fd)
    try:
        image.save(image_path)
        base64_image = encode_image_to_base64(image_path)
    finally:
        os.remove(image_path)

    ocr_text = extract_text_with_qwen(base64_image)
    openai_analysis = analyze_text_with_openai(ocr_text)

    result = {
        "ocr_text": ocr_text,
        "openai_analysis": openai_analysis,
    }

    # Save to file (ensure_ascii=False keeps non-ASCII text readable on disk).
    with open("ocr_summary_output.json", "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    return f"--- OCR TEXT ---\n{ocr_text}\n\n--- GPT ANALYSIS ---\n{openai_analysis}"
# Gradio UI: a single-function interface that feeds an uploaded PIL image to
# process_image and shows the returned report in a text box.
demo = gr.Interface(
    title="OCR and Insight Generator",
    description=(
        "Upload an image with printed or handwritten text. "
        "Qwen extracts the text. "
        "GPT-4 summarizes and analyzes it."
    ),
    fn=process_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="OCR + GPT Result"),
)

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch(share=True)