binder-sa
/

OCR-pipeline-python

Model card · Files and versions

xet

Community

abdullah-1111 committed on Aug 6, 2025

Commit

f103579

verified ·

1 Parent(s): 0f64c89

Delete gemini_requestcr4.py

Browse files

Files changed (1) hide show

gemini_requestcr4.py +0 -135

gemini_requestcr4.py DELETED Viewed

@@ -1,135 +0,0 @@
-import base64
-import json
-import re
-import requests
-import os
-# Your Gemini API key
-API_KEY = "your_gemini_api_key_here"
-#Path to the document image you want to extract data from
-image_path = r"C:\Users\ASUS\Downloads\CR_OCR_Cls.v4-original_images.folder\train\CR4\CS000130_CR_jpg.rf.b2528e60546ec399c3088ac69c6a66e2.jpg"
-with open(image_path, "rb") as f:
-    image_b64 = base64.b64encode(f.read()).decode()
-prompt = """
-Extract the following fields from the CR4 document image. Return both Arabic and English text where available:
-الرقم الموحد
-رقم المنشأة
-التاريخ
-الاسم التجاري للشركة
-نوعها
-جنسيتها
-مدة الشركة
-تبدأ من
-وتنتهي في
-مركزها الرئيسي
-هاتف
-الرمز البريدي
-النشاط
-رأس المال
-المديرون
-سلطات المدير/المديرون
-يشهد مكتب السجل التجاري بمدينة
-بأنه تم تسجيل المؤسسة المذكورة أعلاة بمدينة
-وتنتهي صلاحية الشهادات في
-بموجب الإيصال رقم
-وتاريخ
-Return as JSON with keys:
-{
-"رقم_موحد": ...,
-"رقم_المنشأة": ...,
-"التاريخ": ...,
-"الاسم_التجاري": ...,
-"نوعها": ...,
-"جنسيتها": ...,
-"مدة_الشركة": ...,
-"تبدأ_من": ...,
-"تنتهي_في": ...,
-"مركزها_الرئيسي": ...,
-"هاتف": ...,
-"الرمز_البريدي": ...,
-"النشاط": ...,
-"رأس_المال": ...,
-"المديرون": ...,
-"سلطات_المدير": ...,
-"يشهد_مكتب_السجل": ...,
-"تم_تسجيل_المؤسسة": ...,
-"تنتهي_صلاحية_الشهادة": ...,
-"الإيصال_رقم": ...,
-"الإيصال_تاريخ": ...
-}
-If a field is missing, set it to null.
-"""
-url = f"https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent?key={API_KEY}"
-headers = {
-    "Content-Type": "application/json"
-}
-data = {
-    "contents": [
-        {
-            "role": "user",
-            "parts": [
-                {"text": prompt},
-                {
-                    "inline_data": {
-                        "mime_type": "image/jpeg",
-                        "data": image_b64
-                    }
-                }
-            ]
-        }
-    ]
-}
-response = requests.post(url, headers=headers, json=data)
-try:
-    response_text = response.json()['candidates'][0]['content']['parts'][0]['text']
-    # استخراج JSON من النص
-    match = re.search(r"```json\s*(\{.*?\})\s*```", response_text, re.DOTALL)
-    if match:
-        json_text = match.group(1)
-        result = json.loads(json_text)
-        base_name = os.path.splitext(os.path.basename(image_path))[0]
-        folder = os.path.dirname(image_path)
-        json_path = os.path.join(folder, f"{base_name}.json")
-        # إذا الملف موجود نقرأه ونضيف عليه
-        if os.path.exists(json_path):
-            with open(json_path, "r", encoding="utf-8") as f:
-                try:
-                    existing_data = json.load(f)
-                    if not isinstance(existing_data, list):
-                        existing_data = [existing_data]
-                except json.JSONDecodeError:
-                    existing_data = []
-        else:
-            existing_data = []
-        existing_data.append(result)
-        with open(json_path, "w", encoding="utf-8") as f:
-            json.dump(existing_data, f, ensure_ascii=False, indent=2)
-        print(f"✅ تم حفظ النتيجة في الملف: {json_path}")
-        print(json.dumps(result, indent=2, ensure_ascii=False))
-    else:
-        print("❌ لم أتمكن من استخراج JSON نظيف من الرد:")
-        print(response_text)
-except Exception as e:
-    print(f"❌ حدث خطأ أثناء المعالجة: {e}")
-    print("الرد الكامل من Gemini:")
-    print(response.text)