File size: 4,522 Bytes

b278f6d

import base64
import json
import re
import requests
import os

# Gemini API key. The GEMINI_API_KEY environment variable takes precedence so
# the secret does not have to live in source control.
# SECURITY NOTE(review): the literal fallback below is a credential committed
# in plain text. It is kept only so existing runs keep working; rotate the key
# and delete the fallback.
API_KEY = os.environ.get("GEMINI_API_KEY") or "AIzaSyBr2-dUqHDZkk20hlWeEcpWnVVdkq9fqyE"

# Folder containing the input images. The variable name says "cr1", but the
# path (and the prompt used by this script) are for CR6 documents; the name is
# kept because the processing loop refers to it.
cr1_images_folder = r"C:\Users\ASUS\OneDrive - Binder\Desktop\test.orch\classified\CR6"

# Folder that receives one JSON file per processed image.
output_json_folder = r"C:\Users\ASUS\OneDrive - Binder\Desktop\test.orch\classified\CR6\cr6_json"

# Create the output folder (and any missing parents) up front; a folder that
# already exists is not an error.
os.makedirs(output_json_folder, exist_ok=True)

# Instruction text sent to the model together with each image. It lists 15
# Arabic field labels to extract and then gives an 18-key JSON template whose
# values default to null.
# NOTE(review): the last three template keys (building_number,
# additional_number, unit_number) have no matching label in the field list.
# NOTE(review): the labels on the "online store link" and "website" lines look
# like they misspell the Arabic word for "electronic" — confirm against the
# document before changing, because this string is sent verbatim.
# The original note here read "Exact same prompt"; presumably it refers to a
# sibling script — TODO confirm.
prompt = """
Please extract the following fields from the CR6 commercial registration document image, all in Arabic only:

- الكيان التجاري
- حالة السجل
- مدة المنشأة
- الرقم الوطني الموحد للمنشأة
- رابط المتجر الإكتروني
- رأس المال
- المدينة
- صندوق البريد
- الرمز البريدي
- هاتف
- تاريخ اصدار السجل
- تاريخ انتهاء السجل
- الموقع الاكتروني
- العنوان
- النشاط التجاري

Please return the result as JSON with the following exact keys, and if any field is missing, set its value to null:

{
  "commercial_entity": null,
  "registry_status": null,
  "establishment_duration": null,
  "unified_national_number": null,
  "online_store_link": null,
  "capital": null,
  "city": null,
  "po_box": null,
  "postal_code": null,
  "phone": null,
  "registry_issue_date": null,
  "registry_expiry_date": null,
  "website": null,
  "address": null,
  "business_activity": null,
  "building_number": null,
  "additional_number": null,
  "unit_number": null
}
"""


# Gemini generateContent endpoint for the gemini-1.5-flash model.
# The API key is sent in the x-goog-api-key header instead of the query string
# so that it does not appear in URLs echoed by HTTP error messages or logs.
url = "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent"
headers = {
    "Content-Type": "application/json",
    "x-goog-api-key": API_KEY,
}

# Accepted image extensions mapped to the MIME type reported to the API.
MIME_TYPES_BY_EXTENSION = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
}

# Keys filled from the address string, in the order the whitespace-separated
# parts are expected to appear. They match the keys requested in the prompt.
ADDRESS_PART_KEYS = (
    "building_number",
    "city",
    "postal_code",
    "additional_number",
    "unit_number",
)

# Upper bound, in seconds, for one API call so a stalled connection cannot
# hang the whole batch.
REQUEST_TIMEOUT_SECONDS = 120


def mime_type_for(image_name):
    """Return the MIME type for a supported image file name, else None.

    The match is case-insensitive and based on the file name's suffix.
    """
    lowered = image_name.lower()
    for extension, mime_type in MIME_TYPES_BY_EXTENSION.items():
        if lowered.endswith(extension):
            return mime_type
    return None


def extract_json_object(response_text):
    """Return the JSON object contained in the model's reply, or None.

    A fenced code block (with or without the "json" tag) is preferred. When
    the reply has no fence, the text between the first "{" and the last "}"
    is tried instead. None is returned when nothing parses to a JSON object.
    """
    fenced = re.search(r"```(?:json)?\s*(\{.*\})\s*```", response_text, re.DOTALL)
    if fenced:
        candidate = fenced.group(1)
    else:
        start = response_text.find("{")
        end = response_text.rfind("}")
        if start == -1 or end <= start:
            return None
        candidate = response_text[start:end + 1]

    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


def fill_address_parts(result):
    """Fill the address sub-fields of *result* in place and return it.

    The address is read from the "address" key requested in the prompt,
    falling back to the Arabic key the previous version of this script looked
    up. When the address splits into exactly five whitespace-separated parts
    they are assigned, in order, to ADDRESS_PART_KEYS. Values the model
    already supplied are never overwritten; keys that are still missing end
    up as None so every output file has the same shape.
    """
    address = result.get("address") or result.get("العنوان")
    parts = address.split() if isinstance(address, str) else []
    if len(parts) != len(ADDRESS_PART_KEYS):
        parts = [None] * len(ADDRESS_PART_KEYS)

    for key, value in zip(ADDRESS_PART_KEYS, parts):
        if result.get(key) is None:
            result[key] = value
    return result


def main():
    """Send every supported image in the input folder to Gemini and save the JSON.

    Images whose output file already exists are skipped. A failure on one
    image is reported and the batch continues with the next one.
    """
    # Sorted so that runs process files in a predictable order.
    for image_name in sorted(os.listdir(cr1_images_folder)):
        mime_type = mime_type_for(image_name)
        if mime_type is None:
            continue

        image_path = os.path.join(cr1_images_folder, image_name)
        base_name = os.path.splitext(image_name)[0]
        output_file = os.path.join(output_json_folder, base_name + ".json")

        # Skip if JSON already exists
        if os.path.exists(output_file):
            print(f"⏩ Skipped {image_name} (JSON file already exists)")
            continue

        try:
            # Read image and convert to base64
            with open(image_path, "rb") as f:
                image_b64 = base64.b64encode(f.read()).decode()

            data = {
                "contents": [
                    {
                        "role": "user",
                        "parts": [
                            {"text": prompt},
                            {
                                "inline_data": {
                                    "mime_type": mime_type,
                                    "data": image_b64,
                                }
                            },
                        ],
                    }
                ]
            }

            response = requests.post(
                url,
                headers=headers,
                json=data,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
            response_text = response.json()['candidates'][0]['content']['parts'][0]['text']

            result = extract_json_object(response_text)
            if result is None:
                print(f"❌ Failed to extract JSON from: {image_name}")
                print(response_text)
                continue

            fill_address_parts(result)

            # Serialize before opening the file so a serialization failure
            # cannot leave behind a partial file that later runs would skip.
            payload = json.dumps(result, ensure_ascii=False, indent=2)
            with open(output_file, "w", encoding="utf-8") as f:
                f.write(payload)

            print(f"✅ Processed: {image_name}")

        except Exception as e:
            # Top-level boundary for one image: report and move on.
            # HTTP errors from requests include the request URL, which may
            # carry the API key as a query parameter, so redact it.
            message = str(e)
            if API_KEY:
                message = message.replace(API_KEY, "***")
            print(f"❌ Error processing image {image_name}: {message}")


# Run the batch only when executed as a script, so importing this module
# (for example to reuse the helpers) does not start sending requests.
if __name__ == "__main__":
    main()