import base64
import json
import re
import requests
import os

# Gemini API key. The GEMINI_API_KEY environment variable takes precedence so
# the real secret does not have to be written into this file; the literal is
# only a placeholder fallback used when the variable is unset or empty.
API_KEY = os.environ.get("GEMINI_API_KEY") or "your_gemini_api_key_here"

# Path to the document image you want to extract data from
image_path = r"C:\Users\ASUS\Downloads\CR_OCR_Cls.v4-original_images.folder\train\CR4\CS000130_CR_jpg.rf.b2528e60546ec399c3088ac69c6a66e2.jpg"

# Read the image bytes and convert them to base64 text so they can be embedded
# in the JSON request body.
with open(image_path, "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode()

# Instruction text sent to the model: the fields to read from the document
# image and the exact JSON keys (en_* and ar_* variants) expected in the reply.
# Written one source line per prompt line; the value starts with a newline and
# ends with a newline.
prompt = (
    "\n"
    "Please extract the following fields in Arabic and English from the tax document image:\n"
    "Taxpayer Name\n"
    "VAT Registration Number\n"
    "Effective Registration Date\n"
    "Taxpayer Address\n"
    "CR/License\n"
    "Contract/ID No\n"
    "Tax period\n"
    "First Filing due date\n"
    "\n"
    "Return the result in a JSON format with these keys:\n"
    "en_taxpayer_name, en_vat_registration_number, en_effective_registration_date, en_taxpayer_address,\n"
    "en_cr_license, en_contract_id_no, en_tax_period, en_first_filing_due_date,\n"
    "ar_taxpayer_name, ar_vat_registration_number, ar_effective_registration_date, ar_taxpayer_address,\n"
    "ar_cr_license, ar_contract_id_no, ar_tax_period, ar_first_filing_due_date\n"
    "\n"
    "If a field is missing, return null.\n"
)

# Endpoint of the generateContent call. The API key is sent in the
# x-goog-api-key header instead of the query string so that it does not appear
# in logged URLs or in the URL that requests includes in its error messages.
url = "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent"

headers = {
    "Content-Type": "application/json",
    "x-goog-api-key": API_KEY,
}

# Request body: a single user turn holding the text instruction followed by
# the base64-encoded image.
data = {
    "contents": [
        {
            "role": "user",
            "parts": [
                {"text": prompt},
                {
                    "inline_data": {
                        # NOTE: the MIME type is fixed to JPEG; change it if
                        # image_path points to a different image format.
                        "mime_type": "image/jpeg",
                        "data": image_b64
                    }
                }
            ]
        }
    ]
}

# Send the request. A timeout is passed because requests otherwise waits
# indefinitely for a server that never answers.
response = requests.post(url, headers=headers, json=data, timeout=120)
if not response.ok:
    # Show the API's own error body instead of failing further down with a
    # KeyError on the missing "candidates" key.
    raise SystemExit(
        f"Gemini API request failed with HTTP {response.status_code}: {response.text}"
    )

# A reply that is not JSON, or that has no candidates / content parts, lacks
# the nested keys read here; stop with a readable message in that case.
try:
    parts = response.json()["candidates"][0]["content"]["parts"]
except (KeyError, IndexError, TypeError, ValueError) as exc:
    raise SystemExit(f"Unexpected Gemini API response: {response.text}") from exc

# Join the text of every part of the first candidate so that a reply split
# across several parts is not truncated to its first piece.
response_text = "".join(part.get("text", "") for part in parts)

# Locate the JSON object inside the reply. A Markdown code fence (with or
# without the "json" tag) is preferred; otherwise fall back to the span from
# the first "{" to the last "}" so that an unfenced reply is still accepted.
match = re.search(r"```(?:json)?\s*(\{.*\})\s*```", response_text, re.DOTALL)
if match:
    json_text = match.group(1)
else:
    start = response_text.find("{")
    end = response_text.rfind("}")
    json_text = response_text[start:end + 1] if 0 <= start < end else ""

try:
    result = json.loads(json_text) if json_text else None
except json.JSONDecodeError:
    # Malformed JSON is reported through the failure branch below rather than
    # ending the script with a traceback.
    result = None

if result is not None:
    # Image file name without its extension
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    # Directory that contains the image
    image_dir = os.path.dirname(image_path)
    # Path of the new JSON file
    output_path = os.path.join(image_dir, f"{image_name}.json")

    # Save the result as a JSON file with the image's name, next to the image.
    # ensure_ascii=False keeps the Arabic values readable in the file.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    print(f"✅ تم حفظ النتيجة في: {output_path}")
    print(json.dumps(result, indent=2, ensure_ascii=False))
else:
    # No parseable JSON object was found; show the raw reply for inspection.
    print("❌ لم أتمكن من استخراج JSON نظيف من الرد:")
    print(response_text)