# OCR-pipeline / gemini_requestV2.py
# abdullah-1111's picture
# gemini_prompts_all_ocr
# 65d15f6 verified
import base64
import json
import re
import requests
import os
# Gemini API key. Read from the GEMINI_API_KEY environment variable so the real
# secret never has to be written into this file; the original placeholder is
# kept as the fallback so the script behaves as before when the variable is
# not set.
API_KEY = os.environ.get("GEMINI_API_KEY", "your_gemini_api_key_here")
# Location of the document image whose fields will be extracted.
image_path = r"C:\Users\ASUS\Downloads\CR_OCR_Cls.v4-original_images.folder\train\CR4\CS000130_CR_jpg.rf.b2528e60546ec399c3088ac69c6a66e2.jpg"

# Read the image bytes and convert them to base64 text, which is the form the
# request payload embeds.
with open(image_path, mode="rb") as image_file:
    raw_image = image_file.read()
image_b64 = str(base64.b64encode(raw_image), "utf-8")
# Instruction text sent to the model together with the image. It lists the
# fields to read and the exact JSON keys (English "en_" and Arabic "ar_"
# variants) the answer should use. The text starts and ends with a newline.
prompt = (
    "\n"
    "Please extract the following fields in Arabic and English from the tax document image:\n"
    "Taxpayer Name\n"
    "VAT Registration Number\n"
    "Effective Registration Date\n"
    "Taxpayer Address\n"
    "CR/License\n"
    "Contract/ID No\n"
    "Tax period\n"
    "First Filing due date\n"
    "Return the result in a JSON format with these keys:\n"
    "en_taxpayer_name, en_vat_registration_number, en_effective_registration_date, en_taxpayer_address,\n"
    "en_cr_license, en_contract_id_no, en_tax_period, en_first_filing_due_date,\n"
    "ar_taxpayer_name, ar_vat_registration_number, ar_effective_registration_date, ar_taxpayer_address,\n"
    "ar_cr_license, ar_contract_id_no, ar_tax_period, ar_first_filing_due_date\n"
    "If a field is missing, return null.\n"
)
# REST endpoint for the model's generateContent method.
url = "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent"

# The API key is sent in the x-goog-api-key header rather than as a "?key="
# query parameter, so it does not become part of the URL (URLs tend to be
# echoed in logs and in HTTP client error messages).
headers = {
    "Content-Type": "application/json",
    "x-goog-api-key": API_KEY,
}

# Request body: a single user turn whose parts are the text prompt followed by
# the base64-encoded image passed as inline data.
data = {
    "contents": [
        {
            "role": "user",
            "parts": [
                {"text": prompt},
                {
                    "inline_data": {
                        # NOTE: hard-coded to JPEG; this has to match the
                        # actual format of the file at image_path.
                        "mime_type": "image/jpeg",
                        "data": image_b64,
                    }
                },
            ],
        }
    ]
}
# Send the request. A timeout is set so a stalled connection cannot hang the
# script indefinitely.
response = requests.post(url, headers=headers, json=data, timeout=60)

# Fail with the status code and server message on HTTP errors, instead of a
# confusing KeyError further down when the error body has no "candidates".
if not response.ok:
    raise RuntimeError(
        f"Gemini API request failed with HTTP {response.status_code}: {response.text}"
    )

response_payload = response.json()

# Pull the text of the first part of the first candidate. The path can be
# missing (for example when no candidate is returned), so report the whole
# payload in that case to make the cause visible.
try:
    response_text = response_payload["candidates"][0]["content"]["parts"][0]["text"]
except (KeyError, IndexError, TypeError) as exc:
    raise RuntimeError(
        f"Gemini response did not contain a text candidate: {response_payload}"
    ) from exc
def _extract_json_object(text):
    """Return the JSON object embedded in *text*, or None if none parses.

    Candidates are tried in order:
    1. the ``{...}`` span inside a Markdown code fence, with or without a
       ``json`` language tag;
    2. the span from the first ``{`` to the last ``}`` anywhere in the text,
       which covers replies that are not fenced at all.
    A candidate that is not valid JSON is skipped rather than raising.
    """
    candidates = []
    fenced = re.search(r"```(?:json)?\s*(\{.*\})\s*```", text, re.DOTALL)
    if fenced:
        candidates.append(fenced.group(1))
    bare = re.search(r"\{.*\}", text, re.DOTALL)
    if bare:
        candidates.append(bare.group(0))

    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue
    return None


# Strip any ```json ... ``` wrapping from the reply and parse the JSON inside.
result = _extract_json_object(response_text)
if result is not None:
    # Image file name without its extension.
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    # Directory that contains the image.
    image_dir = os.path.dirname(image_path)
    # Path of the new JSON file.
    output_path = os.path.join(image_dir, f"{image_name}.json")
    # Save the result as JSON next to the image, under the same base name.
    # ensure_ascii=False keeps the Arabic values readable in the file.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    print(f"✅ تم حفظ النتيجة في: {output_path}")
    print(json.dumps(result, indent=2, ensure_ascii=False))
else:
    # No parsable JSON object in the reply: show the raw text for inspection.
    print("❌ لم أتمكن من استخراج JSON نظيف من الرد:")
    print(response_text)