binder-sa
/

OCR-pipeline

Model card Files Files and versions

OCR-pipeline / gemini_requestV2.py

abdullah-1111's picture

gemini_prompts_all_ocr

65d15f6 verified 9 months ago

history blame contribute delete

2.86 kB

	import base64
	import json
	import re
	import requests
	import os

	# Extract bilingual (Arabic/English) tax-certificate fields from one document
	# image with the Gemini API and save the parsed JSON next to the image.
	#
	# The work is split into small helpers and only runs when the file is executed
	# as a script, so the helpers can be imported without touching disk or network.

	# Gemini API key: taken from the GEMINI_API_KEY environment variable when set,
	# otherwise the placeholder below (replace it with a real key).
	API_KEY = os.environ.get("GEMINI_API_KEY", "your_gemini_api_key_here")

	# Path to the document image you want to extract data from.
	image_path = r"C:\Users\ASUS\Downloads\CR_OCR_Cls.v4-original_images.folder\train\CR4\CS000130_CR_jpg.rf.b2528e60546ec399c3088ac69c6a66e2.jpg"

	prompt = """
	Please extract the following fields in Arabic and English from the tax document image:
	Taxpayer Name
	VAT Registration Number
	Effective Registration Date
	Taxpayer Address
	CR/License
	Contract/ID No
	Tax period
	First Filing due date

	Return the result in a JSON format with these keys:
	en_taxpayer_name, en_vat_registration_number, en_effective_registration_date, en_taxpayer_address,
	en_cr_license, en_contract_id_no, en_tax_period, en_first_filing_due_date,
	ar_taxpayer_name, ar_vat_registration_number, ar_effective_registration_date, ar_taxpayer_address,
	ar_cr_license, ar_contract_id_no, ar_tax_period, ar_first_filing_due_date

	If a field is missing, return null.
	"""

	url = "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent"

	# The key travels in a header rather than the query string so it does not end
	# up in URLs that get logged or echoed inside exception messages.
	headers = {
	    "Content-Type": "application/json",
	    "x-goog-api-key": API_KEY,
	}

	# Seconds to wait for the API before giving up instead of hanging forever.
	REQUEST_TIMEOUT = 60

	# A ```json ... ``` (or bare ``` ... ```) fence wrapping one JSON object.
	# DOTALL lets the object span multiple lines.
	JSON_FENCE_RE = re.compile(r"```(?:json)?\s*(\{.*\})\s*```", re.DOTALL)


	def encode_image(path):
	    """Return the file at *path* as a base64-encoded ASCII string."""
	    with open(path, "rb") as image_file:
	        return base64.b64encode(image_file.read()).decode("ascii")


	def build_payload(prompt_text, image_b64, mime_type="image/jpeg"):
	    """Build the generateContent request body: one user turn, text + image."""
	    return {
	        "contents": [
	            {
	                "role": "user",
	                "parts": [
	                    {"text": prompt_text},
	                    {
	                        "inline_data": {
	                            "mime_type": mime_type,
	                            "data": image_b64,
	                        }
	                    },
	                ],
	            }
	        ]
	    }


	def extract_response_text(payload):
	    """Return the text of the first candidate in a decoded API response.

	    Raises:
	        ValueError: if the payload has no candidate content (for example an
	            error object or a blocked prompt); the payload is included in
	            the message to make the cause visible.
	    """
	    try:
	        parts = payload["candidates"][0]["content"]["parts"]
	    except (KeyError, IndexError, TypeError) as exc:
	        raise ValueError(f"Unexpected Gemini response: {payload!r}") from exc
	    # Join every text part so a reply split across parts is not truncated.
	    return "".join(
	        part.get("text", "") for part in parts if isinstance(part, dict)
	    )


	def extract_json(response_text):
	    """Parse the JSON object contained in the model's reply.

	    Looks for a fenced ```json block first and falls back to treating the
	    whole reply as JSON. Returns the decoded dict, or None when no valid
	    JSON object can be recovered.
	    """
	    fenced = JSON_FENCE_RE.search(response_text)
	    candidate = fenced.group(1) if fenced else response_text.strip()
	    try:
	        parsed = json.loads(candidate)
	    except json.JSONDecodeError:
	        return None
	    return parsed if isinstance(parsed, dict) else None


	def output_path_for(path):
	    """Return the .json path with the same folder and base name as *path*."""
	    image_name = os.path.splitext(os.path.basename(path))[0]
	    return os.path.join(os.path.dirname(path), f"{image_name}.json")


	def main():
	    """Send the image to Gemini, then save and print the extracted fields."""
	    # Read the image and convert it to base64.
	    image_b64 = encode_image(image_path)

	    response = requests.post(
	        url,
	        headers=headers,
	        json=build_payload(prompt, image_b64),
	        timeout=REQUEST_TIMEOUT,
	    )
	    # Fail loudly on HTTP errors rather than with a KeyError further down.
	    response.raise_for_status()
	    response_text = extract_response_text(response.json())

	    # Strip the ```json ... ``` markers from the reply and decode it.
	    result = extract_json(response_text)
	    if result is None:
	        print("❌ لم أتمكن من استخراج JSON نظيف من الرد:")
	        print(response_text)
	        return

	    # Save the result as JSON with the image's name, in the image's folder.
	    output_path = output_path_for(image_path)
	    with open(output_path, "w", encoding="utf-8") as f:
	        json.dump(result, f, ensure_ascii=False, indent=2)

	    print(f"✅ تم حفظ النتيجة في: {output_path}")
	    print(json.dumps(result, indent=2, ensure_ascii=False))


	if __name__ == "__main__":
	    main()