Spaces:

zixuanvtzx
/

docker-bill

Sleeping

App Files Files Community

docker-bill / app.py

zixuanvtzx

Update app.py

8faccbf verified 9 months ago

raw

history blame contribute delete

3.86 kB

	import gradio as gr
	from PIL import Image
	import pytesseract
	import cv2
	import numpy as np
	import re
	import csv
	import tempfile
	import os

	# Accepted spellings for each optional plan feature. Keys are the column
	# names used in the parsed output; each list is searched in the order given.
	feature_synonyms = {
	    "Caller Number Display": [
	        "Caller Number Display",
	        "Caller ID",
	        "Calling Number",
	    ],
	    "Incoming Calls": [
	        "Incoming Calls",
	        "Inbound Calls",
	    ],
	    "Incoming SMS": [
	        "Incoming SMS",
	        "Received SMS",
	        "Inbound SMS",
	    ],
	}

	def find_feature(text, synonyms_list):
	    """Return the first feature synonym found in *text*, with its status word.

	    Synonyms are tried in the order given; matching is case-insensitive.
	    If the synonym is followed (after optional whitespace) by one of the
	    status words ``FREE``, ``Included`` or ``Yes``, that word is part of
	    the returned string.

	    Args:
	        text: The text to search. ``None`` or an empty string yields ``""``.
	        synonyms_list: Iterable of literal feature names to look for.

	    Returns:
	        The matched text with surrounding whitespace stripped, or ``""``
	        when none of the synonyms occurs in *text*.
	    """
	    if not text:
	        return ""

	    # Unescaped "|" so this is a real alternation; the trailing \b stops
	    # "Yes" from matching inside a longer word such as "Yesterday".
	    status_suffix = r"(?:\s*(FREE|Included|Yes)\b)?"

	    for feature_name in synonyms_list:
	        # Synonyms are literal labels, not regex fragments, so escape them.
	        pattern = rf"({re.escape(feature_name)}){status_suffix}"
	        match = re.search(pattern, text, re.IGNORECASE)
	        if match:
	            return match.group(0).strip()
	    return ""

	def parse_text(text):
	    """Extract the known bill fields from OCR text.

	    Args:
	        text: Raw text of the bill. ``None`` is treated as an empty string.

	    Returns:
	        A dict with the keys "Mobile Number", "Receipt Date",
	        "Total Amount Paid", "Local Data", "Call Time", "Number of SMS",
	        "Roaming Details", "Caller Number Display", "Incoming Calls" and
	        "Incoming SMS", in that order. A field that is not found in *text*
	        is left as an empty string.
	    """
	    text = text or ""

	    data = {
	        "Mobile Number": "",
	        "Receipt Date": "",
	        "Total Amount Paid": "",
	        "Local Data": "",
	        "Call Time": "",
	        "Number of SMS": "",
	        "Roaming Details": "",
	        "Caller Number Display": "",
	        "Incoming Calls": "",
	        "Incoming SMS": "",
	    }

	    # (field, pattern) pairs; group 1 of each pattern is the stored value.
	    # Whitespace around the optional ":" / "-" separator is itself optional,
	    # so "Mobile No: 123" and "Mobile No 123" are both accepted.
	    # The usage patterns capture the rest of the line with [^\n]*, which
	    # stops at the newline or at the end of the text.
	    field_patterns = (
	        ("Mobile Number", r"Mobile\s*No\.?\s*[:\-]?\s*(\d+)"),
	        (
	            "Receipt Date",
	            r"Receipt\s+Date\s*[:\-]?\s*(\d{1,2}[-/][A-Za-z]{3}[-/]\d{4})",
	        ),
	        (
	            "Total Amount Paid",
	            r"Total\s+Amount\s+Paid\s*[:\-]?\s*\$?\s*(\d[\d.,]*)",
	        ),
	        ("Local Data", r"(\d+\s*GB\s*plan[^\n]*)"),
	        ("Call Time", r"(\d+\s*mins[^\n]*)"),
	        ("Number of SMS", r"(\d+\s*SMS[^\n]*)"),
	        ("Roaming Details", r"(\d+\s*GB[^\n]*?Roaming[^\n]*)"),
	    )
	    for field, pattern in field_patterns:
	        match = re.search(pattern, text, re.IGNORECASE)
	        if match:
	            data[field] = match.group(1).strip()

	    # Feature rows are located by any of their known synonyms.
	    for feature in ("Caller Number Display", "Incoming Calls", "Incoming SMS"):
	        data[feature] = find_feature(text, feature_synonyms[feature])

	    return data

	def ocr_extract(image):
	    """Run OCR on a bill image and export the parsed fields as a CSV file.

	    Args:
	        image: The uploaded image (the interface passes a PIL image), or
	            ``None`` when nothing was uploaded.

	    Returns:
	        A ``(display_text, csv_path)`` tuple. ``display_text`` holds one
	        "field: value" line per parsed field and ``csv_path`` is the path
	        of a temporary CSV file with a header row and one data row. When
	        *image* is ``None`` the tuple is
	        ``("Please upload an image.", None)``.
	    """
	    if image is None:
	        return "Please upload an image.", None

	    # The colour conversion below needs exactly three channels, so normalise
	    # other PIL modes (RGBA, L, P, ...) to RGB first.
	    if isinstance(image, Image.Image):
	        image = image.convert("RGB")

	    # Preprocess for OCR: grayscale, then a fixed binary threshold.
	    rgb = np.array(image)
	    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
	    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)

	    # Extract the text and parse it into the known fields.
	    text = pytesseract.image_to_string(thresh)
	    parsed_data = parse_text(text)

	    # delete=False because the path is returned and read after this function
	    # exits; newline="" lets the csv module control line endings itself.
	    with tempfile.NamedTemporaryFile(
	        delete=False, suffix=".csv", mode="w", encoding="utf-8", newline=""
	    ) as tmpfile:
	        writer = csv.DictWriter(tmpfile, fieldnames=list(parsed_data))
	        writer.writeheader()
	        writer.writerow(parsed_data)
	        tmpfile_path = tmpfile.name

	    display_text = "\n".join(f"{k}: {v}" for k, v in parsed_data.items())

	    # Return the path string rather than an in-memory buffer.
	    return display_text, tmpfile_path

	# Gradio UI: one image input, wired to ocr_extract, with a text box for the
	# extracted fields and a file output for the CSV download.
	iface = gr.Interface(
	    title="Bill OCR Scanner with CSV Export",
	    description="Upload a bill image, extract key fields, and download the data as CSV.",
	    fn=ocr_extract,
	    inputs=gr.Image(type="pil"),
	    outputs=[
	        gr.Textbox(label="Extracted Info"),
	        gr.File(label="Download CSV"),
	    ],
	)

	if __name__ == "__main__":
	    # Bind to every network interface on port 7860 when run as a script.
	    iface.launch(
	        server_port=7860,
	        server_name="0.0.0.0",
	    )