Commit ·
b49767a
1
Parent(s): 95a5ca2
init Gradio on HF Space
Browse files- receipt_ai_platform/.env +2 -0
- receipt_ai_platform/Dockerfile +20 -0
- receipt_ai_platform/app/__init__.py +0 -0
- receipt_ai_platform/app/__pycache__/db.cpython-310.pyc +0 -0
- receipt_ai_platform/app/__pycache__/nlp.cpython-310.pyc +0 -0
- receipt_ai_platform/app/__pycache__/ocr.cpython-310.pyc +0 -0
- receipt_ai_platform/app/db.py +37 -0
- receipt_ai_platform/app/main.py +48 -0
- receipt_ai_platform/app/nlp.py +88 -0
- receipt_ai_platform/app/ocr.py +57 -0
- receipt_ai_platform/receipts.db +0 -0
- receipt_ai_platform/requirements.txt +5 -0
receipt_ai_platform/.env
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TESSERACT_CMD=/usr/bin/tesseract
|
| 2 |
+
GEMINI_API_KEY=AIzaSyBLjUx6iR8EYICDH_luidnGA7vRQntljgA
|
receipt_ai_platform/Dockerfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Image for the receipt OCR + Gradio app, served on port 7860.
FROM python:3.10-slim

# System packages: the Tesseract engine (driven through pytesseract) plus
# its development headers, pkg-config and the poppler PDF utilities.
RUN apt-get update && apt-get install -y \
    tesseract-ocr \
    libtesseract-dev \
    libleptonica-dev \
    pkg-config \
    poppler-utils \
    && rm -rf /var/lib/apt/lists/*

# Hugging Face Docker Spaces run the container as UID 1000. Create that user
# up front so the app is able to write receipts.db next to the code instead
# of hitting a root-owned, read-only tree.
RUN useradd --create-home --uid 1000 user

WORKDIR /app

# Install Python dependencies first so this layer stays cached across code edits.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Hand the application files, and the directory itself (SQLite creates its
# journal file beside the database), to the unprivileged user.
COPY --chown=user:user . .
RUN chown user:user /app

USER user

EXPOSE 7860

CMD ["python", "app/main.py"]
|
receipt_ai_platform/app/__init__.py
ADDED
|
File without changes
|
receipt_ai_platform/app/__pycache__/db.cpython-310.pyc
ADDED
|
Binary file (1.4 kB). View file
|
|
|
receipt_ai_platform/app/__pycache__/nlp.cpython-310.pyc
ADDED
|
Binary file (2.76 kB). View file
|
|
|
receipt_ai_platform/app/__pycache__/ocr.cpython-310.pyc
ADDED
|
Binary file (1.57 kB). View file
|
|
|
receipt_ai_platform/app/db.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
from typing import List, Dict, Any
|
| 3 |
+
|
| 4 |
+
# SQLite database file. The path is relative, so it resolves against the
# process's current working directory.
DB_NAME = "receipts.db"

# One row per purchased line item; merchant and date are stored on every row
# (see insert_receipt, which repeats them for each item).
CREATE_TABLE_QUERY = """
CREATE TABLE IF NOT EXISTS receipts (
id INTEGER PRIMARY KEY,
item TEXT,
quantity INTEGER,
unit_price REAL,
price REAL,
merchant TEXT,
date TEXT
);
"""

# Parameterized insert; the placeholder order must match the tuples built in
# insert_receipt.
INSERT_RECEIPT_QUERY = """
INSERT INTO receipts (item, quantity, unit_price, price, merchant, date)
VALUES (?, ?, ?, ?, ?, ?);
"""
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def init_db(db_name: str = DB_NAME) -> None:
    """Create the ``receipts`` table if it does not exist yet.

    Args:
        db_name: Path of the SQLite database file to open.
    """
    # Imported locally so the function does not rely on a new module-level import.
    from contextlib import closing

    # A sqlite3 connection used as a context manager only commits or rolls
    # back the transaction; ``closing`` is what actually releases the
    # connection (and its file handle) afterwards.
    with closing(sqlite3.connect(db_name)) as conn, conn:
        conn.execute(CREATE_TABLE_QUERY)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def insert_receipt(
    items: List[Dict[str, Any]], merchant: str, date: str, db_name: str = DB_NAME
) -> None:
    """Store every line item of one receipt as a row in ``receipts``.

    Args:
        items: Parsed line items; each must provide the keys ``item``,
            ``quantity``, ``unit_price`` and ``price``.
        merchant: Merchant name written to every row.
        date: Receipt date string written to every row.
        db_name: Path of the SQLite database file to open.

    Raises:
        KeyError: If an item is missing one of the required keys.
    """
    # Imported locally so the function does not rely on a new module-level import.
    from contextlib import closing

    rows = [
        (
            entry["item"],
            entry["quantity"],
            entry["unit_price"],
            entry["price"],
            merchant,
            date,
        )
        for entry in items
    ]

    # The inner ``conn`` context commits (or rolls back) all rows as a single
    # transaction; ``closing`` then releases the connection, which the sqlite3
    # context manager alone does not do.
    with closing(sqlite3.connect(db_name)) as conn, conn:
        conn.executemany(INSERT_RECEIPT_QUERY, rows)
|
receipt_ai_platform/app/main.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from ocr import extract_receipt_text, parse_receipt
|
| 3 |
+
from db import init_db, insert_receipt
|
| 4 |
+
from nlp import run_sql
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
|
| 7 |
+
# Runs at import time: make sure the receipts table exists before any UI
# handler touches the database.
init_db()
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def upload_receipt(path: str) -> dict:
    """OCR an uploaded receipt image, store its line items, and return the parse.

    Args:
        path: Filesystem path of the uploaded image.

    Returns:
        The dict produced by ``parse_receipt`` (keys ``items``, ``date`` and
        ``merchant``), exactly as parsed.

    Raises:
        gr.Error: If no image was supplied.
    """
    if not path:
        # With no file selected the handler receives an empty value; surface
        # a readable message instead of failing deep inside the OCR call.
        raise gr.Error("Please upload a receipt image first.")

    text = extract_receipt_text(path)
    data = parse_receipt(text)
    insert_receipt(
        items=data["items"],
        merchant=data["merchant"],
        # No date found on the receipt: fall back to today, formatted as
        # day-month-year to match the NN-NN-NNNN shape the parser extracts.
        date=data["date"] or datetime.now().strftime("%d-%m-%Y"),
    )
    return data
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def ask_ai(question: str) -> str:
    """Answer a natural-language question about the stored receipts.

    Args:
        question: The user's question; empty, whitespace-only or ``None``
            input is treated as "no question".

    Returns:
        The answer text from ``run_sql``, or a short prompt asking for a
        question when the input is blank.
    """
    # Guard first so a blank (or missing) question never reaches the model.
    if not question or not question.strip():
        return "Please add the question first."
    return run_sql(question)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def ask_ai_with_status(question: str):
    """Stream a progress message and then the answer for a question.

    Args:
        question: The user's question; empty, whitespace-only or ``None``
            input is rejected without calling the model.

    Yields:
        For a blank question, a single message asking for one. Otherwise a
        "processing" status string followed by the answer from ``run_sql``.
    """
    if not question or not question.strip():
        # Same wording as ask_ai, and no model round-trip for empty input.
        yield "Please add the question first."
        return

    yield "Processing your question..."
    yield run_sql(question)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# Two-section page: receipt upload on top, question answering below.
with gr.Blocks() as demo:
    gr.Markdown("## Upload your food receipt")
    # NOTE(review): assumes the row holds just the image input and the JSON
    # output side by side — confirm against the intended layout.
    with gr.Row():
        img_input = gr.Image(type="filepath")  # handler receives a file path
        output = gr.JSON()
    upload_btn = gr.Button("Upload")
    upload_btn.click(upload_receipt, inputs=img_input, outputs=output)

    gr.Markdown("## Ask about your receipts")
    question_input = gr.Textbox(label="Ask a question")
    answer_output = gr.Markdown()
    ask_btn = gr.Button("Ask")
    # The handler is a generator: it yields a status string, then the answer.
    ask_btn.click(ask_ai_with_status, inputs=question_input, outputs=answer_output)

# Listen on all interfaces, on the same port the Dockerfile exposes (7860).
demo.launch(server_name="0.0.0.0", server_port=7860)
|
receipt_ai_platform/app/nlp.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import textwrap
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from google import genai
|
| 5 |
+
from google.genai import types
|
| 6 |
+
from db import DB_NAME
|
| 7 |
+
|
| 8 |
+
# Load variables from a .env file into the environment before the client is built.
load_dotenv()
MODEL_NAME = "gemini-1.5-flash"

# No key is passed explicitly; presumably the SDK picks it up from the
# environment (e.g. GEMINI_API_KEY) — confirm.
client = genai.Client()

# Table definition shown to the model in the text-to-SQL prompt.
# NOTE(review): the column order here (date, merchant) differs from
# CREATE_TABLE_QUERY in db.py (merchant, date); the two are maintained by hand.
SCHEMA = textwrap.dedent("""
CREATE TABLE receipts (
id INTEGER PRIMARY KEY,
item TEXT,
quantity INTEGER,
unit_price REAL,
price REAL,
date TEXT,
merchant TEXT
);
""")

# Sample rows included in the prompt so the model sees concrete value formats.
EXAMPLES = textwrap.dedent("""
Example rows in receipts table:
id: 1, item: Grilled chicken sandwich, quantity: 2, unit_price: 8.5, price: 17.0, date: 11-04-2025, merchant: Your Company Inc.
id: 2, item: Caesar salad, quantity: 1, unit_price: 7.0, price: 7.0, date: 11-04-2025, merchant: Your Company Inc.
id: 3, item: Soft drinks, quantity: 3, unit_price: 2.0, price: 6.0, date: 11-04-2025, merchant: Your Company Inc.
""")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def ask_gemini(prompt: str) -> str:
    """Send ``prompt`` to the configured Gemini model and return its text reply.

    Args:
        prompt: Full prompt text to send.

    Returns:
        The reply with surrounding whitespace removed, or an empty string
        when the response carries no text.
    """
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=prompt,
        config=types.GenerateContentConfig(
            # Presumably a zero budget turns the model's "thinking" phase off
            # — confirm this is supported by MODEL_NAME.
            thinking_config=types.ThinkingConfig(thinking_budget=0)
        ),
    )
    # ``response.text`` is optional: it is None when no text part comes back,
    # so guard before stripping instead of raising AttributeError.
    return (response.text or "").strip()
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def generate_sql(question: str) -> str:
    """Ask the model for a SQL query that answers ``question``.

    Args:
        question: Natural-language question about the receipts table.

    Returns:
        The SQL text from the model's reply, with any Markdown code fence,
        stray backticks and leading ``sql``/``sqlite`` language tag removed.
    """
    # Imported locally so the function does not rely on a new module-level import.
    import re

    # Dedent the template *before* filling it in. Dedenting the already
    # formatted text is a no-op, because the multi-line schema/examples start
    # at column 0 and erase the common margin.
    template = textwrap.dedent("""
        You are a text-to-SQL generator on a food receipts database.

        Here is the SQLite schema:
        {schema}

        Here are some example rows:
        {examples}

        Generate a valid SQL query to answer this question:
        {question}

        Rules:
        - Only return the SQL query.
        - Do not include explanations or markdown formatting.
    """)
    prompt = template.format(schema=SCHEMA, examples=EXAMPLES, question=question)

    reply = ask_gemini(prompt).strip()

    # The model may still wrap the query in a Markdown fence, possibly with
    # prose around it; prefer the fenced content when a complete fence exists.
    fenced = re.search(r"```(.*?)```", reply, re.DOTALL)
    sql = fenced.group(1) if fenced else reply.strip("`")

    # Drop a leading language tag such as "sql" or "sqlite" left over from
    # the fence header; no valid query starts with those words.
    sql = re.sub(r"^\s*(?:sqlite3?|sql)\b", "", sql, count=1, flags=re.IGNORECASE)
    return sql.strip()
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def run_sql(question: str) -> str:
    """Answer ``question`` by generating SQL, running it, and phrasing the result.

    Args:
        question: Natural-language question about the stored receipts.

    Returns:
        The model's natural-language summary of the query result. If the
        query fails, the database error is handed to the model in place of
        the rows, so the reply describes the failure.
    """
    # Imported locally so the function does not rely on new module-level imports.
    from contextlib import closing
    from pathlib import Path

    sql_query = generate_sql(question)
    print(f"Generated SQL:\n{sql_query}")

    # The query text is produced by a model steered by user input, so run it
    # on a read-only connection: SELECTs work, while INSERT/UPDATE/DELETE/DROP
    # are rejected by SQLite instead of modifying the receipts.
    db_uri = f"{Path(DB_NAME).resolve().as_uri()}?mode=ro"

    try:
        # ``closing`` releases the connection; sqlite3's own context manager
        # would only end the transaction.
        with closing(sqlite3.connect(db_uri, uri=True)) as conn:
            results = conn.execute(sql_query).fetchall()
    except (sqlite3.Error, sqlite3.Warning, ValueError) as e:
        # sqlite3.Warning covers "one statement at a time" on older Pythons;
        # ValueError covers query text with an embedded NUL character.
        results = [("SQL Error", str(e))]

    # Format into natural language. The template is dedented before the
    # values are inserted so multi-line values cannot defeat the dedent.
    template = textwrap.dedent("""
        You are a natural language formatter.

        The user asked: {question}
        The raw SQL result is: {results}

        Write a concise, human-friendly answer.
    """)
    return ask_gemini(template.format(question=question, results=results))
|
receipt_ai_platform/app/ocr.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytesseract
|
| 2 |
+
from PIL import Image
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
import re
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
# Load variables from a .env file into the environment, if one is present.
load_dotenv()

# Optional override for the Tesseract executable location; when the variable
# is unset or empty, pytesseract's own default is left untouched.
tess_cmd = os.getenv("TESSERACT_CMD")
if tess_cmd:
    pytesseract.pytesseract.tesseract_cmd = tess_cmd
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def extract_receipt_text(image_path: str) -> str:
    """Run Tesseract OCR over the image at ``image_path`` and return the raw text.

    Args:
        image_path: Filesystem path of the receipt image.

    Returns:
        The text recognised by Tesseract, unmodified.
    """
    # Open through a context manager so the underlying file handle is
    # released as soon as OCR finishes, rather than whenever the image
    # object happens to be garbage-collected.
    with Image.open(image_path) as img:
        text = pytesseract.image_to_string(img)
    print("Extracted Text:\n", text)
    return text
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def parse_receipt(text: str) -> dict:
    """Extract the merchant, date and line items from OCR'd receipt text.

    Args:
        text: Raw OCR output; ``None`` or an empty string yields an empty parse.

    Returns:
        A dict with:
          - ``items``: list of dicts with ``item`` (str), ``quantity`` (int),
            ``unit_price`` (float) and ``price`` (float);
          - ``date``: the ``NN-NN-NNNN`` string following "Receipt date", or
            ``None`` if absent;
          - ``merchant``: the first line containing "inc", "company", "store"
            or "shop" (case-insensitive), stripped, or ``None``.
    """
    # Compile each pattern once instead of rebuilding it on every line.
    date_pattern = re.compile(r"Receipt date[:\s]+(\d{2}-\d{2}-\d{4})", re.IGNORECASE)
    merchant_pattern = re.compile(r"(inc|company|store|shop)", re.IGNORECASE)
    # Row layout: <qty> <description> <unit price> $<line total>
    item_pattern = re.compile(r"(\d+)\s+(.+?)\s+([\d]+\.\d{2})\s+\$([\d]+\.\d{2})")
    # Summary rows share the item layout but are not purchases.
    summary_keywords = ("subtotal", "tax", "total")

    text = text or ""
    lines = text.splitlines()

    date_match = date_pattern.search(text)
    receipt_date = date_match.group(1) if date_match else None

    # First line that looks like a business name wins.
    merchant = next(
        (line.strip() for line in lines if merchant_pattern.search(line)),
        None,
    )

    items = []
    for line in lines:
        # match() is anchored at the start of the string, so drop leading
        # whitespace first; otherwise indented OCR rows are silently skipped.
        match = item_pattern.match(line.lstrip())
        if not match:
            continue

        qty, item_name, unit_price, total_price = match.groups()
        lowered_name = item_name.lower()
        if any(keyword in lowered_name for keyword in summary_keywords):
            continue

        items.append(
            {
                "item": item_name.strip(),
                "quantity": int(qty),
                "unit_price": float(unit_price),
                "price": float(total_price),
            }
        )

    return {
        "items": items,
        "date": receipt_date,
        "merchant": merchant,
    }
|
receipt_ai_platform/receipts.db
ADDED
|
Binary file (8.19 kB). View file
|
|
|
receipt_ai_platform/requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==5.44.1
|
| 2 |
+
pillow==11.3.0
|
| 3 |
+
pytesseract==0.3.13
|
| 4 |
+
python-dotenv==1.1.1
|
| 5 |
+
google-genai==1.32.0
|