muhammadravi251001 committed on
Commit
b49767a
·
1 Parent(s): 95a5ca2

init Gradio on HF Space

Browse files
receipt_ai_platform/.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ TESSERACT_CMD=/usr/bin/tesseract
2
+ GEMINI_API_KEY=<your-gemini-api-key>
receipt_ai_platform/Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python 3.10 base image.
FROM python:3.10-slim

# System packages: the Tesseract OCR engine, its development headers and
# poppler-utils. --no-install-recommends keeps optional extras out of the
# image; the apt lists are removed in the same layer to keep it small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    tesseract-ocr \
    libtesseract-dev \
    libleptonica-dev \
    pkg-config \
    poppler-utils \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the requirements file on its own first so the dependency layer is
# reused when only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Port the Gradio server is launched on in app/main.py.
EXPOSE 7860

CMD ["python", "app/main.py"]
receipt_ai_platform/app/__init__.py ADDED
File without changes
receipt_ai_platform/app/__pycache__/db.cpython-310.pyc ADDED
Binary file (1.4 kB). View file
 
receipt_ai_platform/app/__pycache__/nlp.cpython-310.pyc ADDED
Binary file (2.76 kB). View file
 
receipt_ai_platform/app/__pycache__/ocr.cpython-310.pyc ADDED
Binary file (1.57 kB). View file
 
receipt_ai_platform/app/db.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+ from typing import List, Dict, Any
3
+
4
# SQLite database file, resolved relative to the process working directory.
DB_NAME = "receipts.db"

# Schema: one row per purchased line item.
CREATE_TABLE_QUERY = """
CREATE TABLE IF NOT EXISTS receipts (
    id INTEGER PRIMARY KEY,
    item TEXT,
    quantity INTEGER,
    unit_price REAL,
    price REAL,
    merchant TEXT,
    date TEXT
);
"""

# Parameterised insert; ``id`` is omitted so SQLite assigns it.
INSERT_RECEIPT_QUERY = """
INSERT INTO receipts (item, quantity, unit_price, price, merchant, date)
VALUES (?, ?, ?, ?, ?, ?);
"""
22
+
23
+
24
def init_db(db_name: str = DB_NAME) -> None:
    """Create the ``receipts`` table in *db_name* if it does not exist yet.

    Args:
        db_name: Path of the SQLite database file; defaults to ``DB_NAME``.
    """
    from contextlib import closing

    # A sqlite3 connection used as a context manager only commits or rolls
    # back; ``closing`` is what actually releases the connection.
    with closing(sqlite3.connect(db_name)) as conn:
        with conn:
            conn.execute(CREATE_TABLE_QUERY)
27
+
28
+
29
def insert_receipt(
    items: List[Dict[str, Any]], merchant: str, date: str, db_name: str = DB_NAME
) -> None:
    """Insert every line item of one receipt into the ``receipts`` table.

    Args:
        items: Line items; each dict must provide the keys ``item``,
            ``quantity``, ``unit_price`` and ``price``.
        merchant: Merchant name stored on every inserted row.
        date: Receipt date stored on every inserted row.
        db_name: Path of the SQLite database file; defaults to ``DB_NAME``.

    Raises:
        KeyError: If an item dict lacks one of the required keys.
    """
    from contextlib import closing

    rows = [
        (
            entry["item"],
            entry["quantity"],
            entry["unit_price"],
            entry["price"],
            merchant,
            date,
        )
        for entry in items
    ]
    # ``with conn`` commits (or rolls back on error) the batch as a single
    # transaction; ``closing`` then releases the connection, which the
    # connection's own context manager does not do.
    with closing(sqlite3.connect(db_name)) as conn:
        with conn:
            conn.executemany(INSERT_RECEIPT_QUERY, rows)
receipt_ai_platform/app/main.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from ocr import extract_receipt_text, parse_receipt
3
+ from db import init_db, insert_receipt
4
+ from nlp import run_sql
5
+ from datetime import datetime
6
+
7
+ init_db()
8
+
9
+
10
def upload_receipt(path: str) -> dict:
    """OCR a receipt image, store its line items and return the parsed data.

    Args:
        path: Filesystem path of the uploaded image. Empty or ``None`` when
            the button is clicked without an image.

    Returns:
        The dict produced by ``parse_receipt`` (keys ``items``, ``date``,
        ``merchant``).

    Raises:
        gr.Error: If no image was provided.
    """
    # Without this guard a missing image fails deep inside the OCR step with
    # an error that means nothing to the user.
    if not path:
        raise gr.Error("Please upload a receipt image first.")

    text = extract_receipt_text(path)
    data = parse_receipt(text)
    insert_receipt(
        items=data["items"],
        merchant=data["merchant"],
        # Fall back to today's date, in the same DD-MM-YYYY layout the parser
        # extracts, when the receipt carries no recognisable date.
        date=data["date"] or datetime.now().strftime("%d-%m-%Y"),
    )
    return data
19
+
20
+
21
def ask_ai(question: str) -> str:
    """Answer a natural-language question about the stored receipts.

    Args:
        question: The user's question; may be empty or ``None``.

    Returns:
        The answer produced by ``run_sql``, or a prompt to enter a question
        when *question* is missing or contains only whitespace.
    """
    # ``not question`` also covers None, on which ``.strip()`` would raise.
    if not question or not question.strip():
        return "Please add the question first."
    return run_sql(question)
26
+
27
+
28
def ask_ai_with_status(question: str):
    """Stream a status message, then the answer to *question*.

    Args:
        question: The user's question; may be empty or ``None``.

    Yields:
        First a "processing" placeholder and then the answer produced by
        ``run_sql``. For a missing or whitespace-only question, a single
        prompt asking the user to enter one is yielded instead.
    """
    # Reject blank input up front rather than sending it to the model.
    if not question or not question.strip():
        yield "Please add the question first."
        return

    yield "Processing your question..."
    yield run_sql(question)
32
+
33
+
34
# Two-section UI: receipt upload (image in, parsed JSON out) and a
# question box whose answer is rendered as Markdown.
with gr.Blocks() as demo:
    gr.Markdown("## Upload your food receipt")
    with gr.Row():
        img_input = gr.Image(type="filepath")
        output = gr.JSON()
    upload_btn = gr.Button("Upload")
    upload_btn.click(upload_receipt, inputs=img_input, outputs=output)

    gr.Markdown("## Ask about your receipts")
    question_input = gr.Textbox(label="Ask a question")
    answer_output = gr.Markdown()
    ask_btn = gr.Button("Ask")
    # The generator handler streams a status line before the final answer.
    ask_btn.click(ask_ai_with_status, inputs=question_input, outputs=answer_output)

# Start the server only when this file is run as a script, so importing the
# module does not block on a running web server.
if __name__ == "__main__":
    # Bind to all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
receipt_ai_platform/app/nlp.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+ import textwrap
3
+ from dotenv import load_dotenv
4
+ from google import genai
5
+ from google.genai import types
6
+ from db import DB_NAME
7
+
8
# Load variables from a local .env file into the environment before the
# client is constructed.
load_dotenv()
MODEL_NAME = "gemini-1.5-flash"

# NOTE(review): no API key is passed explicitly; presumably the client picks
# it up from the environment (GEMINI_API_KEY) - confirm against the SDK docs.
client = genai.Client()

# Schema shown to the model. Columns are listed in the same order as the real
# table definition in db.py, so that ``SELECT *`` results line up with it.
SCHEMA = textwrap.dedent("""
    CREATE TABLE receipts (
        id INTEGER PRIMARY KEY,
        item TEXT,
        quantity INTEGER,
        unit_price REAL,
        price REAL,
        merchant TEXT,
        date TEXT
    );
""")

# Few-shot rows illustrating value formats (dates are DD-MM-YYYY text).
EXAMPLES = textwrap.dedent("""
    Example rows in receipts table:
    id: 1, item: Grilled chicken sandwich, quantity: 2, unit_price: 8.5, price: 17.0, merchant: Your Company Inc., date: 11-04-2025
    id: 2, item: Caesar salad, quantity: 1, unit_price: 7.0, price: 7.0, merchant: Your Company Inc., date: 11-04-2025
    id: 3, item: Soft drinks, quantity: 3, unit_price: 2.0, price: 6.0, merchant: Your Company Inc., date: 11-04-2025
""")
31
+
32
+
33
def ask_gemini(prompt: str) -> str:
    """Send *prompt* to the configured Gemini model and return its text reply.

    Args:
        prompt: Full prompt text to send.

    Returns:
        The reply with surrounding whitespace removed, or an empty string
        when the response carries no text.
    """
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=prompt,
        config=types.GenerateContentConfig(
            # NOTE(review): a thinking budget of 0 is meant to disable
            # "thinking"; confirm that MODEL_NAME accepts a thinking_config.
            thinking_config=types.ThinkingConfig(thinking_budget=0)
        ),
    )
    # ``response.text`` may be None (e.g. no text parts were returned), in
    # which case calling ``.strip()`` on it directly would raise.
    return (response.text or "").strip()
42
+
43
+
44
def generate_sql(question: str) -> str:
    """Ask the model to translate *question* into a SQLite query.

    Args:
        question: Natural-language question about the receipts table.

    Returns:
        The generated SQL with any Markdown code fence, leading ``sql``
        language tag and surrounding whitespace removed.
    """
    prompt = textwrap.dedent(f"""
    You are a text-to-SQL generator on a food receipts database.

    Here is the SQLite schema:
    {SCHEMA}

    Here are some example rows:
    {EXAMPLES}

    Generate a valid SQL query to answer this question:
    {question}

    Rules:
    - Only return the SQL query.
    - Do not include explanations or markdown formatting.
    """)
    # Models often wrap the query in a fenced block despite the rules above.
    # Drop the backticks, then always trim, so that a fence without a
    # language tag does not leave stray newlines around the query.
    sql = ask_gemini(prompt).strip().strip("`").strip()
    if sql.lower().startswith("sql"):
        # Remove the fence's language tag.
        sql = sql[3:].strip()
    return sql
65
+
66
+
67
def run_sql(question: str) -> str:
    """Answer *question* by generating SQL, running it and phrasing the result.

    The generated query runs against ``DB_NAME``. Any failure while opening
    the database or executing the query is captured and handed to the model
    as the "result", so the user still receives a readable answer.

    Args:
        question: Natural-language question about the stored receipts.

    Returns:
        A human-friendly answer written by the model.
    """
    from contextlib import closing
    from pathlib import Path

    sql_query = generate_sql(question)
    print(f"Generated SQL:\n{sql_query}")

    # The SQL is model-generated from untrusted user input, so the database
    # is opened read-only: a generated DELETE/UPDATE/DROP fails instead of
    # modifying stored receipts.
    read_only_uri = f"{Path(DB_NAME).resolve().as_uri()}?mode=ro"
    try:
        # ``closing`` releases the connection; sqlite3's own context manager
        # would only commit or roll back.
        with closing(sqlite3.connect(read_only_uri, uri=True)) as conn:
            results = conn.execute(sql_query).fetchall()
    except Exception as e:
        # Deliberately broad: whatever goes wrong with a generated query is
        # reported back through the formatter rather than crashing the UI.
        results = [("SQL Error", str(e))]

    # Format into natural language
    format_prompt = textwrap.dedent(f"""
    You are a natural language formatter.

    The user asked: {question}
    The raw SQL result is: {results}

    Write a concise, human-friendly answer.
    """)
    return ask_gemini(format_prompt)
receipt_ai_platform/app/ocr.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytesseract
2
+ from PIL import Image
3
+ from dotenv import load_dotenv
4
+ import re
5
+ import os
6
+
7
# Pull settings from a local .env file into the process environment.
load_dotenv()

# Point pytesseract at an explicit Tesseract binary when TESSERACT_CMD is set
# to a non-empty value; otherwise pytesseract keeps its default command.
tess_cmd = os.environ.get("TESSERACT_CMD")
if tess_cmd:
    pytesseract.pytesseract.tesseract_cmd = tess_cmd
12
+
13
+
14
def extract_receipt_text(image_path: str) -> str:
    """Run Tesseract OCR on the image at *image_path* and return the raw text.

    Args:
        image_path: Filesystem path of the receipt image.

    Returns:
        The text recognised by ``pytesseract.image_to_string``.
    """
    # The context manager closes the underlying file handle once OCR is done.
    with Image.open(image_path) as img:
        text = pytesseract.image_to_string(img)
    print("Extracted Text:\n", text)
    return text
19
+
20
+
21
# "Receipt date: DD-MM-YYYY" (case-insensitive, colon optional).
_DATE_RE = re.compile(r"Receipt date[:\s]+(\d{2}-\d{2}-\d{4})", re.IGNORECASE)

# Merchant hints must END a word, so "include", "since" or "shopping" no
# longer pass for a merchant line while "Bookstore" and "Shops" still do.
_MERCHANT_RE = re.compile(
    r"\binc\b|compan(?:y|ies)\b|stores?\b|shops?\b", re.IGNORECASE
)

# "<qty> <item name> <unit price> $<line total>", prices with two decimals.
_ITEM_RE = re.compile(r"(\d+)\s+(.+?)\s+([\d]+\.\d{2})\s+\$([\d]+\.\d{2})")

# Rows whose name contains any of these are summary lines, not purchases.
_SUMMARY_KEYWORDS = ("subtotal", "tax", "total")


def parse_receipt(text: str) -> dict:
    """Extract merchant, date and line items from OCR'd receipt text.

    Args:
        text: Raw text as returned by the OCR step.

    Returns:
        A dict with the keys:
            ``items``: list of dicts with ``item`` (str), ``quantity`` (int),
                ``unit_price`` (float) and ``price`` (float);
            ``date``: the ``DD-MM-YYYY`` string following "Receipt date", or
                ``None`` when absent;
            ``merchant``: the first line that looks like a business name, or
                ``None`` when no line does.
    """
    lines = text.splitlines()

    date_match = _DATE_RE.search(text)
    receipt_date = date_match.group(1) if date_match else None

    # The first line containing a merchant hint wins.
    merchant = next(
        (line.strip() for line in lines if _MERCHANT_RE.search(line)), None
    )

    items = []
    for line in lines:
        # Strip first: the pattern is anchored at the start of the string,
        # so an indented item line would otherwise be missed.
        match = _ITEM_RE.match(line.strip())
        if not match:
            continue
        qty, item_name, unit_price, total_price = match.groups()

        lowered = item_name.lower()
        if any(keyword in lowered for keyword in _SUMMARY_KEYWORDS):
            continue

        items.append(
            {
                "item": item_name.strip(),
                "quantity": int(qty),
                "unit_price": float(unit_price),
                "price": float(total_price),
            }
        )

    return {
        "items": items,
        "date": receipt_date,
        "merchant": merchant,
    }
receipt_ai_platform/receipts.db ADDED
Binary file (8.19 kB). View file
 
receipt_ai_platform/requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio==5.44.1
2
+ pillow==11.3.0
3
+ pytesseract==0.3.13
4
+ python-dotenv==1.1.1
5
+ google-genai==1.32.0