Spaces:
Sleeping
Sleeping
| """ | |
| database.py — SQLite database operations for the Bill/Invoice Scanner. | |
| Responsibilities: | |
| - init_db(): create the invoices table if it does not exist | |
| - save_invoice(): insert one invoice record and return the new row id | |
| - fetch_all(): return all records as a pandas DataFrame (ordered by id descending) | |
| - delete_invoice(): remove one record by its id | |
| Standard: | |
| - Each function opens and closes its own connection (thread-safe for Streamlit). | |
| - No shared global connections. | |
| """ | |
| import sqlite3 | |
| from pathlib import Path | |
| import pandas as pd | |
| from datetime import datetime | |
| # Database file path routed to /tmp to bypass Docker Read-Only locks | |
| DB_PATH = Path("/tmp/invoices.db") | |
| def init_db(): | |
| """ | |
| Initialize the SQLite database and create the invoices table if absent. | |
| Columns: | |
| - id: PK, auto-increment | |
| - file_name: TEXT (original image filename for benchmarking) | |
| - vendor: TEXT (company name) | |
| - invoice_number: TEXT (ref no) | |
| - date: TEXT (date as string) | |
| - subtotal: REAL | |
| - gst: REAL | |
| - total: REAL | |
| - raw_text: TEXT (stored for debugging/logging) | |
| - created_at: TIMESTAMP (defaults to NOW) | |
| """ | |
| conn = sqlite3.connect(DB_PATH) | |
| cursor = conn.cursor() | |
| cursor.execute(""" | |
| CREATE TABLE IF NOT EXISTS invoices ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| file_name TEXT, | |
| vendor TEXT, | |
| invoice_number TEXT, | |
| date TEXT, | |
| subtotal REAL, | |
| gst REAL, | |
| total REAL, | |
| raw_text TEXT, | |
| created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP | |
| ) | |
| """) | |
| conn.commit() | |
| conn.close() | |
| def save_invoice(invoice_data: dict) -> int: | |
| """ | |
| Insert a dictionary representing one invoice into the database. | |
| Args: | |
| invoice_data: Dict with keys: file_name, vendor, date, invoice_number, | |
| subtotal, gst, total, raw_text. | |
| Returns: | |
| The id (int) of the newly created row. | |
| """ | |
| conn = sqlite3.connect(DB_PATH) | |
| cursor = conn.cursor() | |
| cursor.execute(""" | |
| INSERT INTO invoices ( | |
| file_name, vendor, date, invoice_number, subtotal, gst, total, raw_text | |
| ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) | |
| """, ( | |
| invoice_data.get("file_name"), | |
| invoice_data.get("vendor"), | |
| invoice_data.get("date"), | |
| invoice_data.get("invoice_number"), | |
| invoice_data.get("subtotal"), | |
| invoice_data.get("gst"), | |
| invoice_data.get("total"), | |
| invoice_data.get("raw_text") | |
| )) | |
| new_id = cursor.lastrowid | |
| conn.commit() | |
| conn.close() | |
| return new_id | |
| def fetch_all() -> pd.DataFrame: | |
| """ | |
| Fetch all invoice records as a pandas DataFrame. | |
| Order is strictly by ID descending (newest first). | |
| Returns: | |
| A pandas DataFrame containing all columns from the invoices table. | |
| Returns an empty DataFrame if no records exist. | |
| """ | |
| conn = sqlite3.connect(DB_PATH) | |
| df = pd.read_sql_query("SELECT * FROM invoices ORDER BY id DESC", conn) | |
| conn.close() | |
| return df | |
| def delete_invoice(invoice_id: int): | |
| """ | |
| Delete a specific invoice record by its unique ID. | |
| Args: | |
| invoice_id: The primary key (id) of the row to remove. | |
| """ | |
| conn = sqlite3.connect(DB_PATH) | |
| cursor = conn.cursor() | |
| cursor.execute("DELETE FROM invoices WHERE id = ?", (invoice_id,)) | |
| conn.commit() | |
| conn.close() | |