Spaces:
Sleeping
Sleeping
| -- ============================================ | |
| -- Minimal Ingest Pipeline Tables | |
| -- Version: 1.0 (Idempotent) | |
| -- ============================================ | |
| -- Table 1: ingest_jobs (job tracking) | |
| CREATE TABLE IF NOT EXISTS ingest_jobs ( | |
| job_id TEXT PRIMARY KEY, | |
| doc_id INTEGER, | |
| filename TEXT NOT NULL, | |
| status TEXT NOT NULL DEFAULT 'queued', | |
| error_text TEXT, | |
| created_at TEXT DEFAULT CURRENT_TIMESTAMP, | |
| updated_at TEXT DEFAULT CURRENT_TIMESTAMP, | |
| FOREIGN KEY (doc_id) REFERENCES documents(doc_id) | |
| ); | |
| -- Drop indexes if they exist, then recreate | |
| DROP INDEX IF EXISTS idx_ingest_jobs_status; | |
| DROP INDEX IF EXISTS idx_ingest_jobs_created; | |
| CREATE INDEX idx_ingest_jobs_status ON ingest_jobs(status); | |
| CREATE INDEX idx_ingest_jobs_created ON ingest_jobs(created_at DESC); | |
| -- Table 2: documents (file metadata) | |
| CREATE TABLE IF NOT EXISTS documents ( | |
| doc_id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| job_id TEXT NOT NULL, | |
| path TEXT NOT NULL, | |
| filename TEXT NOT NULL, | |
| content_type TEXT NOT NULL, | |
| uploaded_at TEXT DEFAULT CURRENT_TIMESTAMP, | |
| FOREIGN KEY (job_id) REFERENCES ingest_jobs(job_id) | |
| ); | |
| DROP INDEX IF EXISTS idx_documents_job_id; | |
| CREATE INDEX idx_documents_job_id ON documents(job_id); | |
| -- Table 3: extractions (agent artifacts) | |
| CREATE TABLE IF NOT EXISTS extractions ( | |
| doc_id INTEGER PRIMARY KEY, | |
| raw_text TEXT, | |
| tables_json TEXT, | |
| entities_json TEXT, | |
| classification_json TEXT, | |
| summary_text TEXT, | |
| extracted_at TEXT DEFAULT CURRENT_TIMESTAMP, | |
| FOREIGN KEY (doc_id) REFERENCES documents(doc_id) | |
| ); | |
| -- Table 4: invoice_fields (mapped fields for prediction) | |
| CREATE TABLE IF NOT EXISTS invoice_fields ( | |
| invoice_id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| doc_id INTEGER NOT NULL, | |
| cust_number TEXT, | |
| posting_date TEXT, | |
| total_open_amount REAL, | |
| business_code TEXT, | |
| cust_payment_terms TEXT, | |
| invoice_currency TEXT DEFAULT 'USD', | |
| due_in_date TEXT, | |
| confidence_map TEXT, | |
| created_at TEXT DEFAULT CURRENT_TIMESTAMP, | |
| FOREIGN KEY (doc_id) REFERENCES documents(doc_id) | |
| ); | |
| DROP INDEX IF EXISTS idx_invoice_fields_doc_id; | |
| CREATE INDEX idx_invoice_fields_doc_id ON invoice_fields(doc_id); |