dipan004 committed on
Commit
d275cbe
·
verified ·
1 Parent(s): 2848bd6

Update backend/app/api/ingest.py

Browse files
Files changed (1) hide show
  1. backend/app/api/ingest.py +107 -0
backend/app/api/ingest.py CHANGED
@@ -32,6 +32,113 @@ logging.basicConfig(
32
 
33
  logger = logging.getLogger(__name__)
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  # Setup
36
  BASE_DIR = Path(__file__).parent.parent.parent.parent
37
  STORAGE_PATH = Path(os.getenv('STORAGE_PATH', str(BASE_DIR / "data" / "docs")))
 
32
 
33
  logger = logging.getLogger(__name__)
34
 
35
+ # ============================================
36
+ # FORCE DATABASE INITIALIZATION ON MODULE LOAD
37
+ # ============================================
38
+
39
# Idempotent DDL run, in order, by init_tables_now(). Every statement uses
# IF NOT EXISTS, so re-running it against an existing schema is a no-op.
_INIT_SCHEMA_STATEMENTS = (
    """
    CREATE TABLE IF NOT EXISTS ingest_jobs (
        job_id TEXT PRIMARY KEY,
        doc_id TEXT,
        filename TEXT NOT NULL,
        status TEXT NOT NULL DEFAULT 'queued',
        error_text TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS documents (
        doc_id TEXT PRIMARY KEY,
        job_id TEXT NOT NULL,
        path TEXT NOT NULL,
        filename TEXT NOT NULL,
        content_type TEXT NOT NULL,
        uploaded_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS extractions (
        doc_id TEXT PRIMARY KEY,
        raw_text TEXT,
        tables_json TEXT,
        entities_json TEXT,
        classification_json TEXT,
        summary_text TEXT,
        extracted_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS invoice_fields (
        invoice_id INTEGER PRIMARY KEY AUTOINCREMENT,
        doc_id TEXT NOT NULL,
        cust_number TEXT,
        posting_date TEXT,
        total_open_amount REAL,
        business_code TEXT,
        cust_payment_terms TEXT,
        confidence_map TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS batch_jobs (
        batch_id TEXT PRIMARY KEY,
        total_files INTEGER,
        message TEXT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS batch_job_mapping (
        batch_id TEXT,
        job_id TEXT
    )
    """,
    "CREATE INDEX IF NOT EXISTS idx_ingest_jobs_status ON ingest_jobs(status)",
    "CREATE INDEX IF NOT EXISTS idx_documents_job_id ON documents(job_id)",
    "CREATE INDEX IF NOT EXISTS idx_invoice_fields_doc_id ON invoice_fields(doc_id)",
)


def init_tables_now():
    """Create the ingest database tables unless ``ingest_jobs`` already exists.

    Ensures the parent directory of ``DB_PATH`` exists, then, while holding
    the file lock at ``LOCK_PATH`` (``timeout=30``), opens the SQLite database
    and looks up ``ingest_jobs`` in ``sqlite_master``. If that table is
    present the function logs and returns without touching the schema;
    otherwise it executes every statement in ``_INIT_SCHEMA_STATEMENTS`` and
    commits.

    ``DB_PATH``, ``LOCK_PATH``, ``FileLock`` and ``logger`` are module-level
    names defined outside this block.

    Returns:
        None. This is a best-effort initializer: any exception is logged
        with its traceback and swallowed, never propagated to the caller.
    """
    try:
        DB_PATH.parent.mkdir(parents=True, exist_ok=True)

        with FileLock(str(LOCK_PATH), timeout=30):
            conn = sqlite3.connect(str(DB_PATH))
            try:
                cursor = conn.cursor()

                # Presence of ingest_jobs is treated as "schema already set up".
                cursor.execute(
                    "SELECT name FROM sqlite_master WHERE type='table' AND name='ingest_jobs'"
                )
                if cursor.fetchone():
                    logger.info("✅ Database tables already exist")
                    return

                logger.warning("⚠️ Creating database tables...")

                for statement in _INIT_SCHEMA_STATEMENTS:
                    cursor.execute(statement)

                conn.commit()
            finally:
                # Close on every path (early return, success, or a failing
                # statement) so a failed DDL cannot leak the connection.
                conn.close()

        logger.info("✅ Database tables created successfully!")

    except Exception as e:
        # Import-time boundary: report the failure with its traceback but do
        # not let it abort the module import.
        logger.exception("❌ Failed to create tables: %s", e)
137
+
138
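+ # Run immediately on import. NOTE(review): this call precedes the `# Setup` constants below; confirm DB_PATH and LOCK_PATH are already defined here, since init_tables_now() catches and only logs any exception.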
139
+ logger.info("🔍 Initializing database on module load...")
140
+ init_tables_now()
141
+
142
  # Setup
143
  BASE_DIR = Path(__file__).parent.parent.parent.parent
144
  STORAGE_PATH = Path(os.getenv('STORAGE_PATH', str(BASE_DIR / "data" / "docs")))