GSoumyajit2005 committed on
Commit
8f86a3c
·
1 Parent(s): 4bdd01c

perf: fully async DB - results in 5-7s, background check+save

Browse files
Files changed (1) hide show
  1. src/pipeline.py +17 -25
src/pipeline.py CHANGED
@@ -141,38 +141,30 @@ def process_invoice(image_path: str,
141
  final_data['semantic_hash'] = generate_semantic_hash(final_data)
142
 
143
  # --- DATABASE SAVE (ASYNC - Fire and Forget) ---
144
- def background_save(data_to_save):
145
- """Save to database in background thread"""
146
  try:
147
  repo = InvoiceRepository()
148
  if repo.session:
149
- saved = repo.save_invoice(data_to_save)
150
- if saved:
151
- print(f" ✅ [Background] Invoice saved: {data_to_save.get('receipt_number')}")
 
152
  else:
153
- print(f" ⚠️ [Background] Duplicate or error for: {data_to_save.get('receipt_number')}")
 
 
 
 
 
154
  except Exception as e:
155
- print(f" ⚠️ [Background] Save failed: {e}")
156
 
157
  if DB_CONNECTED:
158
- # Quick duplicate check before queueing save
159
- try:
160
- repo = InvoiceRepository()
161
- if repo.session:
162
- existing = repo.get_by_hash(final_data.get('semantic_hash', ''))
163
- if existing:
164
- print(" ⚠️ Duplicate invoice (already in database)")
165
- final_data['_db_status'] = 'duplicate'
166
- else:
167
- # Not a duplicate - save in background
168
- save_thread = threading.Thread(target=background_save, args=(final_data.copy(),))
169
- save_thread.start()
170
- final_data['_db_status'] = 'queued'
171
- else:
172
- final_data['_db_status'] = 'disabled'
173
- except Exception as e:
174
- print(f" ⚠️ Duplicate check failed: {e}")
175
- final_data['_db_status'] = 'error'
176
  else:
177
  final_data['_db_status'] = 'disabled'
178
 
 
141
  final_data['semantic_hash'] = generate_semantic_hash(final_data)
142
 
143
  # --- DATABASE SAVE (ASYNC - Fire and Forget) ---
144
+ def background_db_operation(data_to_save):
145
+ """Check for duplicate and save in background thread"""
146
  try:
147
  repo = InvoiceRepository()
148
  if repo.session:
149
+ # Check for duplicate first
150
+ existing = repo.get_by_hash(data_to_save.get('semantic_hash', ''))
151
+ if existing:
152
+ print(f" ⚠️ [Background] Duplicate: {data_to_save.get('receipt_number')}")
153
  else:
154
+ # Not a duplicate - save it
155
+ saved = repo.save_invoice(data_to_save)
156
+ if saved:
157
+ print(f" ✅ [Background] Saved: {data_to_save.get('receipt_number')}")
158
+ else:
159
+ print(f" ⚠️ [Background] Save failed: {data_to_save.get('receipt_number')}")
160
  except Exception as e:
161
+ print(f" ⚠️ [Background] DB Error: {e}")
162
 
163
  if DB_CONNECTED:
164
+ # Fire and forget - don't wait for result
165
+ save_thread = threading.Thread(target=background_db_operation, args=(final_data.copy(),))
166
+ save_thread.start()
167
+ final_data['_db_status'] = 'queued'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  else:
169
  final_data['_db_status'] = 'disabled'
170