# Jash Doshi
# fix: Delete race condition - properly track ChromaDB items during deletion
# ebfe0e8
# app.py
import os
import io
import json
import hashlib
import requests
import base64
from flask import Flask, request, jsonify, send_from_directory, render_template, session
import webbrowser
from flask_cors import CORS
from PIL import Image
import fitz # PyMuPDF
import rag_core
from datetime import timedelta
import traceback
import time
import re
from dotenv import load_dotenv
load_dotenv()
# --- MODIFIED: Import db and models from models.py ---
from models import db, BusinessCard, Brochure, Contact
app = Flask(__name__)
# NOTE(review): CORS is enabled for every origin; restrict it if this is not intended.
CORS(app)

# Disable template caching for development
app.config['TEMPLATES_AUTO_RELOAD'] = True
app.jinja_env.auto_reload = True

# Session configuration
# The fallback secret is committed to source control, so anyone who knows it can
# forge session cookies. It is kept only so local development keeps working;
# SESSION_SECRET must be set in every real deployment.
app.secret_key = os.environ.get("SESSION_SECRET", "a-very-secret-key-for-sessions")
if "SESSION_SECRET" not in os.environ:
    print("WARNING: SESSION_SECRET is not set; using the insecure built-in development secret.")
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(hours=24)

# --- FOLDER CONFIGURATION ---
UPLOAD_FOLDER = 'uploads'
DATA_FOLDER = 'user_data'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# exist_ok=True removes the check-then-create race when several workers start
# at the same time.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(DATA_FOLDER, exist_ok=True)

# --- DATABASE CONFIGURATION ---
app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get(
    'DATABASE_URI',
    'sqlite:///local_crm.db'
)
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

# Initialize the app with the database object defined in models.py
db.init_app(app)

# --- SERVER-SIDE API KEY (read from the environment, never hardcoded) ---
# None when OPENROUTER_API_KEY is unset; the endpoints answer 400 in that case.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")

# Database model definitions live in models.py.

# Maps the model key sent by the client to the OpenRouter model identifier.
# NOTE(review): several keys no longer match the vendor they point at
# (e.g. 'deepseek' -> a Gemma model); the keys are part of the client contract,
# so they are left unchanged here.
MODEL_MAP = {
    'gemini': 'google/gemma-3-4b-it:free',
    'deepseek': 'google/gemma-3-27b-it:free',
    'qwen': 'mistralai/mistral-small-3.1-24b-instruct:free',
    'nvidia': 'nvidia/nemotron-nano-12b-v2-vl:free',
    'amazon': 'amazon/nova-2-lite-v1:free',
}

# Best -> fallback order (OCR strength)
FALLBACK_ORDER = [
    'gemini',
    'deepseek',
    'qwen',
    'nvidia',
    'amazon',
]
# --- OpenRouter API helpers ---
def _call_openrouter_api_with_fallback(api_key, selected_model_key, prompt, images=None):
    """Ask OpenRouter for a JSON answer, falling back through the known models.

    The selected model is tried first, then the remaining models. The first
    reply that parses as JSON is returned.

    Args:
        api_key: OpenRouter API key sent as a Bearer token.
        selected_model_key: Key into MODEL_MAP for the preferred model.
        prompt: Text prompt sent as the first content part.
        images: Optional iterable of PIL images attached as base64 data URLs.

    Returns:
        The decoded JSON value from the first successful model, or a dict of the
        form ``{"error": "..."}`` describing the last failure.
    """
    # A None default avoids sharing one mutable list between calls.
    images = list(images) if images else []
    vision_models = ('gemini', 'deepseek', 'qwen', 'nvidia', 'amazon')

    if images:
        models_to_try = [m for m in vision_models if m == selected_model_key]
        models_to_try.extend(m for m in vision_models if m != selected_model_key)
        models_to_try.extend(m for m in FALLBACK_ORDER if m not in vision_models)
    else:
        models_to_try = [selected_model_key]
        models_to_try.extend(m for m in FALLBACK_ORDER if m != selected_model_key)

    # Encode every image once instead of once per attempted model.
    image_parts = []
    for img in images:
        buffered = io.BytesIO()
        img_format = img.format or "PNG"
        img.save(buffered, format=img_format)
        img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
        image_parts.append({
            "type": "image_url",
            "image_url": {"url": f"data:image/{img_format.lower()};base64,{img_base64}"}
        })

    last_error = None
    for model_key in models_to_try:
        model_name = MODEL_MAP.get(model_key)
        if not model_name:
            continue
        print(f"Attempting API call with model: {model_name}...")
        if images and model_key not in vision_models:
            print(f"Skipping {model_name} - no image input support")
            continue
        content_parts = [{"type": "text", "text": prompt}] + image_parts

        # Reset per attempt so the handlers below never see an unbound name or
        # a stale value left over from the previous model.
        response = None
        json_text = None
        try:
            response = requests.post(
                url="https://openrouter.ai/api/v1/chat/completions",
                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
                json={"model": model_name, "messages": [{"role": "user", "content": content_parts}]},
                timeout=30
            )
            response.raise_for_status()
            api_response = response.json()
            if 'choices' not in api_response or not api_response['choices']:
                print(f"Model {model_name} returned empty response")
                last_error = {"error": f"Model {model_name} returned empty response"}
                continue
            json_text = api_response['choices'][0]['message']['content']
            # Models often wrap the JSON in a ```json fence; unwrap it if present.
            fenced = re.search(r'```json\s*([\s\S]+?)\s*```', json_text)
            json_text = fenced.group(1) if fenced else json_text.strip()
            result = json.loads(json_text)
            print(f"Successfully processed with model: {model_name}")
            return result
        except requests.exceptions.HTTPError as http_err:
            error_msg = f"HTTP error occurred for model {model_name}: {http_err}"
            if response is not None:
                error_msg += f"\nResponse: {response.text}"
            print(error_msg)
            status = response.status_code if response is not None else "unknown"
            last_error = {"error": f"API request failed for {model_name} with status {status}."}
        except requests.exceptions.Timeout:
            print(f"Timeout error for model {model_name}")
            last_error = {"error": f"Request timeout for model {model_name}"}
        except json.JSONDecodeError as json_err:
            # json_text is still None when the HTTP body itself was not JSON.
            malformed = json_text if json_text is not None else getattr(response, 'text', '')
            print(f"JSON Decode Error for model {model_name}: {json_err}\nMalformed response: {malformed}")
            last_error = {"error": f"Model {model_name} returned invalid JSON."}
        except Exception as e:
            print(f"An error occurred with model {model_name}: {e}")
            traceback.print_exc()
            last_error = {"error": f"An unexpected error occurred with model {model_name}."}
    return last_error or {"error": "All models failed to process the request."}
def _call_openrouter_api_text_only_with_fallback(api_key, selected_model_key, prompt):
    """Send a text-only prompt to OpenRouter, trying the selected model first.

    Returns the message content of the first model that answers. When every
    model fails, the error text of the last failure is returned as a plain
    string (or a generic message when no model was attempted).
    """
    candidate_keys = [selected_model_key]
    candidate_keys += [key for key in FALLBACK_ORDER if key != selected_model_key]

    failure = None
    for candidate in candidate_keys:
        model_name = MODEL_MAP.get(candidate)
        if not model_name:
            continue
        print(f"Attempting text-only API call with model: {model_name}...")
        try:
            response = requests.post(
                url="https://openrouter.ai/api/v1/chat/completions",
                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
                json={"model": model_name, "messages": [{"role": "user", "content": prompt}]},
                timeout=30
            )
            response.raise_for_status()
            api_response = response.json()
            has_choices = 'choices' in api_response and api_response['choices']
            if not has_choices:
                failure = {"error": f"Model {model_name} returned unexpected response format"}
                continue
            result = api_response['choices'][0]['message']['content']
            print(f"Successfully processed text with model: {model_name}")
            return result
        except requests.exceptions.HTTPError as http_err:
            error_msg = f"HTTP error occurred for model {model_name}: {http_err}"
            if hasattr(response, 'text'):
                error_msg += f"\nResponse: {response.text}"
            print(error_msg)
            failure = {"error": f"API request failed for {model_name} with status {response.status_code}."}
        except requests.exceptions.Timeout:
            print(f"Timeout error for model {model_name}")
            failure = {"error": f"Request timeout for model {model_name}"}
        except Exception as e:
            print(f"An error occurred with model {model_name}: {e}")
            traceback.print_exc()
            failure = {"error": f"An unexpected error occurred with model {model_name}."}

    # Callers receive a string in every case, including failures.
    if isinstance(failure, dict) and "error" in failure:
        return failure["error"]
    return "All models failed to process the text request."
def _extract_contact_info_from_text(text):
    """Strip e-mail addresses and phone numbers out of free text.

    Args:
        text: Raw text; may be empty or None.

    Returns:
        A ``(clean_text, contacts)`` tuple. ``clean_text`` is the input with
        e-mails and phone numbers removed and all whitespace collapsed to single
        spaces. ``contacts`` lists the e-mails followed by the phone numbers.
        Only digit runs with at least 7 digits count as phone numbers, so years
        and other short numbers are neither removed nor reported.
    """
    if not text:
        return "", []
    # The TLD class is [A-Za-z]; the former [A-Z|a-z] also accepted a literal
    # '|' and glued pipe-separated addresses together.
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    phone_pattern = r'(?:\+?\d{1,4}[-.\s]?)?(?:\(?\d{1,4}\)?[-.\s]?)?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}'

    emails = re.findall(email_pattern, text, re.IGNORECASE)
    clean_text = re.sub(email_pattern, '', text, flags=re.IGNORECASE)

    # Look for phone numbers only after the e-mails are gone, so digits inside
    # an address are not mistaken for a phone number.
    phones = [
        candidate
        for candidate in re.findall(phone_pattern, clean_text)
        if sum(ch.isdigit() for ch in candidate) >= 7
    ]
    for phone in phones:
        clean_text = clean_text.replace(phone, '')

    clean_text = re.sub(r'\s+', ' ', clean_text).strip()
    return clean_text, emails + phones
def _create_clean_info_text(brochure_data):
    """Build the contact-free "company info" text for a brochure.

    Combines a ``Company: <name>`` line (skipped for missing names and for the
    "Unknown Company" placeholder) with the brochure's raw text after contact
    details and contact lead-in phrases have been removed. Returns an empty
    string when nothing is left.
    """
    company = brochure_data.get("company_name", "")
    body = brochure_data.get("raw_text", "")

    sections = []
    if company and company != "Unknown Company":
        sections.append(f"Company: {company}")

    if body:
        stripped, _contacts = _extract_contact_info_from_text(body)
        # Phrases that only introduce contact details and carry no information.
        lead_in_patterns = (
            r'contact\s+us\s*:?',
            r'for\s+more\s+information\s*:?',
            r'reach\s+out\s+to\s*:?',
            r'get\s+in\s+touch\s*:?',
            r'phone\s*:',
            r'email\s*:',
            r'tel\s*:',
            r'mobile\s*:',
            r'call\s+us\s*:?',
            r'write\s+to\s+us\s*:?',
        )
        for pattern in lead_in_patterns:
            stripped = re.sub(pattern, '', stripped, flags=re.IGNORECASE)
        stripped = re.sub(r'\s+', ' ', stripped).strip()
        stripped = re.sub(r'\n\s*\n', '\n', stripped)
        if stripped:
            sections.append(stripped)

    if not sections:
        return ""
    return "\n".join(sections)
def _get_user_data_filepath(user_api_key, mode):
    """Return the JSON store path for this API key and mode.

    The file name starts with the first 16 hex characters of the key's SHA-256
    digest, so the key itself never appears on disk.
    """
    digest = hashlib.sha256(user_api_key.encode()).hexdigest()
    short_hash = digest[:16]
    return os.path.join(DATA_FOLDER, f'{short_hash}_{mode}_data.json')
def _load_user_data(user_api_key, mode):
    """Load the stored JSON data for this API key and mode.

    Returns the decoded file contents, or an empty list when the file is
    missing, unreadable, or not valid JSON.
    """
    path = _get_user_data_filepath(user_api_key, mode)
    if not os.path.exists(path):
        return []
    try:
        with open(path, 'r') as handle:
            return json.load(handle)
    except (IOError, json.JSONDecodeError):
        return []
def _save_user_data(user_api_key, mode, data):
    """Persist ``data`` as the JSON store for this API key and mode.

    The data is written to a uniquely named temporary file next to the target
    and then moved into place with ``os.replace``. A failed or interrupted write
    therefore never truncates the existing store, which ``_load_user_data``
    would otherwise read back as an empty list.

    Returns:
        True on success, False when the file could not be written.
    """
    filepath = _get_user_data_filepath(user_api_key, mode)
    # Unique per process and call so concurrent writers never share a temp file.
    tmp_path = f"{filepath}.{os.getpid()}.{os.urandom(4).hex()}.tmp"
    try:
        with open(tmp_path, 'w') as f:
            json.dump(data, f, indent=4)
        os.replace(tmp_path, filepath)
        return True
    except IOError:
        return False
    finally:
        # After a successful replace the temp file no longer exists; otherwise
        # remove the partial file. Cleanup is best-effort only.
        if os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass
def _clean_and_validate_contacts(data):
    """Normalize the "contacts" list of an AI extraction result in place.

    Contacts without a usable "Owner Name" are dropped; placeholder e-mails and
    numbers ("n/a", "none", "not available", ...) become None; surviving strings
    are stripped. Entries that are not dicts are skipped, a missing or non-list
    "contacts" value is treated as empty, and integer phone numbers are kept as
    strings instead of being discarded.

    Args:
        data: Extraction result; anything that is not a dict with a "contacts"
            key is returned unchanged.

    Returns:
        The same ``data`` object, with ``data["contacts"]`` replaced by the
        cleaned list when applicable.
    """
    if not data or not isinstance(data, dict) or "contacts" not in data:
        return data

    def is_placeholder(value):
        if not isinstance(value, str):
            return True
        test_val = value.strip().lower()
        if not test_val:
            return True
        if test_val in ("n/a", "na", "none", "null"):
            return True
        return any(
            phrase in test_val
            for phrase in ("not available", "not specified", "not applicable")
        )

    def clean_optional(value):
        # Models sometimes emit a phone number as a JSON integer.
        if isinstance(value, int) and not isinstance(value, bool):
            value = str(value)
        return None if is_placeholder(value) else value.strip()

    raw_contacts = data.get("contacts")
    if not isinstance(raw_contacts, list):
        raw_contacts = []

    cleaned_contacts = []
    for contact in raw_contacts:
        if not isinstance(contact, dict):
            continue
        name = contact.get("Owner Name")
        if is_placeholder(name):
            continue
        cleaned_contacts.append({
            "Owner Name": name.strip(),
            "Email": clean_optional(contact.get("Email")),
            "Number": clean_optional(contact.get("Number")),
        })
    data["contacts"] = cleaned_contacts
    return data
def extract_card_data(image_bytes, user_api_key, selected_model_key):
    """Extract the business-card fields from an image through OpenRouter.

    Returns a dict with the keys "Owner Name", "Company Name", "Email",
    "Number" and "Address" (missing values are None), or a dict with an
    "error" key when the API key is missing or extraction fails.
    """
    print("Processing business card with OpenRouter API...")
    if not user_api_key:
        return {"error": "A valid OpenRouter API Key was not provided."}

    card_fields = ("Owner Name", "Company Name", "Email", "Number", "Address")
    try:
        card_image = Image.open(io.BytesIO(image_bytes))
        prompt = """You are an expert at reading business cards. Analyze the image and extract information into a structured JSON format. The JSON object must use these exact keys: "Owner Name", "Company Name", "Email", "Number", "Address". If a piece of information is not present, its value must be `null`. Your entire response MUST be a single, valid JSON object."""
        ai_reply = _call_openrouter_api_with_fallback(
            user_api_key, selected_model_key, prompt, images=[card_image]
        )
        if "error" in ai_reply:
            return ai_reply
        # Keep only the expected keys, in the documented order.
        return {field_name: ai_reply.get(field_name) for field_name in card_fields}
    except Exception as e:
        print(f"Error during OpenRouter API call for business card: {e}")
        traceback.print_exc()
        return {"error": f"Failed to parse AI response: {e}"}
def _extract_brochure_data_with_vision(image_list, user_api_key, selected_model_key):
    """Extract company name, contacts and raw text from brochure page images.

    Sends every image to the vision models in a single request and cleans the
    returned contacts. Returns the cleaned result, or a dict with an "error"
    key when the API key is missing or extraction fails.
    """
    print(f"Vision Extraction: Analyzing {len(image_list)} images with OpenRouter...")
    if not user_api_key:
        return {"error": "A valid OpenRouter API Key was not provided."}

    try:
        prompt = """You are a world-class document analysis expert. Analyze the provided document images with maximum precision. CRITICAL INSTRUCTIONS: 1. Extract the company name. 2. Extract ONLY contact information (names, emails, phone numbers) and put them in the "contacts" array. 3. Extract ALL OTHER content (company description, services, mission, addresses, general information) as "raw_text". 4. DO NOT include contact details like names, emails, or phone numbers in the raw_text. 5. Focus on separating contact information from general company information. OUTPUT FORMAT: Return a SINGLE, valid JSON object with these exact keys: "company_name", "contacts", "raw_text". The "contacts" key must contain a list of objects, each with "Owner Name", "Email", and "Number". If a piece of information is missing for a contact, use `null`. The "raw_text" should contain business information, services, descriptions, but NO contact details."""
        model_output = _call_openrouter_api_with_fallback(
            user_api_key, selected_model_key, prompt, images=image_list
        )
        if "error" not in model_output:
            print("AI vision extraction complete. Applying bulletproof cleaning...")
            return _clean_and_validate_contacts(model_output)
        # Pass the error dict through untouched.
        return model_output
    except Exception as e:
        print(f"Error during unified brochure vision extraction: {e}")
        traceback.print_exc()
        return {"error": f"Failed to parse data from brochure images: {e}"}
@app.before_request
def make_session_permanent():
    """Flag the session as permanent before every request.

    Presumably this makes the session cookie follow the configured
    PERMANENT_SESSION_LIFETIME instead of expiring with the browser session;
    confirm against the Flask session documentation.
    """
    session.permanent = True
@app.route('/process_card', methods=['POST'])
def process_card_endpoint():
    """Handle a business-card upload.

    Extracts the card fields with the selected model, stores the image under
    UPLOAD_FOLDER, then records the card in the JSON store, the SQL database
    and the RAG knowledge base. Responds with the extracted record (including
    its generated id and image file name) or an error payload.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400
    file = request.files['file']
    selected_model_key = request.form.get('selectedModel')
    user_api_key = OPENROUTER_API_KEY  # Use hardcoded server-side API key
    if not (user_api_key and selected_model_key):
        return jsonify({'error': 'Server API key not configured or model not selected'}), 400
    if selected_model_key not in MODEL_MAP:
        return jsonify({'error': 'Invalid model selected'}), 400

    try:
        image_bytes = file.read()
        extracted_info = extract_card_data(image_bytes, user_api_key, selected_model_key)
        if "error" in extracted_info:
            return jsonify(extracted_info), 500

        # Store the upload under a random id; unknown extensions fall back to .png.
        file_id = os.urandom(8).hex()
        f_ext = os.path.splitext(file.filename)[1]
        if f_ext.lower() in ['.png', '.jpg', '.jpeg', '.webp']:
            safe_ext = f_ext
        else:
            safe_ext = '.png'
        image_filename = f"{file_id}{safe_ext}"
        with open(os.path.join(UPLOAD_FOLDER, image_filename), 'wb') as image_file:
            image_file.write(image_bytes)

        extracted_info['id'] = file_id
        extracted_info['image_filename'] = image_filename

        # JSON store: newest card first.
        user_contacts = _load_user_data(user_api_key, 'cards')
        user_contacts.insert(0, extracted_info)
        _save_user_data(user_api_key, 'cards', user_contacts)

        # SQL database: a failure is logged and does not fail the request.
        try:
            user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
            new_card = BusinessCard(
                json_id=file_id,
                owner_name=extracted_info.get("Owner Name"),
                company_name=extracted_info.get("Company Name"),
                email=extracted_info.get("Email"),
                phone_number=extracted_info.get("Number"),
                address=extracted_info.get("Address"),
                source_document=file.filename,
                user_hash=user_hash
            )
            db.session.add(new_card)
            db.session.commit()
            print(f"Successfully saved business card for '{extracted_info.get('Owner Name')}' to the database.")
        except Exception as e:
            db.session.rollback()
            print(f"DATABASE ERROR: Failed to save business card data. Error: {e}")
            traceback.print_exc()

        # RAG index: every non-empty field value except the bookkeeping keys.
        rag_values = [
            str(v)
            for k, v in extracted_info.items()
            if v and k not in ['id', 'image_filename']
        ]
        raw_text_for_rag = ' '.join(rag_values)
        rag_core.add_document_to_knowledge_base(user_api_key, raw_text_for_rag, file_id, 'cards')

        # Save metadata to ChromaDB for persistence across restarts
        extracted_info['_timestamp'] = time.time()
        rag_core.save_metadata_to_chroma(user_api_key, 'cards', file_id, extracted_info)
        return jsonify(extracted_info)
    except Exception as e:
        print(f"An error occurred in process_card endpoint: {e}")
        traceback.print_exc()
        return jsonify({'error': 'Server processing failed'}), 500
@app.route('/process_brochure', methods=['POST'])
def process_brochure_endpoint():
    """Handle a brochure PDF upload.

    Strategy: when the PDF has more than 100 characters of extractable text,
    that text is sent to a text model first. If that yields nothing usable, the
    pages are rendered at 150 DPI and sent to the vision models, with one retry
    at 300 DPI. The result is stored in the JSON store, the SQL database and
    the RAG knowledge base.

    Responds with the brochure record (id, company_name, contacts, raw_text,
    image_filename) or an error payload.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400
    file, selected_model_key = request.files['file'], request.form.get('selectedModel')
    user_api_key = OPENROUTER_API_KEY  # Use hardcoded server-side API key
    if not user_api_key or not selected_model_key:
        return jsonify({'error': 'Server API key not configured or model not selected'}), 400
    if selected_model_key not in MODEL_MAP:
        return jsonify({'error': 'Invalid model selected'}), 400

    pdf_doc = None
    try:
        pdf_bytes = file.read()
        pdf_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        brochure_json_id = os.urandom(8).hex()
        pdf_filename = f"{brochure_json_id}.pdf"
        save_path = os.path.join(UPLOAD_FOLDER, pdf_filename)
        with open(save_path, 'wb') as f:
            f.write(pdf_bytes)

        def run_vision_extraction(dpi):
            """Render every page at ``dpi`` and run the vision extraction."""
            page_images = [
                Image.open(io.BytesIO(page.get_pixmap(dpi=dpi).tobytes("png")))
                for page in pdf_doc
            ]
            result = _extract_brochure_data_with_vision(page_images, user_api_key, selected_model_key)
            # A model may answer with valid JSON that is not an object.
            if not isinstance(result, dict):
                return {"error": "Model returned JSON that is not an object."}
            return result

        extracted_data = {}
        full_text_from_pdf = "".join(page.get_text("text") for page in pdf_doc).strip()
        if len(full_text_from_pdf) > 100:
            print("'Text-First' successful. Using text model.")
            try:
                # This must be an f-string: otherwise the literal placeholder is
                # sent to the model instead of the document text.
                prompt = f"""Analyze the following text and structure it into a JSON object with keys "company_name", "contacts", and "raw_text". CRITICAL INSTRUCTIONS: 1. Extract the company name. 2. Extract ONLY contact information (names, emails, phone numbers) into the "contacts" array. 3. Extract ALL OTHER content into "raw_text". 4. DO NOT include contact details in raw_text. "contacts" should be a list of objects with "Owner Name", "Email", and "Number". DOCUMENT TEXT: --- {full_text_from_pdf} ---"""
                result = _call_openrouter_api_text_only_with_fallback(user_api_key, selected_model_key, prompt)
                if isinstance(result, str) and not result.startswith("All models failed"):
                    # Unwrap a ```json fence the same way the vision path does.
                    fenced = re.search(r'```json\s*([\s\S]+?)\s*```', result)
                    candidate = fenced.group(1) if fenced else result.strip()
                    try:
                        parsed = json.loads(candidate)
                    except json.JSONDecodeError:
                        parsed = {}
                    if isinstance(parsed, dict):
                        # Same contact cleaning as the vision path.
                        extracted_data = _clean_and_validate_contacts(parsed) or {}
            except Exception:
                # Any text-path failure falls through to the vision path.
                extracted_data = {}

        if "error" in extracted_data or not extracted_data:
            print("Adaptive Vision: Attempting medium resolution (150 DPI)...")
            extracted_data = run_vision_extraction(150)
            is_poor_quality = "error" in extracted_data or (
                not extracted_data.get("contacts")
                and len(extracted_data.get("raw_text") or "") < 50
            )
            if is_poor_quality:
                print("Medium resolution failed. Retrying with high resolution (300 DPI)...")
                extracted_data = run_vision_extraction(300)
        if "error" in extracted_data:
            return jsonify(extracted_data), 500

        # Models may return null for any key; normalize before use.
        contacts = extracted_data.get("contacts")
        if not isinstance(contacts, list):
            contacts = []
        contacts = [c for c in contacts if isinstance(c, dict)]
        final_brochure_object = {
            "id": brochure_json_id,
            "company_name": extracted_data.get("company_name") or "Unknown Company",
            "contacts": contacts,
            "raw_text": extracted_data.get("raw_text") or "",
            "image_filename": pdf_filename
        }
        for contact in final_brochure_object["contacts"]:
            contact["id"] = os.urandom(8).hex()

        # JSON store: newest brochure first.
        user_brochures = _load_user_data(user_api_key, 'brochures')
        user_brochures.insert(0, final_brochure_object)
        _save_user_data(user_api_key, 'brochures', user_brochures)

        # SQL database: a failure is logged and does not fail the request.
        try:
            user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
            new_brochure = Brochure(
                json_id=brochure_json_id,
                company_name=final_brochure_object.get("company_name"),
                raw_text=final_brochure_object.get("raw_text"),
                source_document=file.filename,
                user_hash=user_hash
            )
            db.session.add(new_brochure)
            for contact_data in final_brochure_object.get("contacts", []):
                new_contact = Contact(
                    json_id=contact_data['id'],
                    owner_name=contact_data.get("Owner Name"),
                    email=contact_data.get("Email"),
                    phone_number=contact_data.get("Number"),
                    brochure=new_brochure
                )
                db.session.add(new_contact)
            db.session.commit()
            print(f"Successfully saved brochure '{new_brochure.company_name}' and {len(new_brochure.contacts)} contacts to the database.")
        except Exception as e:
            db.session.rollback()
            print(f"DATABASE ERROR: Failed to save brochure data. Error: {e}")
            traceback.print_exc()

        # RAG index: contacts and general info are indexed as separate documents.
        print("Indexing separated and cleaned content for high-quality RAG...")
        contacts = final_brochure_object.get("contacts", [])
        if contacts:
            contact_text_parts = [f"Contact information for {final_brochure_object.get('company_name', 'this company')}:"]
            for contact in contacts:
                name, email, number = contact.get("Owner Name"), contact.get("Email"), contact.get("Number")
                contact_info = [f"Name: {name}"]
                if email:
                    contact_info.append(f"Email: {email}")
                if number:
                    contact_info.append(f"Phone: {number}")
                contact_text_parts.append("- " + ", ".join(contact_info))
            contacts_document_text = "\n".join(contact_text_parts)
            rag_core.add_document_to_knowledge_base(user_api_key, contacts_document_text, f"{brochure_json_id}_contacts", 'brochures')
        clean_info_text = _create_clean_info_text(final_brochure_object)
        if clean_info_text and clean_info_text.strip():
            rag_core.add_document_to_knowledge_base(user_api_key, clean_info_text, f"{brochure_json_id}_info", 'brochures')
        print("RAG indexing completed successfully!")

        # Save metadata to ChromaDB for persistence across restarts
        final_brochure_object['_timestamp'] = time.time()
        rag_core.save_metadata_to_chroma(user_api_key, 'brochures', brochure_json_id, final_brochure_object)
        return jsonify(final_brochure_object)
    except Exception as e:
        print(f"An error occurred in process_brochure endpoint: {e}")
        traceback.print_exc()
        return jsonify({'error': f'Server processing failed: {e}'}), 500
    finally:
        # Release the PyMuPDF document on every exit path.
        if pdf_doc is not None:
            pdf_doc.close()
@app.route('/chat', methods=['POST'])
def chat_endpoint():
    """Answer a chat query over the stored cards or brochures.

    Expects a JSON body with "query", "mode" ("cards" or "brochures") and
    "selectedModel". Queries containing "table" or "list all" are answered by
    handing the full dataset to the text model ("synthesis"); all other
    queries go through the RAG knowledge base ("research"). Both the user
    message and the answer are appended to the chat history.
    """
    # silent=True returns None instead of raising on a missing or invalid body,
    # so a bad request gets the 400 below rather than an unhandled error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    query_text, mode, selected_model_key = data.get('query'), data.get('mode'), data.get('selectedModel')
    user_api_key = OPENROUTER_API_KEY  # Use hardcoded server-side API key
    if not all([user_api_key, query_text, mode, selected_model_key]):
        return jsonify({'error': 'Query, mode, and model are required.'}), 400
    # mode ends up in storage file names; accept only the known values, as
    # sync_check does.
    if mode not in ('cards', 'brochures'):
        return jsonify({'error': 'Invalid mode'}), 400
    if selected_model_key not in MODEL_MAP:
        return jsonify({'error': 'Invalid model selected'}), 400
    try:
        # NOTE(review): this stores the server API key in the session. Flask's
        # default session cookie is signed but not encrypted, so the client can
        # read it. Nothing in this part of the file reads session['api_key'];
        # confirm it is unused and remove this line.
        session['api_key'] = user_api_key

        # Save user message to chat history
        rag_core.save_chat_message(user_api_key, mode, 'user', query_text)

        lowered_query = query_text.lower()
        intent = 'synthesis' if "table" in lowered_query or "list all" in lowered_query else 'research'
        print(f"Intent detected: {intent}")

        if intent == 'synthesis':
            # Try ChromaDB first, fall back to JSON
            data_source = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
            if not data_source:
                data_source = _load_user_data(user_api_key, mode)
            if mode == 'brochures':
                # Flatten to one row per contact so the model can tabulate it.
                synthesis_data = [
                    {
                        "Company Name": brochure.get("company_name"),
                        "Owner Name": contact.get("Owner Name"),
                        "Email": contact.get("Email"),
                        "Number": contact.get("Number"),
                    }
                    for brochure in data_source
                    for contact in (brochure.get('contacts') or [])
                ]
            else:
                synthesis_data = data_source
            synthesis_prompt = f"As a data analyst, create a markdown table based on the user's request from the following JSON data.\nJSON: {json.dumps(synthesis_data, indent=2)}\nRequest: {query_text}\nAnswer:"
            answer = _call_openrouter_api_text_only_with_fallback(user_api_key, selected_model_key, synthesis_prompt)
        else:
            answer = rag_core.query_knowledge_base(user_api_key, query_text, mode, selected_model_key)

        # Save assistant response to chat history
        rag_core.save_chat_message(user_api_key, mode, 'assistant', answer)
        return jsonify({'answer': answer})
    except Exception as e:
        print(f"Error in /chat endpoint: {e}")
        traceback.print_exc()
        return jsonify({'error': 'An internal error occurred.'}), 500
@app.route('/chat_history/<mode>', methods=['GET'])
def get_chat_history_endpoint(mode):
    """Return the stored chat history for ``mode``.

    The optional ``limit`` query parameter (default 20) is passed through to
    rag_core.get_chat_history.
    """
    user_api_key = OPENROUTER_API_KEY
    if not user_api_key:
        return jsonify({'error': 'Server API key not configured'}), 400

    max_messages = request.args.get('limit', 20, type=int)
    return jsonify({'history': rag_core.get_chat_history(user_api_key, mode, max_messages)})
@app.route('/clear_chat/<mode>', methods=['POST'])
def clear_chat_endpoint(mode):
    """Clear the chat history for ``mode`` and report the outcome."""
    user_api_key = OPENROUTER_API_KEY
    if not user_api_key:
        return jsonify({'error': 'Server API key not configured'}), 400

    return jsonify({'success': rag_core.clear_chat_history(user_api_key, mode)})
@app.route('/sync_check/<mode>', methods=['GET'])
def sync_check_endpoint(mode):
    """Report the item count and a short hash of item ids for change detection.

    The hash covers only the sorted item ids, so it changes when items are
    added or removed. On an internal error a zero count with an empty hash is
    returned instead of an error status.
    """
    user_api_key = OPENROUTER_API_KEY
    if not user_api_key:
        return jsonify({'error': 'Server API key not configured'}), 400
    if mode not in ['cards', 'brochures']:
        return jsonify({'error': 'Invalid mode'}), 400

    try:
        # ChromaDB is the primary source; the JSON store is the fallback.
        records = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
        if not records:
            records = _load_user_data(user_api_key, mode)

        if records:
            count = len(records)
            ids = sorted([record.get('id', '') for record in records])
        else:
            count = 0
            ids = []
        joined_ids = ''.join(ids)
        ids_hash = hashlib.md5(joined_ids.encode()).hexdigest()[:8]

        payload = {
            'count': count,
            'hash': ids_hash,
            'timestamp': time.time()
        }
        return jsonify(payload)
    except Exception as e:
        print(f"Sync check error: {e}")
        return jsonify({'count': 0, 'hash': '', 'timestamp': time.time()})
@app.route('/load_data/<mode>', methods=['POST'])
def load_data_endpoint(mode):
    """Return every stored item for ``mode``.

    Data saved in ChromaDB wins; the local JSON store is only used when
    ChromaDB has nothing for this mode.
    """
    user_api_key = OPENROUTER_API_KEY  # Use hardcoded server-side API key
    if not user_api_key:
        return jsonify({'error': 'Server API key not configured'}), 400

    # Try loading from ChromaDB first (persists across restarts)
    chroma_items = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
    if not chroma_items:
        # Fall back to local JSON (for backwards compatibility)
        return jsonify(_load_user_data(user_api_key, mode))

    print(f"Loaded {len(chroma_items)} items from ChromaDB for {mode}")
    return jsonify(chroma_items)
def _apply_field_update(records, mode, item_id, contact_id, field, value):
    """Set ``field`` to ``value`` on the matching card or brochure contact.

    Returns the top-level record that was modified, or None when no match
    exists in ``records``.
    """
    for record in records:
        if record.get('id') != item_id:
            continue
        if mode == 'cards':
            record[field] = value
            return record
        if mode == 'brochures' and contact_id:
            for contact in (record.get('contacts') or []):
                if contact.get('id') == contact_id:
                    contact[field] = value
                    return record
    return None


def _contacts_rag_text(brochure):
    """Render a brochure's contacts as the text document indexed for RAG."""
    lines = [f"Contact information for {brochure.get('company_name', 'this company')}:"]
    for contact in (brochure.get("contacts") or []):
        name, email, number = contact.get("Owner Name"), contact.get("Email"), contact.get("Number")
        contact_info = [f"Name: {name}"]
        if email:
            contact_info.append(f"Email: {email}")
        if number:
            contact_info.append(f"Phone: {number}")
        lines.append("- " + ", ".join(contact_info))
    return "\n".join(lines)


def _sync_record_to_chroma(user_api_key, mode, item_id, record):
    """Re-index ``record`` in the RAG knowledge base and refresh its ChromaDB metadata."""
    if mode == 'cards':
        # Remove the old document and re-add it with the updated content.
        rag_core.remove_document_from_knowledge_base(user_api_key, item_id, mode)
        raw_text = ' '.join(
            str(v) for k, v in record.items()
            if v and k not in ['id', 'image_filename', '_timestamp']
        )
        rag_core.add_document_to_knowledge_base(user_api_key, raw_text, item_id, mode)
    else:
        # Only the contacts document depends on contact fields.
        contacts_doc_id = f"{item_id}_contacts"
        rag_core.remove_document_from_knowledge_base(user_api_key, contacts_doc_id, mode)
        if record.get("contacts"):
            rag_core.add_document_to_knowledge_base(user_api_key, _contacts_rag_text(record), contacts_doc_id, mode)
    record['_timestamp'] = time.time()
    rag_core.save_metadata_to_chroma(user_api_key, mode, item_id, record)


@app.route('/update_card/<mode>/<item_id>', methods=['POST'])
def update_card_endpoint(mode, item_id):
    """Update one field of a card, or of a contact inside a brochure.

    Expects a JSON body with "field", "value" and, for brochures,
    "contactId". The change is applied to every store that holds the item:
    the JSON store, ChromaDB (RAG document and metadata) and the SQL database.

    Responds 200 when the item was found in at least one store, 404 when it
    was found nowhere, 400 for an invalid request and 500 when the database
    update fails (or the ChromaDB update fails and no other store had the
    item).
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    field, value, contact_id = data.get('field'), data.get('value'), data.get('contactId')
    user_api_key = OPENROUTER_API_KEY  # Use hardcoded server-side API key
    if not user_api_key:
        return jsonify({'error': 'Server API key not configured'}), 400
    if mode not in ('cards', 'brochures'):
        return jsonify({'error': 'Invalid mode'}), 400
    # The field name becomes a dict key in the stores; reject missing names and
    # the bookkeeping keys whose modification would orphan the record.
    if not isinstance(field, str) or not field or field in ('id', 'image_filename', '_timestamp'):
        return jsonify({"success": False, "message": "Invalid field"}), 400

    # Step 1: Update JSON file
    user_data = _load_user_data(user_api_key, mode)
    json_record = _apply_field_update(user_data, mode, item_id, contact_id, field, value)
    item_found_in_json = json_record is not None
    if item_found_in_json:
        _save_user_data(user_api_key, mode, user_data)
        # Step 1.5: Mirror the JSON change into ChromaDB. The JSON store already
        # holds the change, so a ChromaDB failure is only a warning.
        try:
            _sync_record_to_chroma(user_api_key, mode, item_id, json_record)
            print(f"ChromaDB: Updated document and metadata {item_id} in {mode} knowledge base")
        except Exception as e:
            print(f"ChromaDB update warning: {e}")

    # Step 2: Update Database
    item_found_in_db = False
    try:
        user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
        if mode == 'cards':
            db_card = BusinessCard.query.filter_by(json_id=item_id, user_hash=user_hash).first()
            if db_card:
                item_found_in_db = True
                field_map = {"Owner Name": "owner_name", "Company Name": "company_name", "Email": "email", "Number": "phone_number", "Address": "address"}
                db_field = field_map.get(field)
                if db_field:
                    setattr(db_card, db_field, value)
                    db.session.commit()
                    print(f"Database updated for business card json_id: {item_id}")
        elif mode == 'brochures' and contact_id:
            db_contact = Contact.query.filter_by(json_id=contact_id).first()
            if db_contact and db_contact.brochure is not None and db_contact.brochure.user_hash == user_hash:
                item_found_in_db = True
                field_map = {"Owner Name": "owner_name", "Email": "email", "Number": "phone_number"}
                db_field = field_map.get(field)
                if db_field:
                    setattr(db_contact, db_field, value)
                    db.session.commit()
                    print(f"Database updated for brochure contact json_id: {contact_id}")
    except Exception as e:
        db.session.rollback()
        print(f"DATABASE ERROR: Failed to update record. Error: {e}")
        return jsonify({"success": False, "message": "Database update failed."}), 500

    # Step 3: Item missing from JSON: update it directly in ChromaDB. This runs
    # even when the database row was found; returning right after the DB commit
    # left ChromaDB, which load_data prefers, with stale values.
    item_found_in_chroma = False
    if not item_found_in_json:
        try:
            chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode) or []
            chroma_record = _apply_field_update(chroma_data, mode, item_id, contact_id, field, value)
            if chroma_record is not None:
                _sync_record_to_chroma(user_api_key, mode, item_id, chroma_record)
                item_found_in_chroma = True
                print(f"ChromaDB: Updated {mode} item {item_id} directly in ChromaDB")
        except Exception as e:
            print(f"ChromaDB update warning: {e}")
            if not item_found_in_db:
                # No store accepted the change, so report the failure.
                return jsonify({"success": False, "message": "Database update failed."}), 500

    if not (item_found_in_json or item_found_in_db or item_found_in_chroma):
        return jsonify({"success": False, "message": "Item not found"}), 404
    return jsonify({"success": True})
def _contacts_document_text(brochure):
    """Build the knowledge-base text that summarises a brochure's contacts.

    Args:
        brochure: Brochure dict; its ``contacts`` list and ``company_name``
            are read.

    Returns:
        The newline-joined document text, or ``None`` when the brochure has
        no contacts (in which case no contacts document should be indexed).
    """
    contacts = brochure.get("contacts", [])
    if not contacts:
        return None
    lines = [f"Contact information for {brochure.get('company_name', 'this company')}:"]
    for contact in contacts:
        details = [f"Name: {contact.get('Owner Name')}"]
        email = contact.get("Email")
        number = contact.get("Number")
        if email:
            details.append(f"Email: {email}")
        if number:
            details.append(f"Phone: {number}")
        lines.append("- " + ", ".join(details))
    return "\n".join(lines)


def _reindex_brochure_contacts(user_api_key, mode, item_id, brochure):
    """Replace a brochure's ``<item_id>_contacts`` document and re-save its metadata."""
    contacts_doc_id = f"{item_id}_contacts"
    rag_core.remove_document_from_knowledge_base(user_api_key, contacts_doc_id, mode)
    document_text = _contacts_document_text(brochure)
    if document_text is not None:
        rag_core.add_document_to_knowledge_base(user_api_key, document_text, contacts_doc_id, mode)
    # Re-save the brochure so the stored metadata reflects the new contact list.
    brochure['_timestamp'] = time.time()
    rag_core.save_metadata_to_chroma(user_api_key, mode, item_id, brochure)


def _purge_item_from_chroma(user_api_key, mode, item_id):
    """Remove every knowledge-base document and the metadata record of one item."""
    if mode == 'cards':
        document_ids = [item_id]
    else:
        # A brochure is indexed as two documents: its contacts and its info.
        document_ids = [f"{item_id}_contacts", f"{item_id}_info"]
    for document_id in document_ids:
        rag_core.remove_document_from_knowledge_base(user_api_key, document_id, mode)
    rag_core.delete_metadata_from_chroma(user_api_key, mode, item_id)


@app.route('/delete_card/<mode>/<item_id>', methods=['DELETE'])
def delete_card_endpoint(mode, item_id):
    """Delete a card, a whole brochure, or one brochure contact.

    The deletion is applied to the JSON file, ChromaDB (documents and
    metadata) and the SQL database, in that order.

    Args:
        mode: ``'cards'`` or ``'brochures'`` (URL segment).
        item_id: JSON id of the card or brochure (URL segment).

    Request body (optional JSON object):
        contactId: when present in ``brochures`` mode, only that contact is
            removed from the brochure instead of the whole brochure.

    Returns:
        ``{"success": True}`` when the item was found in at least one layer;
        400 for a missing server API key, an invalid mode or a body that is
        not a JSON object; 404 when the item exists nowhere; 500 when the
        database step fails.
    """
    user_api_key = OPENROUTER_API_KEY  # Use hardcoded server-side API key
    if not user_api_key:
        return jsonify({'error': 'Server API key not configured'}), 400
    if mode not in ('cards', 'brochures'):
        return jsonify({'error': 'Invalid mode'}), 400

    # DELETE requests frequently carry no body, so a missing body means
    # "no contactId". A body that is present but unusable is rejected rather
    # than silently widened into a whole-item delete.
    payload = request.get_json(silent=True)
    if payload is None:
        if request.get_data():
            return jsonify({"success": False, "message": "Request body must be a JSON object"}), 400
        payload = {}
    elif not isinstance(payload, dict):
        return jsonify({"success": False, "message": "Request body must be a JSON object"}), 400
    contact_id = payload.get('contactId')
    deleting_contact = mode == 'brochures' and bool(contact_id)

    # Step 1: Delete from JSON file
    user_data = _load_user_data(user_api_key, mode)
    item_found_in_json = False
    if deleting_contact:
        for brochure in user_data:
            if brochure.get('id') == item_id:
                existing_contacts = brochure.get('contacts', [])
                brochure['contacts'] = [c for c in existing_contacts if c.get('id') != contact_id]
                item_found_in_json = len(brochure['contacts']) < len(existing_contacts)
                break
    else:
        remaining_items = [entry for entry in user_data if entry.get('id') != item_id]
        item_found_in_json = len(remaining_items) < len(user_data)
        user_data = remaining_items
    if item_found_in_json:
        _save_user_data(user_api_key, mode, user_data)

    # Step 1.5: Delete from ChromaDB (RAG knowledge base). Best effort: a
    # failure here is logged and must not block the database step.
    item_found_in_chroma = False
    try:
        chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode) or []
        chroma_item = next((entry for entry in chroma_data if entry.get('id') == item_id), None)
        if deleting_contact:
            if chroma_item:
                item_found_in_chroma = any(
                    c.get('id') == contact_id for c in chroma_item.get('contacts', [])
                )
            # Prefer the JSON copy (contact already removed above); fall back
            # to the ChromaDB copy when the brochure is missing from JSON.
            brochure = next((b for b in user_data if b.get('id') == item_id), None)
            if not brochure and chroma_item:
                brochure = chroma_item
                brochure['contacts'] = [c for c in brochure.get('contacts', []) if c.get('id') != contact_id]
            if brochure:
                _reindex_brochure_contacts(user_api_key, mode, item_id, brochure)
                print(f"ChromaDB: Re-indexed contacts for brochure {item_id} after contact deletion")
        else:
            item_found_in_chroma = chroma_item is not None
            _purge_item_from_chroma(user_api_key, mode, item_id)
            if mode == 'cards':
                print(f"ChromaDB: Removed document and metadata {item_id} from {mode} knowledge base")
            else:
                print(f"ChromaDB: Removed brochure {item_id} documents and metadata from knowledge base")
    except Exception as e:
        print(f"ChromaDB removal warning: {e}")

    # Step 2: Delete from Database
    try:
        user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
        if mode == 'cards':
            db_record = BusinessCard.query.filter_by(json_id=item_id, user_hash=user_hash).first()
            record_label = f"business card json_id: {item_id}"
        elif contact_id:
            db_record = Contact.query.filter_by(json_id=contact_id).first()
            # Only delete the contact when it belongs to one of this user's
            # brochures; an orphaned contact (no brochure) is left untouched.
            if db_record and not (db_record.brochure and db_record.brochure.user_hash == user_hash):
                db_record = None
            record_label = f"brochure contact json_id: {contact_id}"
        else:  # Delete whole brochure
            db_record = Brochure.query.filter_by(json_id=item_id, user_hash=user_hash).first()
            record_label = f"brochure json_id: {item_id}"

        if db_record:
            db.session.delete(db_record)  # Cascading delete will handle linked contacts
            db.session.commit()
            print(f"Database record deleted for {record_label}")
            return jsonify({"success": True})

        if not item_found_in_json and not item_found_in_chroma:
            # Last resort: look in ChromaDB once more. This only finds
            # something when the best-effort pass above failed part-way.
            chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode) or []
            chroma_item = next((entry for entry in chroma_data if entry.get('id') == item_id), None)
            if chroma_item is not None:
                if deleting_contact:
                    existing_contacts = chroma_item.get('contacts', [])
                    remaining_contacts = [c for c in existing_contacts if c.get('id') != contact_id]
                    # Report success only when the contact really existed,
                    # matching how item_found_in_chroma is computed above.
                    if len(remaining_contacts) < len(existing_contacts):
                        chroma_item['contacts'] = remaining_contacts
                        _reindex_brochure_contacts(user_api_key, mode, item_id, chroma_item)
                        print(f"ChromaDB: Deleted contact {contact_id} from brochure {item_id} in ChromaDB")
                        return jsonify({"success": True})
                else:
                    _purge_item_from_chroma(user_api_key, mode, item_id)
                    item_kind = 'card' if mode == 'cards' else 'brochure'
                    print(f"ChromaDB: Deleted {item_kind} {item_id} directly from ChromaDB")
                    return jsonify({"success": True})
            return jsonify({"success": False, "message": "Item not found"}), 404
        return jsonify({"success": True})
    except Exception as e:
        db.session.rollback()
        print(f"DATABASE ERROR: Failed to delete record. Error: {e}")
        return jsonify({"success": False, "message": "Database delete failed."}), 500
@app.route('/delete_all/<mode>', methods=['DELETE'])
def delete_all_endpoint(mode):
    """Delete all items for a given mode (cards or brochures).

    Clears the JSON file, the ChromaDB metadata and document chunks, and the
    SQL rows belonging to the server-side API key's user hash.

    Args:
        mode: ``'cards'`` or ``'brochures'`` (URL segment).

    Returns:
        JSON with ``success``, ``deleted_count`` and ``message``. The count
        is the larger of the JSON and ChromaDB item counts taken before
        deletion. 400 for a missing API key or invalid mode; 500 (after a
        database rollback) when any step raises.
    """
    user_api_key = OPENROUTER_API_KEY
    if not user_api_key:
        return jsonify({'error': 'Server API key not configured'}), 400
    if mode not in ['cards', 'brochures']:
        return jsonify({'error': 'Invalid mode'}), 400

    try:
        # Step 1: Count items before deletion (from both sources)
        user_data = _load_user_data(user_api_key, mode)
        chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
        # Get count from whichever source has more
        deleted_count = max(len(user_data), len(chroma_data) if chroma_data else 0)
        if deleted_count == 0:
            return jsonify({
                'success': True,
                'deleted_count': 0,
                'message': f'No {mode} to delete'
            })
        print(f"Starting deletion of {deleted_count} {mode}...")

        # Step 2: Clear JSON file
        _save_user_data(user_api_key, mode, [])
        print(f"Cleared JSON file for {mode}")

        # Step 3: Delete ALL metadata from ChromaDB (bulk delete)
        metadata_deleted = rag_core.delete_all_metadata_from_chroma(user_api_key, mode)
        print(f"Deleted {metadata_deleted} metadata records from ChromaDB")

        # Step 4: Delete ALL document chunks from ChromaDB (bulk delete)
        docs_deleted = rag_core.delete_all_documents_from_chroma(user_api_key, mode)
        print(f"Deleted {docs_deleted} document chunks from ChromaDB")

        # Step 5: Delete from SQL Database
        user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
        if mode == 'cards':
            db_deleted = BusinessCard.query.filter_by(user_hash=user_hash).delete()
            print(f"Deleted {db_deleted} business cards from SQL database")
        else:
            # A bulk Query.delete() issues a plain DELETE and skips ORM
            # relationship cascades, which would leave the brochures' contacts
            # behind. Deleting each brochure through the session uses the same
            # cascade the single-brochure delete endpoint depends on.
            brochures = Brochure.query.filter_by(user_hash=user_hash).all()
            for brochure in brochures:
                db.session.delete(brochure)
            db_deleted = len(brochures)
            print(f"Deleted {db_deleted} brochures from SQL database")
        db.session.commit()

        print(f"Successfully deleted all {deleted_count} {mode} from all storage layers")
        return jsonify({
            'success': True,
            'deleted_count': deleted_count,
            'message': f'Successfully deleted {deleted_count} {mode}'
        })
    except Exception as e:
        # NOTE: only the SQL work is rolled back; the JSON file and ChromaDB
        # may already have been cleared by the time a later step fails.
        db.session.rollback()
        print(f"DATABASE ERROR: Failed to delete all {mode}. Error: {e}")
        traceback.print_exc()
        return jsonify({
            'success': False,
            'message': f'Failed to delete all {mode}: {str(e)}'
        }), 500
@app.route('/')
def serve_dashboard():
    """Render the ``index.html`` template as the response for the root URL."""
    dashboard_page = render_template('index.html')
    return dashboard_page
@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Serve the file named in the URL from ``UPLOAD_FOLDER``."""
    file_response = send_from_directory(UPLOAD_FOLDER, filename)
    return file_response
# Health check endpoint - this endpoint is on the skip list of the
# before-request RAG initializer, so it never waits for model loading.
@app.route('/health')
def health_check():
    """Report that the service process is up."""
    status_payload = {"status": "ok", "message": "Service is running"}
    return jsonify(status_payload), 200
# Make sure the SQL tables exist. This is cheap, so it runs as soon as the
# module is imported rather than on the first request.
with app.app_context():
    db.create_all()
    print("Database tables (business_card, brochure, contact) checked and created if necessary.")
# The RAG system is started lazily: nothing is initialized until the first
# request that actually needs it arrives.
_rag_initialized = False


@app.before_request
def ensure_rag_initialized():
    """Initialize the RAG system once, on the first non-exempt request.

    Health checks, uploaded files, static files and the dashboard page are
    exempt and never trigger initialization. The attempt is made a single
    time: the flag is set whether initialization succeeds, reports the RAG
    system as unavailable, or raises.
    """
    global _rag_initialized
    exempt_endpoints = ('health_check', 'uploaded_file', 'static', 'serve_dashboard')
    if _rag_initialized or request.endpoint in exempt_endpoints:
        return

    print("First request received - initializing RAG system...")
    try:
        rag_available = rag_core.initialize_rag_system()
        _rag_initialized = True
        print(
            "RAG system initialized successfully!"
            if rag_available
            else "RAG system not available - OCR features will still work"
        )
    except Exception as e:
        print(f"RAG initialization error (non-fatal): {e}")
        _rag_initialized = True  # Mark as attempted so we don't retry
if __name__ == "__main__":
    # Running as a script (local development): start the RAG system up front
    # instead of waiting for the first request. A failure is reported but
    # does not stop the server from starting.
    try:
        rag_core.initialize_rag_system()
    except Exception as e:
        for notice in (f"RAG initialization failed: {e}", "App will start without RAG features"):
            print(notice)

    startup_banner = (
        "--- Server is starting! ---",
        f"User-specific data will be saved in '{os.path.abspath(DATA_FOLDER)}'",
        "To use the dashboard, open your web browser and go to: http://127.0.0.1:5000",
    )
    for banner_line in startup_banner:
        print(banner_line)

    # Open the dashboard in the browser, then hand control to the Flask server.
    webbrowser.open_new('http://127.0.0.1:5000')
    app.run(host='0.0.0.0', port=5000, debug=False, use_reloader=False)