Spaces:

jashdoshi77
/

visionextract-crm

Running

Jash Doshi

fix: Delete race condition - properly track ChromaDB items during deletion

ebfe0e8 17 days ago

53.7 kB

	# app.py

	import os
	import io
	import json
	import hashlib
	import requests
	import base64
	from flask import Flask, request, jsonify, send_from_directory, render_template, session
	import webbrowser
	from flask_cors import CORS
	from PIL import Image
	import fitz # PyMuPDF
	import rag_core
	from datetime import timedelta
	import traceback
	import time
	import re

	from dotenv import load_dotenv
	load_dotenv()

	# --- MODIFIED: Import db and models from models.py ---
	from models import db, BusinessCard, Brochure, Contact


	# --- APPLICATION SETUP ---
	app = Flask(__name__)
	# NOTE(review): CORS is enabled with no origin restrictions passed here;
	# confirm that is intended for deployment.
	CORS(app)

	# Reload templates when they change on disk (development convenience).
	app.config['TEMPLATES_AUTO_RELOAD'] = True
	app.jinja_env.auto_reload = True

	# --- SESSION CONFIGURATION ---
	# NOTE(review): the fallback below is a fixed string that is visible in the
	# source. SESSION_SECRET should always be set outside local development,
	# otherwise the key protecting sessions is publicly known.
	app.secret_key = os.environ.get("SESSION_SECRET", "a-very-secret-key-for-sessions")
	app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(hours=24)

	# --- FOLDER CONFIGURATION ---
	UPLOAD_FOLDER = 'uploads'    # uploaded card images and brochure PDFs
	DATA_FOLDER = 'user_data'    # per-user JSON data files
	app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

	# exist_ok=True creates the folders without a separate existence check, so
	# two processes starting at the same time cannot race each other.
	os.makedirs(UPLOAD_FOLDER, exist_ok=True)
	os.makedirs(DATA_FOLDER, exist_ok=True)

	# --- DATABASE CONFIGURATION ---
	# DATABASE_URI overrides the default local SQLite file.
	app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get(
	    'DATABASE_URI',
	    'sqlite:///local_crm.db'
	)
	app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

	# Bind the db object imported from models.py to this app.
	db.init_app(app)

	# --- API KEY (read from the environment, not hardcoded) ---
	# None when OPENROUTER_API_KEY is unset; every endpoint checks for that.
	OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")


	# --- DATABASE MODEL DEFINITIONS HAVE BEEN MOVED TO models.py ---


	# Maps the model key a client sends as ``selectedModel`` to the OpenRouter
	# model id that is actually requested. Note that the keys are only labels:
	# e.g. 'deepseek' and 'qwen' currently point at Gemma and Mistral models.
	MODEL_MAP = dict(
	    gemini='google/gemma-3-4b-it:free',
	    deepseek='google/gemma-3-27b-it:free',
	    qwen='mistralai/mistral-small-3.1-24b-instruct:free',
	    nvidia='nvidia/nemotron-nano-12b-v2-vl:free',
	    amazon='amazon/nova-2-lite-v1:free',
	)

	# Order in which model keys are tried, best first (ranked by OCR strength).
	FALLBACK_ORDER = ['gemini', 'deepseek', 'qwen', 'nvidia', 'amazon']



	# --- OpenRouter API helpers (vision and text-only calls, with model fallback) ---
	def _call_openrouter_api_with_fallback(api_key, selected_model_key, prompt, images=None):
	    """Send ``prompt`` (plus optional images) to OpenRouter and return the reply parsed as JSON.

	    Models are tried one at a time, starting with ``selected_model_key`` and
	    then the remaining keys of the fallback list. The first reply that parses
	    as a JSON object is returned; any failure moves on to the next model.

	    Args:
	        api_key: OpenRouter API key sent as a Bearer token.
	        selected_model_key: Preferred key into ``MODEL_MAP``.
	        prompt: Text instruction for the model.
	        images: Optional list of image objects exposing ``.format`` and
	            ``.save(buffer, format=...)``; each one is attached to the
	            request as a base64 data URL.

	    Returns:
	        The parsed JSON object (a dict) on success, otherwise a dict with a
	        single ``"error"`` key describing the last failure.
	    """
	    # Model keys that accept image input (currently every configured key).
	    vision_models = ['gemini', 'deepseek', 'qwen', 'nvidia', 'amazon']
	    # None instead of a shared mutable [] default.
	    images = images or []

	    if images:
	        models_to_try = [m for m in vision_models if m == selected_model_key]
	        models_to_try.extend(m for m in vision_models if m != selected_model_key)
	        models_to_try.extend(m for m in FALLBACK_ORDER if m not in vision_models)
	    else:
	        models_to_try = [selected_model_key]
	        models_to_try.extend(m for m in FALLBACK_ORDER if m != selected_model_key)

	    last_error = None

	    for model_key in models_to_try:
	        model_name = MODEL_MAP.get(model_key)
	        if not model_name:
	            continue

	        print(f"Attempting API call with model: {model_name}...")
	        content_parts = [{"type": "text", "text": prompt}]

	        if images:
	            if model_key not in vision_models:
	                print(f"Skipping {model_name} - no image input support")
	                continue
	            for img in images:
	                buffered = io.BytesIO()
	                img_format = img.format or "PNG"
	                img.save(buffered, format=img_format)
	                img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
	                content_parts.append({
	                    "type": "image_url",
	                    "image_url": {"url": f"data:image/{img_format.lower()};base64,{img_base64}"}
	                })

	        # Pre-bind the names the except handlers read, so a failure before
	        # they are assigned cannot raise UnboundLocalError inside a handler.
	        response = None
	        json_text = None
	        try:
	            response = requests.post(
	                url="https://openrouter.ai/api/v1/chat/completions",
	                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
	                json={"model": model_name, "messages": [{"role": "user", "content": content_parts}]},
	                timeout=30
	            )
	            response.raise_for_status()
	            api_response = response.json()

	            if 'choices' not in api_response or not api_response['choices']:
	                print(f"Model {model_name} returned empty response")
	                last_error = {"error": f"Model {model_name} returned empty response"}
	                continue

	            json_text = api_response['choices'][0]['message']['content']

	            # Models often wrap their JSON in a ```json fenced block.
	            cleaned_json_text = re.search(r'```json\s([\s\S]+?)\s```', json_text)
	            if cleaned_json_text:
	                json_text = cleaned_json_text.group(1)
	            else:
	                json_text = json_text.strip()

	            result = json.loads(json_text)
	            if not isinstance(result, dict):
	                # Callers use ``"error" in result`` and ``result.get``; a
	                # list or scalar is unusable, so try the next model.
	                print(f"Model {model_name} returned JSON that is not an object")
	                last_error = {"error": f"Model {model_name} returned invalid JSON."}
	                continue
	            print(f"Successfully processed with model: {model_name}")
	            return result
	        except requests.exceptions.HTTPError as http_err:
	            error_msg = f"HTTP error occurred for model {model_name}: {http_err}"
	            if response is not None:
	                error_msg += f"\nResponse: {response.text}"
	            print(error_msg)
	            status = response.status_code if response is not None else "unknown"
	            last_error = {"error": f"API request failed for {model_name} with status {status}."}
	            continue
	        except requests.exceptions.Timeout:
	            print(f"Timeout error for model {model_name}")
	            last_error = {"error": f"Request timeout for model {model_name}"}
	            continue
	        except json.JSONDecodeError as json_err:
	            error_msg = f"JSON Decode Error for model {model_name}: {json_err}\nMalformed response: {json_text}"
	            print(error_msg)
	            last_error = {"error": f"Model {model_name} returned invalid JSON."}
	            continue
	        except Exception as e:
	            print(f"An error occurred with model {model_name}: {e}")
	            traceback.print_exc()
	            last_error = {"error": f"An unexpected error occurred with model {model_name}."}
	            continue

	    return last_error or {"error": "All models failed to process the request."}

	def _call_openrouter_api_text_only_with_fallback(api_key, selected_model_key, prompt):
	    """Send a text-only ``prompt`` to OpenRouter and return the raw reply text.

	    The selected model is tried first, then every other key in
	    ``FALLBACK_ORDER``. Unlike the vision helper, the reply is returned as a
	    plain string and is not parsed as JSON.

	    Returns:
	        The model's message content on success. When every model fails, the
	        last error message as a string, or
	        "All models failed to process the text request." if no error was
	        recorded.
	    """
	    candidate_keys = [selected_model_key]
	    candidate_keys += [key for key in FALLBACK_ORDER if key != selected_model_key]

	    failure = None
	    for candidate in candidate_keys:
	        model_name = MODEL_MAP.get(candidate)
	        if not model_name:
	            continue
	        print(f"Attempting text-only API call with model: {model_name}...")
	        try:
	            response = requests.post(
	                url="https://openrouter.ai/api/v1/chat/completions",
	                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
	                json={"model": model_name, "messages": [{"role": "user", "content": prompt}]},
	                timeout=30
	            )
	            response.raise_for_status()
	            api_response = response.json()
	            has_choices = 'choices' in api_response and api_response['choices']
	            if not has_choices:
	                failure = {"error": f"Model {model_name} returned unexpected response format"}
	                continue
	            result = api_response['choices'][0]['message']['content']
	            print(f"Successfully processed text with model: {model_name}")
	            return result
	        except requests.exceptions.HTTPError as http_err:
	            error_msg = f"HTTP error occurred for model {model_name}: {http_err}"
	            if hasattr(response, 'text'):
	                error_msg += f"\nResponse: {response.text}"
	            print(error_msg)
	            failure = {"error": f"API request failed for {model_name} with status {response.status_code}."}
	            continue
	        except requests.exceptions.Timeout:
	            print(f"Timeout error for model {model_name}")
	            failure = {"error": f"Request timeout for model {model_name}"}
	            continue
	        except Exception as e:
	            print(f"An error occurred with model {model_name}: {e}")
	            traceback.print_exc()
	            failure = {"error": f"An unexpected error occurred with model {model_name}."}
	            continue

	    # Every recorded failure is a dict carrying an "error" message.
	    if failure is not None:
	        return failure["error"]
	    return "All models failed to process the text request."


	def _extract_contact_info_from_text(text):
	if not text: return "", []
	email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z\|a-z]{2,}\b'
	phone_pattern = r'(?:\+?\d{1,4}[-.\s]?)?(?:\(?\d{1,4}\)?[-.\s]?)?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}'
	emails = re.findall(email_pattern, text, re.IGNORECASE)
	phones = re.findall(phone_pattern, text)
	clean_text = text
	clean_text = re.sub(email_pattern, '', clean_text, flags=re.IGNORECASE)
	for phone in phones:
	if len(phone.replace('-', '').replace('.', '').replace(' ', '').replace('(', '').replace(')', '').replace('+', '')) >= 7:
	clean_text = clean_text.replace(phone, '')
	clean_text = re.sub(r'\s+', ' ', clean_text).strip()
	clean_text = re.sub(r'\n\s*\n', '\n', clean_text)
	return clean_text, emails + phones

	def _create_clean_info_text(brochure_data):
	company_name = brochure_data.get("company_name", "")
	raw_text = brochure_data.get("raw_text", "")
	info_parts = []
	if company_name and company_name != "Unknown Company":
	info_parts.append(f"Company: {company_name}")
	if raw_text:
	clean_text, _ = _extract_contact_info_from_text(raw_text)
	contact_phrases = [r'contact\s+us\s:?', r'for\s+more\s+information\s:?', r'reach\s+out\s+to\s:?', r'get\s+in\s+touch\s:?', r'phone\s:', r'email\s:', r'tel\s:', r'mobile\s:', r'call\s+us\s:?', r'write\s+to\s+us\s:?',]
	for phrase in contact_phrases:
	clean_text = re.sub(phrase, '', clean_text, flags=re.IGNORECASE)
	clean_text = re.sub(r'\s+', ' ', clean_text).strip()
	clean_text = re.sub(r'\n\s*\n', '\n', clean_text)
	if clean_text: info_parts.append(clean_text)
	return "\n".join(info_parts) if info_parts else ""

	def _get_user_data_filepath(user_api_key, mode):
	    """Return the path of the JSON data file for this API key and mode.

	    The file name starts with the first 16 hex characters of the SHA-256 of
	    the key, so the key itself never appears on disk.
	    """
	    digest = hashlib.sha256(user_api_key.encode()).hexdigest()
	    filename = f'{digest[:16]}_{mode}_data.json'
	    return os.path.join(DATA_FOLDER, filename)

	def _load_user_data(user_api_key, mode):
	    """Load the stored JSON data for this API key and mode.

	    Returns the decoded file content, or an empty list when the file does
	    not exist, cannot be read, or does not contain valid JSON.
	    """
	    filepath = _get_user_data_filepath(user_api_key, mode)
	    if not os.path.exists(filepath):
	        return []
	    try:
	        with open(filepath, 'r') as handle:
	            stored = json.load(handle)
	    except (IOError, json.JSONDecodeError):
	        return []
	    return stored

	def _save_user_data(user_api_key, mode, data):
	    """Write ``data`` as indented JSON to the file for this API key and mode.

	    Returns True when the file was written and False on an I/O error.
	    """
	    filepath = _get_user_data_filepath(user_api_key, mode)
	    try:
	        with open(filepath, 'w') as handle:
	            json.dump(data, handle, indent=4)
	    except IOError:
	        return False
	    return True

	def _clean_and_validate_contacts(data):
	if not data or "contacts" not in data: return data
	cleaned_contacts = []
	def is_placeholder(value):
	if not isinstance(value, str): return True
	test_val = value.strip().lower()
	if not test_val: return True
	placeholders = ["n/a", "na", "none", "null"]
	if test_val in placeholders: return True
	if "not available" in test_val or "not specified" in test_val or "not applicable" in test_val: return True
	return False
	for contact in data.get("contacts", []):
	name = contact.get("Owner Name")
	if is_placeholder(name): continue
	cleaned_contacts.append({
	"Owner Name": name.strip(),
	"Email": None if is_placeholder(contact.get("Email")) else contact.get("Email").strip(),
	"Number": None if is_placeholder(contact.get("Number")) else contact.get("Number").strip()
	})
	data["contacts"] = cleaned_contacts
	return data

	def extract_card_data(image_bytes, user_api_key, selected_model_key):
	    """Read a business card image and return its fields as a dict.

	    Args:
	        image_bytes: Raw bytes of the uploaded image.
	        user_api_key: OpenRouter API key; a falsy value short-circuits with
	            an error.
	        selected_model_key: Preferred model key for the vision call.

	    Returns:
	        A dict with the keys "Owner Name", "Company Name", "Email", "Number"
	        and "Address" (missing values are None), or a dict with an "error"
	        key when the key is missing, the API helper fails, or anything raises.
	    """
	    print("Processing business card with OpenRouter API...")
	    if not user_api_key:
	        return {"error": "A valid OpenRouter API Key was not provided."}

	    card_fields = ("Owner Name", "Company Name", "Email", "Number", "Address")
	    try:
	        card_image = Image.open(io.BytesIO(image_bytes))
	        prompt = """You are an expert at reading business cards. Analyze the image and extract information into a structured JSON format. The JSON object must use these exact keys: "Owner Name", "Company Name", "Email", "Number", "Address". If a piece of information is not present, its value must be `null`. Your entire response MUST be a single, valid JSON object."""
	        parsed_info = _call_openrouter_api_with_fallback(
	            user_api_key, selected_model_key, prompt, images=[card_image]
	        )
	        if "error" in parsed_info:
	            return parsed_info
	        # Keep only the expected keys, in a fixed order.
	        return {field: parsed_info.get(field) for field in card_fields}
	    except Exception as e:
	        print(f"Error during OpenRouter API call for business card: {e}")
	        traceback.print_exc()
	        return {"error": f"Failed to parse AI response: {e}"}

	def _extract_brochure_data_with_vision(image_list, user_api_key, selected_model_key):
	print(f"Vision Extraction: Analyzing {len(image_list)} images with OpenRouter...")
	if not user_api_key: return {"error": "A valid OpenRouter API Key was not provided."}
	try:
	prompt = """You are a world-class document analysis expert. Analyze the provided document images with maximum precision. CRITICAL INSTRUCTIONS: 1. Extract the company name. 2. Extract ONLY contact information (names, emails, phone numbers) and put them in the "contacts" array. 3. Extract ALL OTHER content (company description, services, mission, addresses, general information) as "raw_text". 4. DO NOT include contact details like names, emails, or phone numbers in the raw_text. 5. Focus on separating contact information from general company information. OUTPUT FORMAT: Return a SINGLE, valid JSON object with these exact keys: "company_name", "contacts", "raw_text". The "contacts" key must contain a list of objects, each with "Owner Name", "Email", and "Number". If a piece of information is missing for a contact, use `null`. The "raw_text" should contain business information, services, descriptions, but NO contact details."""
	raw_data = _call_openrouter_api_with_fallback(user_api_key, selected_model_key, prompt, images=image_list)
	if "error" in raw_data: return raw_data
	print("AI vision extraction complete. Applying bulletproof cleaning...")
	cleaned_data = _clean_and_validate_contacts(raw_data)
	return cleaned_data
	except Exception as e:
	print(f"Error during unified brochure vision extraction: {e}")
	traceback.print_exc()
	return {"error": f"Failed to parse data from brochure images: {e}"}

	@app.before_request
	def make_session_permanent():
	    """Mark the session as permanent before every request.

	    A permanent session uses the ``PERMANENT_SESSION_LIFETIME`` configured
	    for the app.
	    """
	    session.permanent = True

	@app.route('/process_card', methods=['POST'])
	def process_card_endpoint():
	    """Handle a business card upload: extract, store and index it.

	    Expects a multipart form with a ``file`` part and a ``selectedModel``
	    field. On success the extracted card (with ``id``, ``image_filename``
	    and ``_timestamp``) is returned as JSON after being written to the
	    uploads folder, the JSON data file, the database, the RAG knowledge
	    base and the ChromaDB metadata store. A database failure is logged and
	    does not fail the request.
	    """
	    if 'file' not in request.files:
	        return jsonify({'error': 'No file part'}), 400
	    file = request.files['file']
	    selected_model_key = request.form.get('selectedModel')
	    user_api_key = OPENROUTER_API_KEY  # server-side key from the environment
	    if not (user_api_key and selected_model_key):
	        return jsonify({'error': 'Server API key not configured or model not selected'}), 400
	    if selected_model_key not in MODEL_MAP:
	        return jsonify({'error': 'Invalid model selected'}), 400

	    try:
	        image_bytes = file.read()
	        card = extract_card_data(image_bytes, user_api_key, selected_model_key)
	        if "error" in card:
	            return jsonify(card), 500

	        # Store the upload under a random id; unknown extensions become .png.
	        file_id = os.urandom(8).hex()
	        extension = os.path.splitext(file.filename)[1]
	        if extension.lower() not in ('.png', '.jpg', '.jpeg', '.webp'):
	            extension = '.png'
	        image_filename = f"{file_id}{extension}"
	        with open(os.path.join(UPLOAD_FOLDER, image_filename), 'wb') as out:
	            out.write(image_bytes)

	        card['id'] = file_id
	        card['image_filename'] = image_filename

	        # Newest card goes first in the JSON data file.
	        stored_cards = _load_user_data(user_api_key, 'cards')
	        stored_cards.insert(0, card)
	        _save_user_data(user_api_key, 'cards', stored_cards)

	        try:
	            card_row = BusinessCard(
	                json_id=file_id,
	                owner_name=card.get("Owner Name"),
	                company_name=card.get("Company Name"),
	                email=card.get("Email"),
	                phone_number=card.get("Number"),
	                address=card.get("Address"),
	                source_document=file.filename,
	                user_hash=hashlib.sha256(user_api_key.encode()).hexdigest()
	            )
	            db.session.add(card_row)
	            db.session.commit()
	            print(f"Successfully saved business card for '{card.get('Owner Name')}' to the database.")
	        except Exception as e:
	            # Best effort: the card is still returned and indexed below.
	            db.session.rollback()
	            print(f"DATABASE ERROR: Failed to save business card data. Error: {e}")
	            traceback.print_exc()

	        # Index every non-empty field value except the bookkeeping keys.
	        skipped_keys = ['id', 'image_filename']
	        rag_values = [str(value) for key, value in card.items() if value and key not in skipped_keys]
	        raw_text_for_rag = ' '.join(rag_values)
	        rag_core.add_document_to_knowledge_base(user_api_key, raw_text_for_rag, file_id, 'cards')

	        # Save metadata to ChromaDB for persistence across restarts
	        card['_timestamp'] = time.time()
	        rag_core.save_metadata_to_chroma(user_api_key, 'cards', file_id, card)

	        return jsonify(card)
	    except Exception as e:
	        print(f"An error occurred in process_card endpoint: {e}")
	        traceback.print_exc()
	        return jsonify({'error': 'Server processing failed'}), 500

	@app.route('/process_brochure', methods=['POST'])
	def process_brochure_endpoint():
	    """Handle a brochure PDF upload: extract, store and index it.

	    Expects a multipart form with a ``file`` part (a PDF) and a
	    ``selectedModel`` field. Extraction is attempted in this order:

	    1. Text-first: if the PDF has more than 100 characters of embedded
	       text, that text is sent to a text model.
	    2. Vision at 150 DPI when the text path yields nothing usable.
	    3. Vision at 300 DPI when the 150 DPI result is an error or is nearly
	       empty (no contacts and fewer than 50 characters of text).

	    On success the brochure object (``id``, ``company_name``, ``contacts``,
	    ``raw_text``, ``image_filename``, ``_timestamp``) is returned as JSON
	    after being written to the uploads folder, the JSON data file, the
	    database, the RAG knowledge base and the ChromaDB metadata store. A
	    database failure is logged and does not fail the request.
	    """
	    if 'file' not in request.files:
	        return jsonify({'error': 'No file part'}), 400
	    file, selected_model_key = request.files['file'], request.form.get('selectedModel')
	    user_api_key = OPENROUTER_API_KEY  # server-side key from the environment
	    if not user_api_key or not selected_model_key:
	        return jsonify({'error': 'Server API key not configured or model not selected'}), 400
	    if selected_model_key not in MODEL_MAP:
	        return jsonify({'error': 'Invalid model selected'}), 400

	    try:
	        pdf_bytes = file.read()
	        pdf_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
	        try:
	            brochure_json_id = os.urandom(8).hex()
	            pdf_filename = f"{brochure_json_id}.pdf"
	            save_path = os.path.join(UPLOAD_FOLDER, pdf_filename)
	            with open(save_path, 'wb') as f:
	                f.write(pdf_bytes)

	            extracted_data = {}
	            full_text_from_pdf = "".join(page.get_text("text") for page in pdf_doc).strip()

	            if len(full_text_from_pdf) > 100:
	                print("'Text-First' successful. Using text model.")
	                try:
	                    # Must be an f-string: as a plain string the literal
	                    # "{full_text_from_pdf}" was sent and the model never
	                    # saw the document text.
	                    prompt = f"""Analyze the following text and structure it into a JSON object with keys "company_name", "contacts", and "raw_text". CRITICAL INSTRUCTIONS: 1. Extract the company name. 2. Extract ONLY contact information (names, emails, phone numbers) into the "contacts" array. 3. Extract ALL OTHER content into "raw_text". 4. DO NOT include contact details in raw_text. "contacts" should be a list of objects with "Owner Name", "Email", and "Number". DOCUMENT TEXT: --- {full_text_from_pdf} ---"""
	                    result = _call_openrouter_api_text_only_with_fallback(user_api_key, selected_model_key, prompt)
	                    if isinstance(result, str) and not result.startswith("All models failed"):
	                        # Accept replies wrapped in a ``` or ```json fence.
	                        fenced = re.search(r'```(?:json)?\s*([\s\S]+?)\s*```', result)
	                        payload = fenced.group(1) if fenced else result.strip()
	                        parsed = json.loads(payload)
	                        if isinstance(parsed, dict):
	                            # Same contact cleaning as the vision path.
	                            extracted_data = _clean_and_validate_contacts(parsed)
	                except Exception as e:
	                    # Any failure here simply falls through to the vision path.
	                    print(f"Text-first extraction failed, falling back to vision: {e}")
	                    extracted_data = {}

	            if "error" in extracted_data or not extracted_data:
	                print("Adaptive Vision: Attempting medium resolution (150 DPI)...")
	                med_res_images = [Image.open(io.BytesIO(page.get_pixmap(dpi=150).tobytes("png"))) for page in pdf_doc]
	                extracted_data = _extract_brochure_data_with_vision(med_res_images, user_api_key, selected_model_key)
	                # ``or ""`` guards against an explicit null raw_text.
	                is_poor_quality = "error" in extracted_data or (
	                    not extracted_data.get("contacts")
	                    and len(extracted_data.get("raw_text") or "") < 50
	                )
	                if is_poor_quality:
	                    print("Medium resolution failed. Retrying with high resolution (300 DPI)...")
	                    high_res_images = [Image.open(io.BytesIO(page.get_pixmap(dpi=300).tobytes("png"))) for page in pdf_doc]
	                    extracted_data = _extract_brochure_data_with_vision(high_res_images, user_api_key, selected_model_key)
	        finally:
	            # The document is only needed for text extraction and rendering.
	            pdf_doc.close()

	        if "error" in extracted_data:
	            return jsonify(extracted_data), 500

	        # Tolerate null / malformed values coming back from the model.
	        contacts = extracted_data.get("contacts")
	        if not isinstance(contacts, list):
	            contacts = []
	        contacts = [c for c in contacts if isinstance(c, dict)]

	        final_brochure_object = {
	            "id": brochure_json_id,
	            "company_name": extracted_data.get("company_name") or "Unknown Company",
	            "contacts": contacts,
	            "raw_text": extracted_data.get("raw_text") or "",
	            "image_filename": pdf_filename
	        }
	        for contact in final_brochure_object["contacts"]:
	            contact["id"] = os.urandom(8).hex()

	        # Newest brochure goes first in the JSON data file.
	        user_brochures = _load_user_data(user_api_key, 'brochures')
	        user_brochures.insert(0, final_brochure_object)
	        _save_user_data(user_api_key, 'brochures', user_brochures)

	        try:
	            user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
	            new_brochure = Brochure(
	                json_id=brochure_json_id,
	                company_name=final_brochure_object.get("company_name"),
	                raw_text=final_brochure_object.get("raw_text"),
	                source_document=file.filename,
	                user_hash=user_hash
	            )
	            db.session.add(new_brochure)

	            for contact_data in final_brochure_object.get("contacts", []):
	                new_contact = Contact(
	                    json_id=contact_data['id'],
	                    owner_name=contact_data.get("Owner Name"),
	                    email=contact_data.get("Email"),
	                    phone_number=contact_data.get("Number"),
	                    brochure=new_brochure
	                )
	                db.session.add(new_contact)

	            db.session.commit()
	            print(f"Successfully saved brochure '{new_brochure.company_name}' and {len(new_brochure.contacts)} contacts to the database.")
	        except Exception as e:
	            # Best effort: the brochure is still returned and indexed below.
	            db.session.rollback()
	            print(f"DATABASE ERROR: Failed to save brochure data. Error: {e}")
	            traceback.print_exc()

	        # Contacts and general information are indexed as two separate
	        # documents: "<id>_contacts" and "<id>_info".
	        print("Indexing separated and cleaned content for high-quality RAG...")
	        contacts = final_brochure_object.get("contacts", [])
	        if contacts:
	            contact_text_parts = [f"Contact information for {final_brochure_object.get('company_name', 'this company')}:"]
	            for contact in contacts:
	                name, email, number = contact.get("Owner Name"), contact.get("Email"), contact.get("Number")
	                contact_info = [f"Name: {name}"]
	                if email:
	                    contact_info.append(f"Email: {email}")
	                if number:
	                    contact_info.append(f"Phone: {number}")
	                contact_text_parts.append("- " + ", ".join(contact_info))
	            contacts_document_text = "\n".join(contact_text_parts)
	            rag_core.add_document_to_knowledge_base(user_api_key, contacts_document_text, f"{brochure_json_id}_contacts", 'brochures')
	        clean_info_text = _create_clean_info_text(final_brochure_object)
	        if clean_info_text and clean_info_text.strip():
	            rag_core.add_document_to_knowledge_base(user_api_key, clean_info_text, f"{brochure_json_id}_info", 'brochures')
	        print("RAG indexing completed successfully!")

	        # Save metadata to ChromaDB for persistence across restarts
	        final_brochure_object['_timestamp'] = time.time()
	        rag_core.save_metadata_to_chroma(user_api_key, 'brochures', brochure_json_id, final_brochure_object)

	        return jsonify(final_brochure_object)
	    except Exception as e:
	        print(f"An error occurred in process_brochure endpoint: {e}")
	        traceback.print_exc()
	        return jsonify({'error': f'Server processing failed: {e}'}), 500

	@app.route('/chat', methods=['POST'])
	def chat_endpoint():
	    """Answer a chat query about the stored cards or brochures.

	    Expects a JSON object with ``query``, ``mode`` and ``selectedModel``.
	    Queries containing "table" or "list all" are answered by asking a text
	    model to build a markdown table from the stored data; everything else
	    goes through the RAG knowledge base. Both the user message and the
	    answer are appended to the chat history.

	    Returns:
	        ``{"answer": ...}`` on success, a 400 JSON error for a missing or
	        invalid field, or a 500 JSON error when processing raises.
	    """
	    # silent=True returns None for a missing or malformed JSON body, so a
	    # bad request gets the JSON 400 below instead of an AttributeError.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        data = {}
	    query_text, mode, selected_model_key = data.get('query'), data.get('mode'), data.get('selectedModel')
	    user_api_key = OPENROUTER_API_KEY  # server-side key from the environment
	    if not all([user_api_key, query_text, mode, selected_model_key]) or not isinstance(query_text, str):
	        return jsonify({'error': 'Query, mode, and model are required.'}), 400
	    if selected_model_key not in MODEL_MAP:
	        return jsonify({'error': 'Invalid model selected'}), 400
	    try:
	        # NOTE(review): this copies the server-side API key into the Flask
	        # session. Confirm something still reads it, and that the session
	        # storage in use does not expose the value to the client.
	        session['api_key'] = user_api_key

	        # Save user message to chat history
	        rag_core.save_chat_message(user_api_key, mode, 'user', query_text)

	        lowered_query = query_text.lower()
	        intent = 'synthesis' if "table" in lowered_query or "list all" in lowered_query else 'research'
	        print(f"Intent detected: {intent}")
	        if intent == 'synthesis':
	            # Try ChromaDB first, fall back to JSON
	            data_source = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
	            if not data_source:
	                data_source = _load_user_data(user_api_key, mode)
	            if mode == 'brochures':
	                # Flatten to one row per contact, tagged with its company.
	                synthesis_data = [
	                    {
	                        "Company Name": brochure.get("company_name"),
	                        "Owner Name": contact.get("Owner Name"),
	                        "Email": contact.get("Email"),
	                        "Number": contact.get("Number"),
	                    }
	                    for brochure in data_source
	                    for contact in brochure.get('contacts', [])
	                ]
	            else:
	                synthesis_data = data_source
	            synthesis_prompt = f"As a data analyst, create a markdown table based on the user's request from the following JSON data.\nJSON: {json.dumps(synthesis_data, indent=2)}\nRequest: {query_text}\nAnswer:"
	            answer = _call_openrouter_api_text_only_with_fallback(user_api_key, selected_model_key, synthesis_prompt)
	        else:
	            answer = rag_core.query_knowledge_base(user_api_key, query_text, mode, selected_model_key)

	        # Save assistant response to chat history
	        rag_core.save_chat_message(user_api_key, mode, 'assistant', answer)

	        return jsonify({'answer': answer})
	    except Exception as e:
	        print(f"Error in /chat endpoint: {e}")
	        traceback.print_exc()
	        return jsonify({'error': 'An internal error occurred.'}), 500

	@app.route('/chat_history/<mode>', methods=['GET'])
	def get_chat_history_endpoint(mode):
	    """Return the stored chat history for ``mode``.

	    The optional ``limit`` query parameter (integer, default 20) is passed
	    on to the history lookup.
	    """
	    user_api_key = OPENROUTER_API_KEY
	    if not user_api_key:
	        return jsonify({'error': 'Server API key not configured'}), 400

	    max_messages = request.args.get('limit', 20, type=int)
	    messages = rag_core.get_chat_history(user_api_key, mode, max_messages)
	    return jsonify({'history': messages})

	@app.route('/clear_chat/<mode>', methods=['POST'])
	def clear_chat_endpoint(mode):
	    """Clear the stored chat history for ``mode`` and report the outcome."""
	    user_api_key = OPENROUTER_API_KEY
	    if not user_api_key:
	        return jsonify({'error': 'Server API key not configured'}), 400

	    cleared = rag_core.clear_chat_history(user_api_key, mode)
	    return jsonify({'success': cleared})

	@app.route('/sync_check/<mode>', methods=['GET'])
	def sync_check_endpoint(mode):
	    """Report a cheap fingerprint of the stored items for change detection.

	    Returns JSON with ``count`` (number of items), ``hash`` (first 8 hex
	    characters of the MD5 of the sorted, concatenated item ids) and
	    ``timestamp`` (current server time). Any failure while loading yields
	    a zero count and an empty hash instead of an error status.
	    """
	    user_api_key = OPENROUTER_API_KEY
	    if not user_api_key:
	        return jsonify({'error': 'Server API key not configured'}), 400

	    if mode not in ['cards', 'brochures']:
	        return jsonify({'error': 'Invalid mode'}), 400

	    try:
	        # ChromaDB is the primary store; the JSON file is the fallback.
	        items = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
	        if not items:
	            items = _load_user_data(user_api_key, mode)

	        if items:
	            count = len(items)
	            ids = sorted([entry.get('id', '') for entry in items])
	        else:
	            count = 0
	            ids = []
	        # MD5 is used only as a change fingerprint here, not for security.
	        ids_hash = hashlib.md5(''.join(ids).encode()).hexdigest()[:8]

	        return jsonify({
	            'count': count,
	            'hash': ids_hash,
	            'timestamp': time.time()
	        })
	    except Exception as e:
	        print(f"Sync check error: {e}")
	        return jsonify({'count': 0, 'hash': '', 'timestamp': time.time()})

	@app.route('/load_data/<mode>', methods=['POST'])
	def load_data_endpoint(mode):
	    """Return every stored item for ``mode`` as JSON.

	    ChromaDB metadata is preferred because it persists across restarts; the
	    local JSON file is used only when ChromaDB returns nothing.
	    """
	    user_api_key = OPENROUTER_API_KEY  # server-side key from the environment
	    if not user_api_key:
	        return jsonify({'error': 'Server API key not configured'}), 400

	    persisted_items = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
	    if persisted_items:
	        print(f"Loaded {len(persisted_items)} items from ChromaDB for {mode}")
	        return jsonify(persisted_items)

	    # Backwards compatibility: data written before the ChromaDB store existed.
	    return jsonify(_load_user_data(user_api_key, mode))

	@app.route('/update_card/<mode>/<item_id>', methods=['POST'])
	def update_card_endpoint(mode, item_id):
	# Update a single field of a stored item in every backing store.
	#
	# For mode 'cards' the field is set on the card whose id is item_id. For
	# mode 'brochures' the field is set on the contact identified by the
	# request's contactId inside the brochure whose id is item_id.
	#
	# The request body is JSON with 'field', 'value' and (for brochures)
	# 'contactId'. The update is applied to, in order: the JSON data file, the
	# ChromaDB knowledge base and metadata, and the SQL database. If the item
	# was not in the JSON file, ChromaDB is searched directly as a last resort.
	#
	# NOTE(review): the indentation of this block is not preserved in this copy
	# of the file, so the exact nesting of the return statements below could
	# not be confirmed - verify against the original before refactoring.
	# NOTE(review): request.get_json() is used without checking for None, and
	# 'field' is written as a dict key without being validated in the JSON and
	# ChromaDB steps (only the database step maps it through field_map).
	data = request.get_json()
	field, value, contact_id = data.get('field'), data.get('value'), data.get('contactId')
	user_api_key = OPENROUTER_API_KEY # Use hardcoded server-side API key
	if not user_api_key: return jsonify({'error': 'Server API key not configured'}), 400

	# Step 1: Update JSON file (Existing Logic, Unchanged)
	user_data = _load_user_data(user_api_key, mode)
	# Set to True only when a matching card/contact was found and modified.
	item_found_in_json = False
	if mode == 'cards':
	for card in user_data:
	if card.get('id') == item_id:
	card[field] = value
	item_found_in_json = True
	break
	elif mode == 'brochures':
	for brochure in user_data:
	if brochure.get('id') == item_id and contact_id:
	for contact in brochure.get('contacts', []):
	if contact.get('id') == contact_id:
	contact[field] = value
	item_found_in_json = True
	break
	if item_found_in_json: break
	# The file is rewritten only when something actually changed.
	if item_found_in_json:
	_save_user_data(user_api_key, mode, user_data)

	# Step 1.5: Update ChromaDB (RAG knowledge base)
	# This step works from user_data (the JSON file content), so it does
	# nothing for items that exist only in ChromaDB; those are handled in the
	# fallback near the end. Errors here are logged and do not stop the update.
	try:
	if mode == 'cards':
	# Get the updated card data
	updated_card = next((c for c in user_data if c.get('id') == item_id), None)
	if updated_card:
	# Remove old document and re-add with updated content
	rag_core.remove_document_from_knowledge_base(user_api_key, item_id, mode)
	raw_text = ' '.join(str(v) for k, v in updated_card.items() if v and k not in ['id', 'image_filename'])
	rag_core.add_document_to_knowledge_base(user_api_key, raw_text, item_id, mode)
	# Also update metadata in ChromaDB
	updated_card['_timestamp'] = time.time()
	rag_core.save_metadata_to_chroma(user_api_key, mode, item_id, updated_card)
	print(f"ChromaDB: Updated document and metadata {item_id} in {mode} knowledge base")
	elif mode == 'brochures' and contact_id:
	# Find the brochure and re-index its contacts
	brochure = next((b for b in user_data if b.get('id') == item_id), None)
	if brochure:
	# Remove old contacts document and re-add with updated content
	contacts_doc_id = f"{item_id}_contacts"
	rag_core.remove_document_from_knowledge_base(user_api_key, contacts_doc_id, mode)
	contacts = brochure.get("contacts", [])
	if contacts:
	# Same contact document layout as built when a brochure is first processed.
	contact_text_parts = [f"Contact information for {brochure.get('company_name', 'this company')}:"]
	for contact in contacts:
	name, email, number = contact.get("Owner Name"), contact.get("Email"), contact.get("Number")
	contact_info = [f"Name: {name}"]
	if email: contact_info.append(f"Email: {email}")
	if number: contact_info.append(f"Phone: {number}")
	contact_text_parts.append("- " + ", ".join(contact_info))
	contacts_document_text = "\n".join(contact_text_parts)
	rag_core.add_document_to_knowledge_base(user_api_key, contacts_document_text, contacts_doc_id, mode)
	# Also update metadata in ChromaDB
	brochure['_timestamp'] = time.time()
	rag_core.save_metadata_to_chroma(user_api_key, mode, item_id, brochure)
	print(f"ChromaDB: Updated contacts and metadata for brochure {item_id}")
	except Exception as e:
	print(f"ChromaDB update warning: {e}")

	# ## FINAL DATABASE CODE ##
	# Step 2: Update Database (New Logic)
	try:
	# Database rows are scoped by the full SHA-256 hex digest of the API key.
	user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
	if mode == 'cards':
	db_card = BusinessCard.query.filter_by(json_id=item_id, user_hash=user_hash).first()
	if db_card:
	# Maps the JSON field names to the model's column attributes; a field
	# that is not listed here is not written to the database.
	field_map = {"Owner Name": "owner_name", "Company Name": "company_name", "Email": "email", "Number": "phone_number", "Address": "address"}
	db_field = field_map.get(field)
	if db_field:
	setattr(db_card, db_field, value)
	db.session.commit()
	print(f"Database updated for business card json_id: {item_id}")
	return jsonify({"success": True})
	elif mode == 'brochures' and contact_id:
	db_contact = Contact.query.filter_by(json_id=contact_id).first()
	# The contact is only updated when its brochure belongs to this user hash.
	if db_contact and db_contact.brochure.user_hash == user_hash:
	field_map = {"Owner Name": "owner_name", "Email": "email", "Number": "phone_number"}
	db_field = field_map.get(field)
	if db_field:
	setattr(db_contact, db_field, value)
	db.session.commit()
	print(f"Database updated for brochure contact json_id: {contact_id}")
	return jsonify({"success": True})

	# Reached when the database step above did not return.
	if not item_found_in_json:
	# Try to find in ChromaDB if not in JSON
	chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
	if chroma_data:
	if mode == 'cards':
	for card in chroma_data:
	if card.get('id') == item_id:
	card[field] = value
	card['_timestamp'] = time.time()
	rag_core.save_metadata_to_chroma(user_api_key, mode, item_id, card)
	# Also update RAG knowledge base
	rag_core.remove_document_from_knowledge_base(user_api_key, item_id, mode)
	# Unlike Step 1.5, '_timestamp' is excluded from the indexed text here.
	raw_text = ' '.join(str(v) for k, v in card.items() if v and k not in ['id', 'image_filename', '_timestamp'])
	rag_core.add_document_to_knowledge_base(user_api_key, raw_text, item_id, mode)
	print(f"ChromaDB: Updated card {item_id} directly in ChromaDB")
	return jsonify({"success": True})
	elif mode == 'brochures' and contact_id:
	for brochure in chroma_data:
	if brochure.get('id') == item_id:
	for contact in brochure.get('contacts', []):
	if contact.get('id') == contact_id:
	contact[field] = value
	brochure['_timestamp'] = time.time()
	rag_core.save_metadata_to_chroma(user_api_key, mode, item_id, brochure)
	# Re-index contacts in RAG
	contacts_doc_id = f"{item_id}_contacts"
	rag_core.remove_document_from_knowledge_base(user_api_key, contacts_doc_id, mode)
	contacts = brochure.get("contacts", [])
	if contacts:
	contact_text_parts = [f"Contact information for {brochure.get('company_name', 'this company')}:"]
	for c in contacts:
	name, email, number = c.get("Owner Name"), c.get("Email"), c.get("Number")
	contact_info = [f"Name: {name}"]
	if email: contact_info.append(f"Email: {email}")
	if number: contact_info.append(f"Phone: {number}")
	contact_text_parts.append("- " + ", ".join(contact_info))
	contacts_document_text = "\n".join(contact_text_parts)
	rag_core.add_document_to_knowledge_base(user_api_key, contacts_document_text, contacts_doc_id, mode)
	print(f"ChromaDB: Updated brochure contact {contact_id} directly in ChromaDB")
	return jsonify({"success": True})
	# NOTE(review): presumably reached only when the item was in neither the
	# JSON file nor ChromaDB - confirm the nesting in the original file.
	return jsonify({"success": False, "message": "Item not found"}), 404
	# NOTE(review): presumably the result when the JSON file was updated but
	# the database step made no change - confirm the nesting in the original.
	return jsonify({"success": True})

	except Exception as e:
	# Undo any uncommitted database change before reporting the failure.
	db.session.rollback()
	print(f"DATABASE ERROR: Failed to update record. Error: {e}")
	return jsonify({"success": False, "message": "Database update failed."}), 500
	# ## END FINAL DATABASE CODE ##


	def _format_brochure_contacts_document(brochure):
	    """Build the knowledge-base text that lists every contact of ``brochure``.

	    Returns None when the brochure has no contacts, in which case no
	    contacts document should be indexed.
	    """
	    contacts = brochure.get("contacts", [])
	    if not contacts:
	        return None
	    lines = [f"Contact information for {brochure.get('company_name', 'this company')}:"]
	    for contact in contacts:
	        name, email, number = contact.get("Owner Name"), contact.get("Email"), contact.get("Number")
	        details = [f"Name: {name}"]
	        if email:
	            details.append(f"Email: {email}")
	        if number:
	            details.append(f"Phone: {number}")
	        lines.append("- " + ", ".join(details))
	    return "\n".join(lines)


	def _reindex_brochure_contacts(user_api_key, mode, item_id, brochure):
	    """Replace the brochure's contacts document and re-save its metadata in ChromaDB."""
	    contacts_doc_id = f"{item_id}_contacts"
	    rag_core.remove_document_from_knowledge_base(user_api_key, contacts_doc_id, mode)
	    contacts_text = _format_brochure_contacts_document(brochure)
	    if contacts_text:
	        rag_core.add_document_to_knowledge_base(user_api_key, contacts_text, contacts_doc_id, mode)
	    brochure['_timestamp'] = time.time()
	    rag_core.save_metadata_to_chroma(user_api_key, mode, item_id, brochure)


	def _purge_item_from_chroma(user_api_key, mode, item_id):
	    """Remove a whole card/brochure (documents and metadata) from ChromaDB."""
	    if mode == 'cards':
	        doc_ids = [item_id]
	    else:
	        # A brochure is indexed as two documents: its contacts and its info.
	        doc_ids = [f"{item_id}_contacts", f"{item_id}_info"]
	    for doc_id in doc_ids:
	        rag_core.remove_document_from_knowledge_base(user_api_key, doc_id, mode)
	    rag_core.delete_metadata_from_chroma(user_api_key, mode, item_id)


	@app.route('/delete_card/<mode>/<item_id>', methods=['DELETE'])
	def delete_card_endpoint(mode, item_id):
	    """Delete one card, one brochure, or one contact of a brochure.

	    URL parameters:
	        mode: 'cards' or 'brochures'.
	        item_id: id of the card or brochure.
	    Optional JSON body:
	        {"contactId": "..."} - in 'brochures' mode, delete only that contact
	        instead of the whole brochure.

	    The item is removed from the JSON file, from ChromaDB and from the SQL
	    database, in that order.

	    Returns:
	        200 {"success": True} when the item was found in at least one layer,
	        400 for a missing server API key or an invalid mode,
	        404 {"success": False, "message": "Item not found"} when it exists nowhere,
	        500 when the database (or last-resort ChromaDB) step raises.
	    """
	    # DELETE requests often carry no body (or a non-object one); treat that as
	    # "no contactId" instead of failing on None.get / a parse error.
	    payload = request.get_json(silent=True)
	    contact_id = payload.get('contactId') if isinstance(payload, dict) else None
	    user_api_key = OPENROUTER_API_KEY  # Use hardcoded server-side API key
	    if not user_api_key:
	        return jsonify({'error': 'Server API key not configured'}), 400
	    # Same validation as delete_all_endpoint.
	    if mode not in ('cards', 'brochures'):
	        return jsonify({'error': 'Invalid mode'}), 400

	    deleting_contact = mode == 'brochures' and bool(contact_id)

	    # Step 1: Delete from JSON file
	    user_data = _load_user_data(user_api_key, mode)
	    item_found_in_json = False
	    if deleting_contact:
	        for brochure in user_data:
	            if brochure.get('id') == item_id:
	                contacts = brochure.get('contacts', [])
	                remaining = [c for c in contacts if c.get('id') != contact_id]
	                item_found_in_json = len(remaining) < len(contacts)
	                brochure['contacts'] = remaining
	                break
	    else:
	        remaining_items = [entry for entry in user_data if entry.get('id') != item_id]
	        item_found_in_json = len(remaining_items) < len(user_data)
	        user_data = remaining_items
	    if item_found_in_json:
	        _save_user_data(user_api_key, mode, user_data)

	    # Step 1.5: Delete from ChromaDB (RAG knowledge base). Best effort: a
	    # failure here is logged and must not prevent the database delete below.
	    item_found_in_chroma = False
	    try:
	        # Record whether the item exists BEFORE removing it, so the final
	        # "found anywhere?" decision is not affected by the removal itself.
	        chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
	        chroma_item = next((entry for entry in (chroma_data or []) if entry.get('id') == item_id), None)

	        if deleting_contact:
	            item_found_in_chroma = chroma_item is not None and any(
	                c.get('id') == contact_id for c in chroma_item.get('contacts', [])
	            )
	            # Prefer the JSON copy (contact already removed in Step 1); fall
	            # back to the ChromaDB copy when the brochure is not in the JSON file.
	            brochure = next((b for b in user_data if b.get('id') == item_id), None)
	            if not brochure and chroma_item:
	                brochure = chroma_item
	                brochure['contacts'] = [c for c in brochure.get('contacts', []) if c.get('id') != contact_id]
	            if brochure:
	                _reindex_brochure_contacts(user_api_key, mode, item_id, brochure)
	                print(f"ChromaDB: Re-indexed contacts for brochure {item_id} after contact deletion")
	        else:
	            item_found_in_chroma = chroma_item is not None
	            _purge_item_from_chroma(user_api_key, mode, item_id)
	            if mode == 'cards':
	                print(f"ChromaDB: Removed document and metadata {item_id} from {mode} knowledge base")
	            else:
	                print(f"ChromaDB: Removed brochure {item_id} documents and metadata from knowledge base")
	    except Exception as e:
	        print(f"ChromaDB removal warning: {e}")

	    # ## FINAL DATABASE CODE ##
	    # Step 2: Delete from Database
	    try:
	        user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
	        db_record = None
	        if mode == 'cards':
	            db_record = BusinessCard.query.filter_by(json_id=item_id, user_hash=user_hash).first()
	            deleted_label = f"business card json_id: {item_id}"
	        elif contact_id:
	            candidate = Contact.query.filter_by(json_id=contact_id).first()
	            # Contacts carry no user_hash of their own; ownership is checked
	            # through the parent brochure (guarding against a missing parent).
	            if candidate and candidate.brochure is not None and candidate.brochure.user_hash == user_hash:
	                db_record = candidate
	            deleted_label = f"brochure contact json_id: {contact_id}"
	        else:
	            db_record = Brochure.query.filter_by(json_id=item_id, user_hash=user_hash).first()
	            deleted_label = f"brochure json_id: {item_id}"

	        if db_record is not None:
	            db.session.delete(db_record)  # Cascading delete will handle linked contacts
	            db.session.commit()
	            print(f"Database record deleted for {deleted_label}")
	            return jsonify({"success": True})

	        if item_found_in_json or item_found_in_chroma:
	            return jsonify({"success": True})

	        # Last resort: the item was in neither the JSON file nor the database,
	        # and Step 1.5 did not see it (e.g. its ChromaDB lookup failed).
	        # Look once more directly in ChromaDB.
	        chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
	        chroma_item = next((entry for entry in (chroma_data or []) if entry.get('id') == item_id), None)
	        if chroma_item is None:
	            return jsonify({"success": False, "message": "Item not found"}), 404

	        if deleting_contact:
	            contacts = chroma_item.get('contacts', [])
	            remaining = [c for c in contacts if c.get('id') != contact_id]
	            if len(remaining) == len(contacts):
	                # The brochure exists but the contact does not: report it as
	                # missing rather than claiming a successful delete.
	                return jsonify({"success": False, "message": "Item not found"}), 404
	            chroma_item['contacts'] = remaining
	            _reindex_brochure_contacts(user_api_key, mode, item_id, chroma_item)
	            print(f"ChromaDB: Deleted contact {contact_id} from brochure {item_id} in ChromaDB")
	        else:
	            _purge_item_from_chroma(user_api_key, mode, item_id)
	            if mode == 'cards':
	                print(f"ChromaDB: Deleted card {item_id} directly from ChromaDB")
	            else:
	                print(f"ChromaDB: Deleted brochure {item_id} directly from ChromaDB")
	        return jsonify({"success": True})

	    except Exception as e:
	        db.session.rollback()
	        print(f"DATABASE ERROR: Failed to delete record. Error: {e}")
	        return jsonify({"success": False, "message": "Database delete failed."}), 500
	# ## END FINAL DATABASE CODE ##

	@app.route('/delete_all/<mode>', methods=['DELETE'])
	def delete_all_endpoint(mode):
	    """Delete all items for a given mode (cards or brochures).

	    Clears the JSON file, the ChromaDB metadata and document chunks, and the
	    SQL rows belonging to the server-side API key.

	    Returns:
	        200 {'success': True, 'deleted_count': N, 'message': ...} on success
	        (N is the largest item count seen in any storage layer),
	        400 for a missing server API key or an invalid mode,
	        500 {'success': False, 'message': ...} when any step raises.
	    """
	    user_api_key = OPENROUTER_API_KEY
	    if not user_api_key:
	        return jsonify({'error': 'Server API key not configured'}), 400

	    if mode not in ['cards', 'brochures']:
	        return jsonify({'error': 'Invalid mode'}), 400

	    try:
	        user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
	        model = BusinessCard if mode == 'cards' else Brochure

	        # Step 1: Count items before deletion (from every storage layer, so
	        # rows that only exist in SQL are not skipped by the early return).
	        user_data = _load_user_data(user_api_key, mode)
	        chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
	        db_count = model.query.filter_by(user_hash=user_hash).count()

	        # Get count from whichever source has more
	        deleted_count = max(len(user_data), len(chroma_data) if chroma_data else 0, db_count)

	        if deleted_count == 0:
	            return jsonify({
	                'success': True,
	                'deleted_count': 0,
	                'message': f'No {mode} to delete'
	            })

	        print(f"Starting deletion of {deleted_count} {mode}...")

	        # Step 2: Clear JSON file
	        _save_user_data(user_api_key, mode, [])
	        print(f"Cleared JSON file for {mode}")

	        # Step 3: Delete ALL metadata from ChromaDB (bulk delete)
	        metadata_deleted = rag_core.delete_all_metadata_from_chroma(user_api_key, mode)
	        print(f"Deleted {metadata_deleted} metadata records from ChromaDB")

	        # Step 4: Delete ALL document chunks from ChromaDB (bulk delete)
	        docs_deleted = rag_core.delete_all_documents_from_chroma(user_api_key, mode)
	        print(f"Deleted {docs_deleted} document chunks from ChromaDB")

	        # Step 5: Delete from SQL Database
	        if mode == 'cards':
	            db_deleted = BusinessCard.query.filter_by(user_hash=user_hash).delete()
	            print(f"Deleted {db_deleted} business cards from SQL database")
	        else:
	            # Query.delete() is a bulk DELETE that skips ORM relationship
	            # cascades, which would leave the brochures' contacts behind.
	            # Delete each brochure through the session instead, exactly like
	            # the single-brochure delete endpoint does.
	            brochures = Brochure.query.filter_by(user_hash=user_hash).all()
	            for brochure in brochures:
	                db.session.delete(brochure)
	            print(f"Deleted {len(brochures)} brochures from SQL database")

	        db.session.commit()
	        print(f"Successfully deleted all {deleted_count} {mode} from all storage layers")

	        return jsonify({
	            'success': True,
	            'deleted_count': deleted_count,
	            'message': f'Successfully deleted {deleted_count} {mode}'
	        })

	    except Exception as e:
	        db.session.rollback()
	        print(f"DATABASE ERROR: Failed to delete all {mode}. Error: {e}")
	        traceback.print_exc()
	        return jsonify({
	            'success': False,
	            'message': f'Failed to delete all {mode}: {str(e)}'
	        }), 500


	@app.route('/')
	def serve_dashboard():
	    """Render the dashboard page from the 'index.html' template."""
	    dashboard_page = render_template('index.html')
	    return dashboard_page

	@app.route('/uploads/<filename>')
	def uploaded_file(filename):
	    """Serve the file called ``filename`` from the upload folder."""
	    file_response = send_from_directory(UPLOAD_FOLDER, filename)
	    return file_response

	# Health check endpoint - responds immediately without waiting for model loading
	@app.route('/health')
	def health_check():
	    """Return a fixed 200 "ok" JSON payload for liveness probes."""
	    payload = {"status": "ok", "message": "Service is running"}
	    return jsonify(payload), 200

	# Create database tables (lightweight - runs at import time)
	# `db` is bound to this app via db.init_app(app), so create_all() is called
	# inside an application context. Per the log message below, existing tables
	# are only checked and missing ones are created.
	with app.app_context():
	db.create_all()
	print("Database tables (business_card, brochure, contact) checked and created if necessary.")

	# RAG start-up is deferred: it runs on the first request that needs it.
	_rag_initialized = False

	@app.before_request
	def ensure_rag_initialized():
	    """Initialize the RAG system once, on the first non-exempt request.

	    Health checks, uploaded files, static files and the dashboard page are
	    exempt so they respond without waiting for initialization. The attempt is
	    made only once: the flag is set whether initialization succeeds, reports
	    the system as unavailable, or raises.
	    """
	    global _rag_initialized
	    exempt_endpoints = ('health_check', 'uploaded_file', 'static', 'serve_dashboard')
	    if _rag_initialized or request.endpoint in exempt_endpoints:
	        return

	    print("First request received - initializing RAG system...")
	    try:
	        rag_available = rag_core.initialize_rag_system()
	        _rag_initialized = True
	        print(
	            "RAG system initialized successfully!"
	            if rag_available
	            else "RAG system not available - OCR features will still work"
	        )
	    except Exception as init_error:
	        print(f"RAG initialization error (non-fatal): {init_error}")
	        # Record the attempt so later requests do not retry a failing start-up.
	        _rag_initialized = True

	if __name__ == "__main__":
	    # Local development: initialize RAG eagerly instead of on the first request.
	    try:
	        rag_core.initialize_rag_system()
	    except Exception as init_error:
	        print(f"RAG initialization failed: {init_error}")
	        print("App will start without RAG features")

	    startup_banner = (
	        "--- Server is starting! ---",
	        f"User-specific data will be saved in '{os.path.abspath(DATA_FOLDER)}'",
	        "To use the dashboard, open your web browser and go to: http://127.0.0.1:5000",
	    )
	    for banner_line in startup_banner:
	        print(banner_line)

	    # Open the dashboard in the default browser, then start the server.
	    webbrowser.open_new('http://127.0.0.1:5000')
	    app.run(host='0.0.0.0', port=5000, debug=False, use_reloader=False)