# app.py
import os
import io
import json
import hashlib
import requests
import base64
from flask import Flask, request, jsonify, send_from_directory, render_template, session
import webbrowser
from flask_cors import CORS
from PIL import Image
import fitz  # PyMuPDF
import rag_core
from datetime import timedelta
import traceback
import time
import re
from dotenv import load_dotenv

# Load .env before `models` is imported and before any os.environ lookups below.
load_dotenv()

# --- MODIFIED: Import db and models from models.py ---
from models import db, BusinessCard, Brochure, Contact

app = Flask(__name__)
CORS(app)

# Disable template caching for development
app.config['TEMPLATES_AUTO_RELOAD'] = True
app.jinja_env.auto_reload = True

# Session configuration
# NOTE(review): the fallback below is a fixed, publicly visible string. Set
# SESSION_SECRET in every real deployment, otherwise session cookies can be forged.
app.secret_key = os.environ.get("SESSION_SECRET", "a-very-secret-key-for-sessions")
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(hours=24)

# --- FOLDER CONFIGURATION ---
UPLOAD_FOLDER = 'uploads'
DATA_FOLDER = 'user_data'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# exist_ok avoids the check-then-create race when several workers start at once.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(DATA_FOLDER, exist_ok=True)

# --- DATABASE CONFIGURATION ---
app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get(
    'DATABASE_URI',
    'sqlite:///local_crm.db'
)
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

# --- MODIFIED: Initialize the app with the database object ---
db.init_app(app)

# --- API KEY (loaded from environment, shared by all requests) ---
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")

# --- DATABASE MODEL DEFINITIONS HAVE BEEN MOVED TO models.py ---

# Maps the model key sent by the client (`selectedModel`) to the OpenRouter model id.
MODEL_MAP = {
    'gemini': 'google/gemma-3-4b-it:free',
    'deepseek': 'google/gemma-3-27b-it:free',
    'qwen': 'mistralai/mistral-small-3.1-24b-instruct:free',
    'nvidia': 'nvidia/nemotron-nano-12b-v2-vl:free',
    'amazon': 'amazon/nova-2-lite-v1:free'
}

# Best → fallback order (OCR strength)
FALLBACK_ORDER = [
    'gemini',
    'deepseek',
    'qwen',
    'nvidia',
    'amazon'
]

# All your other functions (_call_openrouter_api_with_fallback, etc.) remain unchanged below...
def _call_openrouter_api_with_fallback(api_key, selected_model_key, prompt, images=[]): if images: vision_models = ['gemini','deepseek','qwen','nvidia','amazon'] models_to_try = [m for m in vision_models if m == selected_model_key] models_to_try.extend([m for m in vision_models if m != selected_model_key]) models_to_try.extend([m for m in FALLBACK_ORDER if m not in vision_models]) else: models_to_try = [selected_model_key] for model in FALLBACK_ORDER: if model != selected_model_key: models_to_try.append(model) last_error = None for model_key in models_to_try: model_name = MODEL_MAP.get(model_key) if not model_name: continue print(f"Attempting API call with model: {model_name}...") content_parts = [{"type": "text", "text": prompt}] if images and model_key in ['gemini','deepseek','qwen','nvidia','amazon']: for img in images: buffered = io.BytesIO() img_format = img.format or "PNG" img.save(buffered, format=img_format) img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8') content_parts.append({ "type": "image_url", "image_url": { "url": f"data:image/{img_format.lower()};base64,{img_base64}" } }) elif images and model_key not in ['gemini','deepseek','qwen','nvidia','amazon']: print(f"Skipping {model_name} - no image input support") continue try: response = requests.post( url="https://openrouter.ai/api/v1/chat/completions", headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, json={"model": model_name, "messages": [{"role": "user", "content": content_parts}]}, timeout=30 ) response.raise_for_status() api_response = response.json() if 'choices' not in api_response or not api_response['choices']: print(f"Model {model_name} returned empty response") last_error = {"error": f"Model {model_name} returned empty response"} continue json_text = api_response['choices'][0]['message']['content'] cleaned_json_text = re.search(r'```json\s*([\s\S]+?)\s*```', json_text) if cleaned_json_text: json_text = cleaned_json_text.group(1) else: 
json_text = json_text.strip() result = json.loads(json_text) print(f"Successfully processed with model: {model_name}") return result except requests.exceptions.HTTPError as http_err: error_msg = f"HTTP error occurred for model {model_name}: {http_err}" if hasattr(response, 'text'): error_msg += f"\nResponse: {response.text}" print(error_msg) last_error = {"error": f"API request failed for {model_name} with status {response.status_code}."} continue except requests.exceptions.Timeout: print(f"Timeout error for model {model_name}") last_error = {"error": f"Request timeout for model {model_name}"} continue except json.JSONDecodeError as json_err: error_msg = f"JSON Decode Error for model {model_name}: {json_err}\nMalformed response: {json_text}" print(error_msg) last_error = {"error": f"Model {model_name} returned invalid JSON."} continue except Exception as e: print(f"An error occurred with model {model_name}: {e}") traceback.print_exc() last_error = {"error": f"An unexpected error occurred with model {model_name}."} continue return last_error or {"error": "All models failed to process the request."} def _call_openrouter_api_text_only_with_fallback(api_key, selected_model_key, prompt): models_to_try = [selected_model_key] + [m for m in FALLBACK_ORDER if m != selected_model_key] last_error = None for model_key in models_to_try: model_name = MODEL_MAP.get(model_key) if not model_name: continue print(f"Attempting text-only API call with model: {model_name}...") try: response = requests.post( url="https://openrouter.ai/api/v1/chat/completions", headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, json={"model": model_name, "messages": [{"role": "user", "content": prompt}]}, timeout=30 ) response.raise_for_status() api_response = response.json() if 'choices' not in api_response or not api_response['choices']: last_error = {"error": f"Model {model_name} returned unexpected response format"} continue result = 
api_response['choices'][0]['message']['content'] print(f"Successfully processed text with model: {model_name}") return result except requests.exceptions.HTTPError as http_err: error_msg = f"HTTP error occurred for model {model_name}: {http_err}" if hasattr(response, 'text'): error_msg += f"\nResponse: {response.text}" print(error_msg) last_error = {"error": f"API request failed for {model_name} with status {response.status_code}."} continue except requests.exceptions.Timeout: print(f"Timeout error for model {model_name}") last_error = {"error": f"Request timeout for model {model_name}"} continue except Exception as e: print(f"An error occurred with model {model_name}: {e}") traceback.print_exc() last_error = {"error": f"An unexpected error occurred with model {model_name}."} continue if isinstance(last_error, dict) and "error" in last_error: return last_error["error"] return "All models failed to process the text request." def _extract_contact_info_from_text(text): if not text: return "", [] email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b' phone_pattern = r'(?:\+?\d{1,4}[-.\s]?)?(?:\(?\d{1,4}\)?[-.\s]?)?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}' emails = re.findall(email_pattern, text, re.IGNORECASE) phones = re.findall(phone_pattern, text) clean_text = text clean_text = re.sub(email_pattern, '', clean_text, flags=re.IGNORECASE) for phone in phones: if len(phone.replace('-', '').replace('.', '').replace(' ', '').replace('(', '').replace(')', '').replace('+', '')) >= 7: clean_text = clean_text.replace(phone, '') clean_text = re.sub(r'\s+', ' ', clean_text).strip() clean_text = re.sub(r'\n\s*\n', '\n', clean_text) return clean_text, emails + phones def _create_clean_info_text(brochure_data): company_name = brochure_data.get("company_name", "") raw_text = brochure_data.get("raw_text", "") info_parts = [] if company_name and company_name != "Unknown Company": info_parts.append(f"Company: {company_name}") if raw_text: clean_text, _ = 
_extract_contact_info_from_text(raw_text) contact_phrases = [r'contact\s+us\s*:?', r'for\s+more\s+information\s*:?', r'reach\s+out\s+to\s*:?', r'get\s+in\s+touch\s*:?', r'phone\s*:', r'email\s*:', r'tel\s*:', r'mobile\s*:', r'call\s+us\s*:?', r'write\s+to\s+us\s*:?',] for phrase in contact_phrases: clean_text = re.sub(phrase, '', clean_text, flags=re.IGNORECASE) clean_text = re.sub(r'\s+', ' ', clean_text).strip() clean_text = re.sub(r'\n\s*\n', '\n', clean_text) if clean_text: info_parts.append(clean_text) return "\n".join(info_parts) if info_parts else "" def _get_user_data_filepath(user_api_key, mode): user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()[:16] return os.path.join(DATA_FOLDER, f'{user_hash}_{mode}_data.json') def _load_user_data(user_api_key, mode): filepath = _get_user_data_filepath(user_api_key, mode) try: if os.path.exists(filepath): with open(filepath, 'r') as f: return json.load(f) except (IOError, json.JSONDecodeError): return [] return [] def _save_user_data(user_api_key, mode, data): filepath = _get_user_data_filepath(user_api_key, mode) try: with open(filepath, 'w') as f: json.dump(data, f, indent=4) return True except IOError: return False def _clean_and_validate_contacts(data): if not data or "contacts" not in data: return data cleaned_contacts = [] def is_placeholder(value): if not isinstance(value, str): return True test_val = value.strip().lower() if not test_val: return True placeholders = ["n/a", "na", "none", "null"] if test_val in placeholders: return True if "not available" in test_val or "not specified" in test_val or "not applicable" in test_val: return True return False for contact in data.get("contacts", []): name = contact.get("Owner Name") if is_placeholder(name): continue cleaned_contacts.append({ "Owner Name": name.strip(), "Email": None if is_placeholder(contact.get("Email")) else contact.get("Email").strip(), "Number": None if is_placeholder(contact.get("Number")) else contact.get("Number").strip() }) 
data["contacts"] = cleaned_contacts return data def extract_card_data(image_bytes, user_api_key, selected_model_key): print("Processing business card with OpenRouter API...") if not user_api_key: return {"error": "A valid OpenRouter API Key was not provided."} try: img = Image.open(io.BytesIO(image_bytes)) prompt = """You are an expert at reading business cards. Analyze the image and extract information into a structured JSON format. The JSON object must use these exact keys: "Owner Name", "Company Name", "Email", "Number", "Address". If a piece of information is not present, its value must be `null`. Your entire response MUST be a single, valid JSON object.""" parsed_info = _call_openrouter_api_with_fallback(user_api_key, selected_model_key, prompt, images=[img]) if "error" in parsed_info: return parsed_info return {"Owner Name": parsed_info.get("Owner Name"), "Company Name": parsed_info.get("Company Name"), "Email": parsed_info.get("Email"), "Number": parsed_info.get("Number"), "Address": parsed_info.get("Address")} except Exception as e: print(f"Error during OpenRouter API call for business card: {e}") traceback.print_exc() return {"error": f"Failed to parse AI response: {e}"} def _extract_brochure_data_with_vision(image_list, user_api_key, selected_model_key): print(f"Vision Extraction: Analyzing {len(image_list)} images with OpenRouter...") if not user_api_key: return {"error": "A valid OpenRouter API Key was not provided."} try: prompt = """You are a world-class document analysis expert. Analyze the provided document images with maximum precision. CRITICAL INSTRUCTIONS: 1. Extract the company name. 2. Extract ONLY contact information (names, emails, phone numbers) and put them in the "contacts" array. 3. Extract ALL OTHER content (company description, services, mission, addresses, general information) as "raw_text". 4. DO NOT include contact details like names, emails, or phone numbers in the raw_text. 5. 
Focus on separating contact information from general company information. OUTPUT FORMAT: Return a SINGLE, valid JSON object with these exact keys: "company_name", "contacts", "raw_text". The "contacts" key must contain a list of objects, each with "Owner Name", "Email", and "Number". If a piece of information is missing for a contact, use `null`. The "raw_text" should contain business information, services, descriptions, but NO contact details.""" raw_data = _call_openrouter_api_with_fallback(user_api_key, selected_model_key, prompt, images=image_list) if "error" in raw_data: return raw_data print("AI vision extraction complete. Applying bulletproof cleaning...") cleaned_data = _clean_and_validate_contacts(raw_data) return cleaned_data except Exception as e: print(f"Error during unified brochure vision extraction: {e}") traceback.print_exc() return {"error": f"Failed to parse data from brochure images: {e}"} @app.before_request def make_session_permanent(): session.permanent = True @app.route('/process_card', methods=['POST']) def process_card_endpoint(): if 'file' not in request.files: return jsonify({'error': 'No file part'}), 400 file, selected_model_key = request.files['file'], request.form.get('selectedModel') user_api_key = OPENROUTER_API_KEY # Use hardcoded server-side API key if not user_api_key or not selected_model_key: return jsonify({'error': 'Server API key not configured or model not selected'}), 400 if selected_model_key not in MODEL_MAP: return jsonify({'error': 'Invalid model selected'}), 400 try: image_bytes = file.read() extracted_info = extract_card_data(image_bytes, user_api_key, selected_model_key) if "error" in extracted_info: return jsonify(extracted_info), 500 file_id = os.urandom(8).hex() _, f_ext = os.path.splitext(file.filename) safe_ext = f_ext if f_ext.lower() in ['.png', '.jpg', '.jpeg', '.webp'] else '.png' image_filename = f"{file_id}{safe_ext}" save_path = os.path.join(UPLOAD_FOLDER, image_filename) with open(save_path, 'wb') as f: 
f.write(image_bytes) extracted_info['id'] = file_id extracted_info['image_filename'] = image_filename user_contacts = _load_user_data(user_api_key, 'cards') user_contacts.insert(0, extracted_info) _save_user_data(user_api_key, 'cards', user_contacts) try: user_hash = hashlib.sha256(user_api_key.encode()).hexdigest() new_card = BusinessCard( json_id=file_id, owner_name=extracted_info.get("Owner Name"), company_name=extracted_info.get("Company Name"), email=extracted_info.get("Email"), phone_number=extracted_info.get("Number"), address=extracted_info.get("Address"), source_document=file.filename, user_hash=user_hash ) db.session.add(new_card) db.session.commit() print(f"Successfully saved business card for '{extracted_info.get('Owner Name')}' to the database.") except Exception as e: db.session.rollback() print(f"DATABASE ERROR: Failed to save business card data. Error: {e}") traceback.print_exc() raw_text_for_rag = ' '.join(str(v) for k, v in extracted_info.items() if v and k not in ['id', 'image_filename']) rag_core.add_document_to_knowledge_base(user_api_key, raw_text_for_rag, file_id, 'cards') # Save metadata to ChromaDB for persistence across restarts extracted_info['_timestamp'] = time.time() rag_core.save_metadata_to_chroma(user_api_key, 'cards', file_id, extracted_info) return jsonify(extracted_info) except Exception as e: print(f"An error occurred in process_card endpoint: {e}") traceback.print_exc() return jsonify({'error': 'Server processing failed'}), 500 @app.route('/process_brochure', methods=['POST']) def process_brochure_endpoint(): if 'file' not in request.files: return jsonify({'error': 'No file part'}), 400 file, selected_model_key = request.files['file'], request.form.get('selectedModel') user_api_key = OPENROUTER_API_KEY # Use hardcoded server-side API key if not user_api_key or not selected_model_key: return jsonify({'error': 'Server API key not configured or model not selected'}), 400 if selected_model_key not in MODEL_MAP: return 
jsonify({'error': 'Invalid model selected'}), 400 try: pdf_bytes = file.read() pdf_doc = fitz.open(stream=pdf_bytes, filetype="pdf") brochure_json_id = os.urandom(8).hex() pdf_filename = f"{brochure_json_id}.pdf" save_path = os.path.join(UPLOAD_FOLDER, pdf_filename) with open(save_path, 'wb') as f: f.write(pdf_bytes) extracted_data = {} full_text_from_pdf = "".join(page.get_text("text") for page in pdf_doc).strip() if len(full_text_from_pdf) > 100: print("'Text-First' successful. Using text model.") try: prompt = """Analyze the following text and structure it into a JSON object with keys "company_name", "contacts", and "raw_text". CRITICAL INSTRUCTIONS: 1. Extract the company name. 2. Extract ONLY contact information (names, emails, phone numbers) into the "contacts" array. 3. Extract ALL OTHER content into "raw_text". 4. DO NOT include contact details in raw_text. "contacts" should be a list of objects with "Owner Name", "Email", and "Number". DOCUMENT TEXT: --- {full_text_from_pdf} ---""" result = _call_openrouter_api_text_only_with_fallback(user_api_key, selected_model_key, prompt) if isinstance(result, str) and not result.startswith("All models failed"): try: extracted_data = json.loads(result) except json.JSONDecodeError: extracted_data = {} else: extracted_data = {} except Exception: extracted_data = {} if "error" in extracted_data or not extracted_data: print("Adaptive Vision: Attempting medium resolution (150 DPI)...") med_res_images = [Image.open(io.BytesIO(page.get_pixmap(dpi=150).tobytes("png"))) for page in pdf_doc] extracted_data = _extract_brochure_data_with_vision(med_res_images, user_api_key, selected_model_key) is_poor_quality = "error" in extracted_data or (not extracted_data.get("contacts") and len(extracted_data.get("raw_text", "")) < 50) if is_poor_quality: print("Medium resolution failed. 
Retrying with high resolution (300 DPI)...") high_res_images = [Image.open(io.BytesIO(page.get_pixmap(dpi=300).tobytes("png"))) for page in pdf_doc] extracted_data = _extract_brochure_data_with_vision(high_res_images, user_api_key, selected_model_key) if "error" in extracted_data: return jsonify(extracted_data), 500 final_brochure_object = { "id": brochure_json_id, "company_name": extracted_data.get("company_name", "Unknown Company"), "contacts": extracted_data.get("contacts", []), "raw_text": extracted_data.get("raw_text", ""), "image_filename": pdf_filename } for contact in final_brochure_object["contacts"]: contact["id"] = os.urandom(8).hex() user_brochures = _load_user_data(user_api_key, 'brochures') user_brochures.insert(0, final_brochure_object) _save_user_data(user_api_key, 'brochures', user_brochures) try: user_hash = hashlib.sha256(user_api_key.encode()).hexdigest() new_brochure = Brochure( json_id=brochure_json_id, company_name=final_brochure_object.get("company_name"), raw_text=final_brochure_object.get("raw_text"), source_document=file.filename, user_hash=user_hash ) db.session.add(new_brochure) for contact_data in final_brochure_object.get("contacts", []): new_contact = Contact( json_id=contact_data['id'], owner_name=contact_data.get("Owner Name"), email=contact_data.get("Email"), phone_number=contact_data.get("Number"), brochure=new_brochure ) db.session.add(new_contact) db.session.commit() print(f"Successfully saved brochure '{new_brochure.company_name}' and {len(new_brochure.contacts)} contacts to the database.") except Exception as e: db.session.rollback() print(f"DATABASE ERROR: Failed to save brochure data. 
Error: {e}") traceback.print_exc() print("Indexing separated and cleaned content for high-quality RAG...") contacts = final_brochure_object.get("contacts", []) if contacts: contact_text_parts = [f"Contact information for {final_brochure_object.get('company_name', 'this company')}:"] for contact in contacts: name, email, number = contact.get("Owner Name"), contact.get("Email"), contact.get("Number") contact_info = [f"Name: {name}"] if email: contact_info.append(f"Email: {email}") if number: contact_info.append(f"Phone: {number}") contact_text_parts.append("- " + ", ".join(contact_info)) contacts_document_text = "\n".join(contact_text_parts) rag_core.add_document_to_knowledge_base(user_api_key, contacts_document_text, f"{brochure_json_id}_contacts", 'brochures') clean_info_text = _create_clean_info_text(final_brochure_object) if clean_info_text and clean_info_text.strip(): rag_core.add_document_to_knowledge_base(user_api_key, clean_info_text, f"{brochure_json_id}_info", 'brochures') print("RAG indexing completed successfully!") # Save metadata to ChromaDB for persistence across restarts final_brochure_object['_timestamp'] = time.time() rag_core.save_metadata_to_chroma(user_api_key, 'brochures', brochure_json_id, final_brochure_object) return jsonify(final_brochure_object) except Exception as e: print(f"An error occurred in process_brochure endpoint: {e}") traceback.print_exc() return jsonify({'error': f'Server processing failed: {e}'}), 500 @app.route('/chat', methods=['POST']) def chat_endpoint(): data = request.get_json() query_text, mode, selected_model_key = data.get('query'), data.get('mode'), data.get('selectedModel') user_api_key = OPENROUTER_API_KEY # Use hardcoded server-side API key if not all([user_api_key, query_text, mode, selected_model_key]): return jsonify({'error': 'Query, mode, and model are required.'}), 400 if selected_model_key not in MODEL_MAP: return jsonify({'error': 'Invalid model selected'}), 400 try: session['api_key'] = user_api_key # Save 
user message to chat history rag_core.save_chat_message(user_api_key, mode, 'user', query_text) intent = 'synthesis' if "table" in query_text.lower() or "list all" in query_text.lower() else 'research' print(f"Intent detected: {intent}") if intent == 'synthesis': # Try ChromaDB first, fall back to JSON data_source = rag_core.load_all_metadata_from_chroma(user_api_key, mode) if not data_source: data_source = _load_user_data(user_api_key, mode) synthesis_data = [] if mode == 'brochures': for brochure in data_source: for contact in brochure.get('contacts', []): synthesis_data.append({"Company Name": brochure.get("company_name"), "Owner Name": contact.get("Owner Name"), "Email": contact.get("Email"), "Number": contact.get("Number")}) else: synthesis_data = data_source synthesis_prompt = f"As a data analyst, create a markdown table based on the user's request from the following JSON data.\nJSON: {json.dumps(synthesis_data, indent=2)}\nRequest: {query_text}\nAnswer:" answer = _call_openrouter_api_text_only_with_fallback(user_api_key, selected_model_key, synthesis_prompt) else: answer = rag_core.query_knowledge_base(user_api_key, query_text, mode, selected_model_key) # Save assistant response to chat history rag_core.save_chat_message(user_api_key, mode, 'assistant', answer) return jsonify({'answer': answer}) except Exception as e: print(f"Error in /chat endpoint: {e}"); traceback.print_exc() return jsonify({'error': 'An internal error occurred.'}), 500 @app.route('/chat_history/', methods=['GET']) def get_chat_history_endpoint(mode): user_api_key = OPENROUTER_API_KEY if not user_api_key: return jsonify({'error': 'Server API key not configured'}), 400 limit = request.args.get('limit', 20, type=int) history = rag_core.get_chat_history(user_api_key, mode, limit) return jsonify({'history': history}) @app.route('/clear_chat/', methods=['POST']) def clear_chat_endpoint(mode): user_api_key = OPENROUTER_API_KEY if not user_api_key: return jsonify({'error': 'Server API key not 
configured'}), 400 success = rag_core.clear_chat_history(user_api_key, mode) return jsonify({'success': success}) @app.route('/sync_check/', methods=['GET']) def sync_check_endpoint(mode): """Check for data updates - returns item count and hash for change detection""" user_api_key = OPENROUTER_API_KEY if not user_api_key: return jsonify({'error': 'Server API key not configured'}), 400 if mode not in ['cards', 'brochures']: return jsonify({'error': 'Invalid mode'}), 400 try: # Get data from ChromaDB first, then fall back to JSON chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode) if chroma_data: data = chroma_data else: data = _load_user_data(user_api_key, mode) # Calculate count and hash of IDs for change detection count = len(data) if data else 0 ids = sorted([item.get('id', '') for item in data]) if data else [] ids_hash = hashlib.md5(''.join(ids).encode()).hexdigest()[:8] return jsonify({ 'count': count, 'hash': ids_hash, 'timestamp': time.time() }) except Exception as e: print(f"Sync check error: {e}") return jsonify({'count': 0, 'hash': '', 'timestamp': time.time()}) @app.route('/load_data/', methods=['POST']) def load_data_endpoint(mode): user_api_key = OPENROUTER_API_KEY # Use hardcoded server-side API key if not user_api_key: return jsonify({'error': 'Server API key not configured'}), 400 # Try loading from ChromaDB first (persists across restarts) chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode) if chroma_data: print(f"Loaded {len(chroma_data)} items from ChromaDB for {mode}") return jsonify(chroma_data) # Fall back to local JSON (for backwards compatibility) user_data = _load_user_data(user_api_key, mode) return jsonify(user_data) @app.route('/update_card//', methods=['POST']) def update_card_endpoint(mode, item_id): data = request.get_json() field, value, contact_id = data.get('field'), data.get('value'), data.get('contactId') user_api_key = OPENROUTER_API_KEY # Use hardcoded server-side API key if not 
user_api_key: return jsonify({'error': 'Server API key not configured'}), 400 # Step 1: Update JSON file (Existing Logic, Unchanged) user_data = _load_user_data(user_api_key, mode) item_found_in_json = False if mode == 'cards': for card in user_data: if card.get('id') == item_id: card[field] = value item_found_in_json = True break elif mode == 'brochures': for brochure in user_data: if brochure.get('id') == item_id and contact_id: for contact in brochure.get('contacts', []): if contact.get('id') == contact_id: contact[field] = value item_found_in_json = True break if item_found_in_json: break if item_found_in_json: _save_user_data(user_api_key, mode, user_data) # Step 1.5: Update ChromaDB (RAG knowledge base) try: if mode == 'cards': # Get the updated card data updated_card = next((c for c in user_data if c.get('id') == item_id), None) if updated_card: # Remove old document and re-add with updated content rag_core.remove_document_from_knowledge_base(user_api_key, item_id, mode) raw_text = ' '.join(str(v) for k, v in updated_card.items() if v and k not in ['id', 'image_filename']) rag_core.add_document_to_knowledge_base(user_api_key, raw_text, item_id, mode) # Also update metadata in ChromaDB updated_card['_timestamp'] = time.time() rag_core.save_metadata_to_chroma(user_api_key, mode, item_id, updated_card) print(f"ChromaDB: Updated document and metadata {item_id} in {mode} knowledge base") elif mode == 'brochures' and contact_id: # Find the brochure and re-index its contacts brochure = next((b for b in user_data if b.get('id') == item_id), None) if brochure: # Remove old contacts document and re-add with updated content contacts_doc_id = f"{item_id}_contacts" rag_core.remove_document_from_knowledge_base(user_api_key, contacts_doc_id, mode) contacts = brochure.get("contacts", []) if contacts: contact_text_parts = [f"Contact information for {brochure.get('company_name', 'this company')}:"] for contact in contacts: name, email, number = contact.get("Owner Name"), 
contact.get("Email"), contact.get("Number") contact_info = [f"Name: {name}"] if email: contact_info.append(f"Email: {email}") if number: contact_info.append(f"Phone: {number}") contact_text_parts.append("- " + ", ".join(contact_info)) contacts_document_text = "\n".join(contact_text_parts) rag_core.add_document_to_knowledge_base(user_api_key, contacts_document_text, contacts_doc_id, mode) # Also update metadata in ChromaDB brochure['_timestamp'] = time.time() rag_core.save_metadata_to_chroma(user_api_key, mode, item_id, brochure) print(f"ChromaDB: Updated contacts and metadata for brochure {item_id}") except Exception as e: print(f"ChromaDB update warning: {e}") # ## FINAL DATABASE CODE ## # Step 2: Update Database (New Logic) try: user_hash = hashlib.sha256(user_api_key.encode()).hexdigest() if mode == 'cards': db_card = BusinessCard.query.filter_by(json_id=item_id, user_hash=user_hash).first() if db_card: field_map = {"Owner Name": "owner_name", "Company Name": "company_name", "Email": "email", "Number": "phone_number", "Address": "address"} db_field = field_map.get(field) if db_field: setattr(db_card, db_field, value) db.session.commit() print(f"Database updated for business card json_id: {item_id}") return jsonify({"success": True}) elif mode == 'brochures' and contact_id: db_contact = Contact.query.filter_by(json_id=contact_id).first() if db_contact and db_contact.brochure.user_hash == user_hash: field_map = {"Owner Name": "owner_name", "Email": "email", "Number": "phone_number"} db_field = field_map.get(field) if db_field: setattr(db_contact, db_field, value) db.session.commit() print(f"Database updated for brochure contact json_id: {contact_id}") return jsonify({"success": True}) if not item_found_in_json: # Try to find in ChromaDB if not in JSON chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode) if chroma_data: if mode == 'cards': for card in chroma_data: if card.get('id') == item_id: card[field] = value card['_timestamp'] = time.time() 
rag_core.save_metadata_to_chroma(user_api_key, mode, item_id, card) # Also update RAG knowledge base rag_core.remove_document_from_knowledge_base(user_api_key, item_id, mode) raw_text = ' '.join(str(v) for k, v in card.items() if v and k not in ['id', 'image_filename', '_timestamp']) rag_core.add_document_to_knowledge_base(user_api_key, raw_text, item_id, mode) print(f"ChromaDB: Updated card {item_id} directly in ChromaDB") return jsonify({"success": True}) elif mode == 'brochures' and contact_id: for brochure in chroma_data: if brochure.get('id') == item_id: for contact in brochure.get('contacts', []): if contact.get('id') == contact_id: contact[field] = value brochure['_timestamp'] = time.time() rag_core.save_metadata_to_chroma(user_api_key, mode, item_id, brochure) # Re-index contacts in RAG contacts_doc_id = f"{item_id}_contacts" rag_core.remove_document_from_knowledge_base(user_api_key, contacts_doc_id, mode) contacts = brochure.get("contacts", []) if contacts: contact_text_parts = [f"Contact information for {brochure.get('company_name', 'this company')}:"] for c in contacts: name, email, number = c.get("Owner Name"), c.get("Email"), c.get("Number") contact_info = [f"Name: {name}"] if email: contact_info.append(f"Email: {email}") if number: contact_info.append(f"Phone: {number}") contact_text_parts.append("- " + ", ".join(contact_info)) contacts_document_text = "\n".join(contact_text_parts) rag_core.add_document_to_knowledge_base(user_api_key, contacts_document_text, contacts_doc_id, mode) print(f"ChromaDB: Updated brochure contact {contact_id} directly in ChromaDB") return jsonify({"success": True}) return jsonify({"success": False, "message": "Item not found"}), 404 return jsonify({"success": True}) except Exception as e: db.session.rollback() print(f"DATABASE ERROR: Failed to update record. 
# ## END FINAL DATABASE CODE ##


# Storage modes accepted by the delete endpoints below.
_VALID_MODES = ('cards', 'brochures')


def _without_contact(brochure, contact_id):
    """Return the brochure's contacts list with the contact `contact_id` filtered out."""
    return [c for c in brochure.get('contacts', []) if c.get('id') != contact_id]


def _build_contacts_document(brochure):
    """Build the RAG text listing a brochure's contacts; return None when it has none."""
    contacts = brochure.get("contacts", [])
    if not contacts:
        return None
    lines = [f"Contact information for {brochure.get('company_name', 'this company')}:"]
    for contact in contacts:
        name, email, number = contact.get("Owner Name"), contact.get("Email"), contact.get("Number")
        contact_info = [f"Name: {name}"]
        if email:
            contact_info.append(f"Email: {email}")
        if number:
            contact_info.append(f"Phone: {number}")
        lines.append("- " + ", ".join(contact_info))
    return "\n".join(lines)


def _sync_brochure_contacts_to_chroma(user_api_key, mode, item_id, brochure):
    """Replace the brochure's `<id>_contacts` document and re-save its metadata in ChromaDB."""
    contacts_doc_id = f"{item_id}_contacts"
    rag_core.remove_document_from_knowledge_base(user_api_key, contacts_doc_id, mode)
    contacts_document_text = _build_contacts_document(brochure)
    if contacts_document_text is not None:
        rag_core.add_document_to_knowledge_base(user_api_key, contacts_document_text, contacts_doc_id, mode)
    brochure['_timestamp'] = time.time()
    rag_core.save_metadata_to_chroma(user_api_key, mode, item_id, brochure)


def _remove_item_from_chroma(user_api_key, mode, item_id):
    """Remove a whole card or brochure (documents and metadata) from ChromaDB."""
    if mode == 'cards':
        rag_core.remove_document_from_knowledge_base(user_api_key, item_id, mode)
    else:
        # A brochure is indexed as two documents: its contacts and its info.
        rag_core.remove_document_from_knowledge_base(user_api_key, f"{item_id}_contacts", mode)
        rag_core.remove_document_from_knowledge_base(user_api_key, f"{item_id}_info", mode)
    rag_core.delete_metadata_from_chroma(user_api_key, mode, item_id)


# NOTE(review): the URL variables were missing from the route string in the
# reviewed text; they are restored from the view signature with Flask's default
# (string) converter - confirm against the deployed client.
@app.route('/delete_card/<mode>/<item_id>', methods=['DELETE'])
def delete_card_endpoint(mode, item_id):
    """Delete one card, one brochure, or one contact of a brochure.

    The item is removed from the JSON store, from ChromaDB (documents and
    metadata) and from the SQL database. An optional JSON body
    ``{"contactId": ...}`` narrows a brochure delete to a single contact.

    Returns ``{"success": True}`` when the item was found in any store, 404
    when it was found nowhere, 400 for a missing API key or an invalid mode,
    and 500 when the database step raises.
    """
    user_api_key = OPENROUTER_API_KEY  # Use hardcoded server-side API key
    if not user_api_key:
        return jsonify({'error': 'Server API key not configured'}), 400
    if mode not in _VALID_MODES:
        return jsonify({'error': 'Invalid mode'}), 400

    # A DELETE request often carries no body; silent=True yields None instead
    # of an error response, so whole-item deletes keep working.
    data = request.get_json(silent=True)
    contact_id = data.get('contactId') if isinstance(data, dict) else None
    contact_delete = mode == 'brochures' and bool(contact_id)

    # Step 1: Delete from JSON file
    user_data = _load_user_data(user_api_key, mode)
    item_found_in_json = False
    if contact_delete:
        for brochure in user_data:
            if brochure.get('id') == item_id:
                contacts_before = len(brochure.get('contacts', []))
                brochure['contacts'] = _without_contact(brochure, contact_id)
                item_found_in_json = len(brochure['contacts']) < contacts_before
                break
    else:
        remaining = [entry for entry in user_data if entry.get('id') != item_id]
        item_found_in_json = len(remaining) < len(user_data)
        user_data = remaining
    if item_found_in_json:
        _save_user_data(user_api_key, mode, user_data)

    # Step 1.5: Delete from ChromaDB (RAG knowledge base). Best effort: a
    # failure here is logged and does not abort the request.
    item_found_in_chroma = False
    try:
        chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode) or []
        chroma_item = next((e for e in chroma_data if e.get('id') == item_id), None)
        if chroma_item is not None:
            if contact_delete:
                item_found_in_chroma = any(
                    c.get('id') == contact_id for c in chroma_item.get('contacts', [])
                )
            else:
                item_found_in_chroma = True

        if contact_delete:
            # Prefer the JSON copy of the brochure (already updated above);
            # fall back to the ChromaDB copy when JSON does not have it.
            brochure = next((b for b in user_data if b.get('id') == item_id), None)
            if brochure is None:
                brochure = chroma_item
            if brochure is not None:
                brochure['contacts'] = _without_contact(brochure, contact_id)
                _sync_brochure_contacts_to_chroma(user_api_key, mode, item_id, brochure)
                print(f"ChromaDB: Re-indexed contacts for brochure {item_id} after contact deletion")
        else:
            _remove_item_from_chroma(user_api_key, mode, item_id)
            if mode == 'cards':
                print(f"ChromaDB: Removed document and metadata {item_id} from {mode} knowledge base")
            else:
                print(f"ChromaDB: Removed brochure {item_id} documents and metadata from knowledge base")
    except Exception as e:
        print(f"ChromaDB removal warning: {e}")

    # ## FINAL DATABASE CODE ##
    # Step 2: Delete from Database
    try:
        user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
        db_record = None
        if mode == 'cards':
            db_record = BusinessCard.query.filter_by(json_id=item_id, user_hash=user_hash).first()
            deleted_label = f"business card json_id: {item_id}"
        elif contact_id:
            candidate = Contact.query.filter_by(json_id=contact_id).first()
            # Only delete a contact whose parent brochure belongs to this user.
            if candidate and candidate.brochure and candidate.brochure.user_hash == user_hash:
                db_record = candidate
            deleted_label = f"brochure contact json_id: {contact_id}"
        else:
            db_record = Brochure.query.filter_by(json_id=item_id, user_hash=user_hash).first()
            deleted_label = f"brochure json_id: {item_id}"

        if db_record is not None:
            # For a brochure, the original code relies on a cascading delete
            # to remove its linked contacts.
            db.session.delete(db_record)
            db.session.commit()
            print(f"Database record deleted for {deleted_label}")
            return jsonify({"success": True})

        if item_found_in_json or item_found_in_chroma:
            return jsonify({"success": True})

        # Not found anywhere so far: look in ChromaDB once more before
        # reporting 404 (should rarely happen).
        chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode) or []
        chroma_item = next((e for e in chroma_data if e.get('id') == item_id), None)
        if chroma_item is None:
            return jsonify({"success": False, "message": "Item not found"}), 404

        if contact_delete:
            chroma_item['contacts'] = _without_contact(chroma_item, contact_id)
            _sync_brochure_contacts_to_chroma(user_api_key, mode, item_id, chroma_item)
            print(f"ChromaDB: Deleted contact {contact_id} from brochure {item_id} in ChromaDB")
        else:
            _remove_item_from_chroma(user_api_key, mode, item_id)
            if mode == 'cards':
                print(f"ChromaDB: Deleted card {item_id} directly from ChromaDB")
            else:
                print(f"ChromaDB: Deleted brochure {item_id} directly from ChromaDB")
        return jsonify({"success": True})
    except Exception as e:
        db.session.rollback()
        print(f"DATABASE ERROR: Failed to delete record. Error: {e}")
        return jsonify({"success": False, "message": "Database delete failed."}), 500
    # ## END FINAL DATABASE CODE ##


# NOTE(review): `<mode>` restored from the view signature - confirm against the client.
@app.route('/delete_all/<mode>', methods=['DELETE'])
def delete_all_endpoint(mode):
    """Delete all items for a given mode (cards or brochures).

    Clears the JSON file, the ChromaDB metadata and document chunks, and the
    SQL rows that match the server key's user hash. The reported
    ``deleted_count`` is the larger of the JSON and ChromaDB item counts taken
    before deletion. Returns 400 for a missing API key or invalid mode and 500
    when any step raises.
    """
    user_api_key = OPENROUTER_API_KEY
    if not user_api_key:
        return jsonify({'error': 'Server API key not configured'}), 400

    if mode not in _VALID_MODES:
        return jsonify({'error': 'Invalid mode'}), 400

    deleted_count = 0

    try:
        # Step 1: Count items before deletion (from both sources)
        user_data = _load_user_data(user_api_key, mode)
        chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode)

        # Get count from whichever source has more
        deleted_count = max(len(user_data), len(chroma_data) if chroma_data else 0)

        if deleted_count == 0:
            return jsonify({
                'success': True,
                'deleted_count': 0,
                'message': f'No {mode} to delete'
            })

        print(f"Starting deletion of {deleted_count} {mode}...")

        # Step 2: Clear JSON file
        _save_user_data(user_api_key, mode, [])
        print(f"Cleared JSON file for {mode}")

        # Step 3: Delete ALL metadata from ChromaDB (bulk delete)
        metadata_deleted = rag_core.delete_all_metadata_from_chroma(user_api_key, mode)
        print(f"Deleted {metadata_deleted} metadata records from ChromaDB")

        # Step 4: Delete ALL document chunks from ChromaDB (bulk delete)
        docs_deleted = rag_core.delete_all_documents_from_chroma(user_api_key, mode)
        print(f"Deleted {docs_deleted} document chunks from ChromaDB")

        # Step 5: Delete from SQL Database
        user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()

        if mode == 'cards':
            db_deleted = BusinessCard.query.filter_by(user_hash=user_hash).delete()
            print(f"Deleted {db_deleted} business cards from SQL database")
        elif mode == 'brochures':
            # Delete brochures one by one through the session, as the
            # single-brochure endpoint does: a bulk Query.delete() does not run
            # ORM relationship cascades, so linked contacts could be left behind.
            # NOTE(review): assumes the Brochure -> Contact cascade in models.py
            # is ORM-level - confirm.
            brochures = Brochure.query.filter_by(user_hash=user_hash).all()
            for brochure in brochures:
                db.session.delete(brochure)
            db_deleted = len(brochures)
            print(f"Deleted {db_deleted} brochures from SQL database")

        db.session.commit()

        print(f"Successfully deleted all {deleted_count} {mode} from all storage layers")

        return jsonify({
            'success': True,
            'deleted_count': deleted_count,
            'message': f'Successfully deleted {deleted_count} {mode}'
        })

    except Exception as e:
        db.session.rollback()
        print(f"DATABASE ERROR: Failed to delete all {mode}. Error: {e}")
        traceback.print_exc()
        return jsonify({
            'success': False,
            'message': f'Failed to delete all {mode}: {str(e)}'
        }), 500


@app.route('/')
def serve_dashboard():
    """Render the dashboard page from the `index.html` template."""
    return render_template('index.html')


# NOTE(review): `<filename>` restored from the view signature - confirm
# whether the original used a `path:` converter.
@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Serve a previously uploaded file from UPLOAD_FOLDER."""
    return send_from_directory(UPLOAD_FOLDER, filename)


# Health check endpoint - responds immediately without waiting for model loading
@app.route('/health')
def health_check():
    """Return a static 200 response indicating the service is up."""
    return jsonify({"status": "ok", "message": "Service is running"}), 200


# Create database tables (lightweight - runs at import time)
with app.app_context():
    db.create_all()
    print("Database tables (business_card, brochure, contact) checked and created if necessary.")

# Lazy initialization for RAG system (deferred until first request)
_rag_initialized = False


@app.before_request
def ensure_rag_initialized():
    """Initialize the RAG system once, on the first request that needs it.

    Health checks, uploaded/static files and the dashboard page skip
    initialization. Whether it succeeds, reports unavailability, or raises,
    the attempt is recorded so it is not repeated on later requests.
    """
    global _rag_initialized

    # Skip initialization for health checks and static files
    if request.endpoint in ('health_check', 'uploaded_file', 'static', 'serve_dashboard'):
        return
    if _rag_initialized:
        return

    print("First request received - initializing RAG system...")
    try:
        success = rag_core.initialize_rag_system()
        _rag_initialized = True
        if success:
            print("RAG system initialized successfully!")
        else:
            print("RAG system not available - OCR features will still work")
    except Exception as e:
        print(f"RAG initialization error (non-fatal): {e}")
        _rag_initialized = True  # Mark as attempted so we don't retry


if __name__ == "__main__":
    # Local development - initialize immediately for better dev experience
    try:
        rag_core.initialize_rag_system()
        # Record the eager initialization so the before_request hook does not
        # initialize the RAG system a second time on the first request.
        _rag_initialized = True
    except Exception as e:
        print(f"RAG initialization failed: {e}")
        print("App will start without RAG features")

    print("--- Server is starting! ---")
    print(f"User-specific data will be saved in '{os.path.abspath(DATA_FOLDER)}'")
    print("To use the dashboard, open your web browser and go to: http://127.0.0.1:5000")
    webbrowser.open_new('http://127.0.0.1:5000')
    app.run(host='0.0.0.0', port=5000, debug=False, use_reloader=False)