cognichat / app.py
HYPERXD
Implement Query Expansion System with Comprehensive Testing
02955c8
# app.py
import atexit
import logging
import os
import time
import uuid
from flask import Flask, request, render_template, session, jsonify, Response, stream_with_context
from werkzeug.utils import secure_filename
from rag_processor import create_rag_chain
from typing import Sequence, Any
from gtts import gTTS
import io
import re
# Configure logging
# Root logger is set up once at import time: INFO level, each record prefixed
# with timestamp, logger name and severity.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used by the handlers and helpers in this file.
logger = logging.getLogger(__name__)
from langchain_community.document_loaders import (
TextLoader,
PyPDFLoader,
Docx2txtLoader,
)
# Additional imports for robust PDF handling
from langchain_core.documents import Document
import fitz
# Import session and file managers
from utils.session_manager import SessionManager
from utils.file_manager import FileManager
# Text Splitter, Embeddings, Retrievers
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.retrievers import EnsembleRetriever, ContextualCompressionRetriever
from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
from langchain_community.retrievers import BM25Retriever
from langchain_community.chat_message_histories import ChatMessageHistory
from sentence_transformers.cross_encoder import CrossEncoder
import numpy as np
app = Flask(__name__)
# Flask signs the session cookie with SECRET_KEY. A key generated with
# os.urandom() on every start invalidates all existing session cookies on each
# restart and differs between worker processes, so prefer a key supplied by
# the environment and only fall back to a random one when none is configured.
app.config['SECRET_KEY'] = (
    os.environ.get('FLASK_SECRET_KEY')
    or os.environ.get('SECRET_KEY')
    or os.urandom(24)
)
# Query Expansion Configuration
# Set to True to enable multi-query expansion (improves retrieval quality)
# Strategies: "quick" (2 queries), "balanced" (3-4 queries), "comprehensive" (5-6 queries)
ENABLE_QUERY_EXPANSION = True
QUERY_EXPANSION_STRATEGY = "balanced"  # Can be: quick, balanced, comprehensive
class LocalReranker(BaseDocumentCompressor):
    """Document compressor that keeps the ``top_n`` best-scoring documents.

    Every candidate is paired with the query and scored by ``model.predict``;
    candidates are then ordered by descending score and truncated.
    """

    # Scoring model; must expose ``predict(pairs, show_progress_bar=False)``.
    model: Any
    # Maximum number of documents returned per query.
    top_n: int = 5

    class Config:
        arbitrary_types_allowed = True

    def compress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks=None,
    ) -> Sequence[Document]:
        """Return up to ``top_n`` documents, highest model score first.

        An empty ``documents`` sequence yields an empty list without calling
        the model. ``callbacks`` is accepted but not used.
        """
        if not documents:
            return []

        query_doc_pairs = []
        for candidate in documents:
            query_doc_pairs.append([query, candidate.page_content])

        relevance = self.model.predict(query_doc_pairs, show_progress_bar=False)

        ranked = list(zip(documents, relevance))
        ranked.sort(key=lambda pair: pair[1], reverse=True)

        best = ranked[:self.top_n]
        return [candidate for candidate, _score in best]
# Hugging Face Spaces is detected through either of two environment variables.
is_hf_spaces = any(os.getenv(name) for name in ("SPACE_ID", "SPACES_ZERO_GPU"))

# On Spaces uploads go to /tmp/uploads; elsewhere a relative "uploads" folder is used.
app.config['UPLOAD_FOLDER'] = '/tmp/uploads' if is_hf_spaces else 'uploads'

try:
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    print(f"✓ Upload folder ready: {app.config['UPLOAD_FOLDER']}")
except Exception as folder_error:
    print(f"✗ Failed to create upload folder {app.config['UPLOAD_FOLDER']}: {folder_error}")
    # The configured path could not be created: switch to /tmp/uploads instead.
    # A failure here is not caught and aborts start-up.
    app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    print(f"✓ Using fallback upload folder: {app.config['UPLOAD_FOLDER']}")
# Sessions and uploaded files share the same 24-hour retention window.
_RETENTION_SECONDS = 86400

# Initialize managers with automatic cleanup
session_manager = SessionManager(ttl_seconds=_RETENTION_SECONDS)
file_manager = FileManager(
    upload_folder=app.config['UPLOAD_FOLDER'],
    max_age_seconds=_RETENTION_SECONDS,
)
logger.info("✓ Session and file managers initialized")

# Per-session chat histories, keyed by session id. They live outside
# session_manager because get_session_history() below hands them to the RAG
# chain as its conversational memory.
message_histories = {}
print("Loading embedding model...")

# Cache-related environment variables and the sub-directory each one uses
# below the cache root (insertion order is the order they are prepared in).
_CACHE_SUBDIRS = {
    'HF_HOME': 'huggingface',
    'HF_HUB_CACHE': 'huggingface/hub',
    'TRANSFORMERS_CACHE': 'transformers',
    'SENTENCE_TRANSFORMERS_HOME': 'sentence_transformers',
}
_FALLBACK_CACHE_ROOT = '/tmp/hf_cache'


def ensure_cache_dir(env_var, subdir, fallback_root=_FALLBACK_CACHE_ROOT):
    """Make sure the cache directory named by ``env_var`` exists.

    The directory currently stored in ``os.environ[env_var]`` is created (and
    chmod-ed to 0o777). If that fails with an ``OSError`` (permission denied,
    read-only filesystem, a file in the way, ...) the directory
    ``fallback_root/subdir`` is created instead and ``os.environ[env_var]`` is
    re-pointed at it.

    Args:
        env_var: Name of the environment variable holding the cache path.
        subdir: Path of this cache relative to a cache root.
        fallback_root: Root used when the configured directory is unusable.

    Returns:
        The directory the environment variable points at afterwards. This is
        the original value when both the primary and the fallback location
        could not be prepared (a message is printed; nothing is raised).
    """
    cache_dir = os.environ[env_var]
    try:
        os.makedirs(cache_dir, mode=0o777, exist_ok=True)
        # Set permissions explicitly after creation
        os.chmod(cache_dir, 0o777)
        print(f"Cache directory ready: {cache_dir}")
        return cache_dir
    except PermissionError as pe:
        print(f"Permission error for {cache_dir}: {pe}")
    except OSError as oe:
        print(f"Warning: Could not create {cache_dir}: {oe}")
    except Exception as e:
        # Not a filesystem problem; a different directory would not help.
        print(f"Warning: Could not create {cache_dir}: {e}")
        return cache_dir

    # The fallback is derived from the variable's own sub-directory rather
    # than by rewriting a hard-coded "/app/.cache" prefix, so it works for any
    # home directory.
    fallback_dir = os.path.join(fallback_root, subdir)
    if os.path.abspath(fallback_dir) == os.path.abspath(cache_dir):
        # Already using the fallback location; there is nothing else to try.
        return cache_dir
    try:
        os.makedirs(fallback_dir, mode=0o777, exist_ok=True)
    except Exception as fe:
        print(f"Failed to create fallback directory {fallback_dir}: {fe}")
        return cache_dir
    print(f"Using fallback cache directory: {fallback_dir}")
    # Update environment variable to point to fallback
    os.environ[env_var] = fallback_dir
    return fallback_dir


# Use ~/.cache when the home directory can be resolved, otherwise /tmp.
cache_base = os.path.expanduser("~/.cache") if os.path.expanduser("~") != "~" else "/tmp/hf_cache"
for _env_var, _subdir in _CACHE_SUBDIRS.items():
    # setdefault keeps any value already provided by the environment.
    os.environ.setdefault(_env_var, f'{cache_base}/{_subdir}')

# Directories requested before any fallback is applied.
cache_dirs = [os.environ[_env_var] for _env_var in _CACHE_SUBDIRS]

for _env_var, _subdir in _CACHE_SUBDIRS.items():
    ensure_cache_dir(_env_var, _subdir)
def _load_embedding_model():
    """Load the sentence-embedding model, trying three configurations in turn.

    Attempts, in order: the fully qualified model name with the configured
    cache folder, the short model name with the cache folder, and the fully
    qualified name without a cache folder. The first one that constructs
    successfully is returned.

    Raises:
        Exception: when every attempt fails; the last underlying error is
            attached as the cause.
    """
    full_name = "sentence-transformers/all-miniLM-L6-v2"
    short_name = "all-miniLM-L6-v2"
    cache_folder = os.environ.get('SENTENCE_TRANSFORMERS_HOME')
    cpu_only = {'device': 'cpu'}  # Ensure CPU usage

    # (constructor kwargs, success message, failure prefix, "what comes next" message)
    attempts = (
        (
            {'model_name': full_name, 'model_kwargs': cpu_only, 'cache_folder': cache_folder},
            "Embedding model loaded successfully.",
            "Error loading embedding model with full name",
            "Trying shortened model name...",
        ),
        (
            {'model_name': short_name, 'model_kwargs': cpu_only, 'cache_folder': cache_folder},
            "Embedding model with shortened name loaded successfully.",
            "Failed to load embedding model with shortened name",
            "Trying without cache folder specification...",
        ),
        (
            {'model_name': full_name, 'model_kwargs': cpu_only},
            "Embedding model loaded without cache folder specification.",
            "Final attempt failed",
            None,
        ),
    )

    print("Attempting to load embedding model...")
    last_error = None
    for kwargs, success_msg, failure_prefix, next_step_msg in attempts:
        try:
            model = HuggingFaceEmbeddings(**kwargs)
        except Exception as error:
            last_error = error
            print(f"{failure_prefix}: {error}")
            if next_step_msg:
                print(next_step_msg)
        else:
            print(success_msg)
            return model

    raise Exception(
        f"Could not load any embedding model. Last error: {last_error}"
    ) from last_error


# Try loading embedding model with error handling and fallbacks
EMBEDDING_MODEL = _load_embedding_model()
# Load the cross-encoder once at import time, pinned to CPU. It is passed to
# LocalReranker as the scoring model when an upload builds its retriever.
print("Loading local re-ranking model...")
RERANKER_MODEL = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1", device='cpu')
print("Re-ranking model loaded successfully.")
def load_pdf_with_fallback(filepath):
    """Load a PDF, trying several extraction backends until one yields text.

    Backends are tried in order: ``PyPDFLoader``, PyMuPDF (``fitz``) and, if
    installed, ``pdfplumber``. A backend "succeeds" only when it produces at
    least one page with non-whitespace text; any exception it raises is
    printed and the next backend is tried.

    Args:
        filepath: Path of the PDF file on disk.

    Returns:
        A list of ``Document`` objects. Documents built by the PyMuPDF and
        pdfplumber backends carry ``source``, 1-based ``page`` and
        ``total_pages`` metadata.

    Raises:
        Exception: when no backend could extract any text.
    """
    # Method 1: Try PyPDFLoader (primary method)
    try:
        docs = PyPDFLoader(filepath).load()
        if docs and any(doc.page_content.strip() for doc in docs):
            print(f"✓ Successfully loaded PDF using PyPDFLoader: {filepath}")
            return docs
    except Exception as e:
        print(f"PyPDFLoader failed for {filepath}: {e}")

    # Method 2: Try PyMuPDF (fitz) as fallback
    try:
        import fitz
        docs = []
        pdf_doc = fitz.open(filepath)
        try:
            total_pages = pdf_doc.page_count
            for page_num in range(total_pages):
                text = pdf_doc[page_num].get_text()
                if text.strip():  # Only add pages with content
                    docs.append(Document(
                        page_content=text,
                        metadata={
                            "source": filepath,
                            "page": page_num + 1,
                            "total_pages": total_pages,
                        },
                    ))
        finally:
            # Release the file handle even when a page fails to extract.
            pdf_doc.close()
        if docs:
            print(f"✓ Successfully loaded PDF using PyMuPDF: {filepath}")
            return docs
    except Exception as e:
        print(f"PyMuPDF failed for {filepath}: {e}")

    # Method 3: Try to extract text using pdfplumber (if available)
    try:
        import pdfplumber
        docs = []
        with pdfplumber.open(filepath) as pdf:
            total_pages = len(pdf.pages)
            for page_num, page in enumerate(pdf.pages):
                text = page.extract_text()
                if text and text.strip():
                    docs.append(Document(
                        page_content=text,
                        metadata={
                            "source": filepath,
                            "page": page_num + 1,
                            "total_pages": total_pages,
                        },
                    ))
        if docs:
            print(f"✓ Successfully loaded PDF using pdfplumber: {filepath}")
            return docs
    except ImportError:
        print("pdfplumber not available, skipping method 3")
    except Exception as e:
        print(f"pdfplumber failed for {filepath}: {e}")

    # Every backend failed or produced no text: report one generic error
    # (the individual backend errors were printed above).
    raise Exception(f"Could not load PDF {filepath} using any available method. The file may be corrupted, password-protected, or in an unsupported format.")
# A dictionary to map file extensions to their corresponding loader classes
# Keys are lower-case extensions including the dot. Values for ".txt" and
# ".docx" are loader classes (instantiated with the path, then .load() is
# called); the ".pdf" value is a function that returns the documents directly,
# which is why the upload handler special-cases that extension.
LOADER_MAPPING = {
".txt": TextLoader,
".pdf": load_pdf_with_fallback, # Use our custom PDF loader
".docx": Docx2txtLoader,
}
def get_session_history(session_id: str) -> ChatMessageHistory:
    """
    Return the chat history stored for ``session_id``.

    The first request for an unknown session id creates an empty
    ``ChatMessageHistory``, stores it in ``message_histories`` and returns it;
    later calls return that same object.
    Note: entries are never removed here; they are dropped by the cleanup
    endpoint or by the periodic orphan cleanup.
    """
    try:
        return message_histories[session_id]
    except KeyError:
        fresh_history = ChatMessageHistory()
        message_histories[session_id] = fresh_history
        return fresh_history
def cleanup_expired_message_histories():
    """
    Drop chat histories whose session id is no longer known to session_manager.

    Returns the number of histories removed (0 when nothing was orphaned).
    """
    # Snapshot the live session ids while holding the session manager's lock.
    with session_manager.lock:
        live_ids = set(session_manager.sessions.keys())

    # Collect first, delete afterwards, so the dict is not mutated mid-iteration.
    stale_ids = [sid for sid in list(message_histories.keys()) if sid not in live_ids]
    for sid in stale_ids:
        del message_histories[sid]

    removed = len(stale_ids)
    if removed:
        logger.info(f"Cleaned up {removed} orphaned message histories")
    return removed
@app.route('/health', methods=['GET'])
def health_check():
    """Liveness probe: always answers 200 with a fixed JSON payload."""
    payload = {'status': 'healthy', 'message': 'CogniChat is running'}
    return jsonify(payload), 200
@app.route('/', methods=['GET'])
def index():
    """Serve the application's main page from the ``index.html`` template."""
    main_page = render_template('index.html')
    return main_page
def _describe_upload_failure(filename, error_msg):
    """Return the user-facing ``failed_files`` entry for a loader error message."""
    lowered = error_msg.lower()
    # Provide specific error messages for common PDF issues
    if "endstream" in lowered:
        return f"{filename} (corrupted PDF - missing endstream marker)"
    if "pdf" in lowered:  # also covers messages mentioning "pypdf"
        return f"{filename} (PDF parsing error)"
    if "permission" in lowered:
        return f"{filename} (password protected or permission denied)"
    if "encoding" in lowered:
        return f"{filename} (text encoding issue)"
    if len(error_msg) > 50:
        return f"{filename} ({error_msg[:50]}...)"
    return f"{filename} ({error_msg})"


def _load_uploaded_documents(filepath, file_extension):
    """Load ``filepath`` with the loader registered for ``file_extension``."""
    loader_or_function = LOADER_MAPPING[file_extension]
    # The PDF entry is a plain function returning documents; the other
    # entries are loader classes that must be instantiated first.
    if file_extension == ".pdf":
        return loader_or_function(filepath)
    return loader_or_function(filepath).load()


def _build_compression_retriever(splits):
    """Build the hybrid (BM25 + FAISS) retriever with cross-encoder re-ranking."""
    print("Creating vector store for all documents...")
    vectorstore = FAISS.from_documents(documents=splits, embedding=EMBEDDING_MODEL)
    # Increased retrieval for better coverage of lengthy documents
    bm25_retriever = BM25Retriever.from_documents(splits)
    bm25_retriever.k = 10  # Increased from 5 for better initial recall
    faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})  # Increased from 5
    ensemble_retriever = EnsembleRetriever(
        retrievers=[bm25_retriever, faiss_retriever],
        weights=[0.5, 0.5],  # Equal weight for hybrid search
    )
    # Reranker provides precision after high-recall retrieval
    reranker = LocalReranker(model=RERANKER_MODEL, top_n=5)  # Increased from 3 for more context
    return ContextualCompressionRetriever(
        base_compressor=reranker,
        base_retriever=ensemble_retriever,
    )


@app.route('/upload', methods=['POST'])
def upload_files():
    """Handle a multi-file upload and build a RAG session from it.

    Each uploaded file is saved to the upload folder and loaded with the
    loader registered for its extension; files that cannot be saved or parsed
    are reported but do not abort the request. All extracted documents are
    chunked, indexed and wrapped in a RAG chain stored under a new session id.

    Returns:
        JSON with ``status``, a human-readable summary in ``filename``,
        ``processed_count``, ``failed_count`` and ``session_id`` on success;
        a JSON error with status 400 when nothing could be processed, or 500
        on unexpected failures.
    """
    files = request.files.getlist('file')
    if not files or all(f.filename == '' for f in files):
        return jsonify({'status': 'error', 'message': 'No selected files.'}), 400

    all_docs = []
    try:
        print(f"Processing {len(files)} files...")
        processed_files = []
        failed_files = []
        file_paths = []  # Track all saved file paths for cleanup

        for file in files:
            if not (file and file.filename):
                continue

            filename = secure_filename(file.filename)
            if not filename:
                # secure_filename() can reduce a name such as "../" to an empty
                # string; saving would then target the upload folder itself.
                print(f"✗ Rejected upload with unusable filename: {file.filename!r}")
                failed_files.append(f"{file.filename} (invalid filename)")
                continue

            # NOTE(review): files from different uploads share one folder, so
            # equal filenames overwrite each other — confirm this is acceptable.
            filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            try:
                file.save(filepath)
                file_paths.append(filepath)  # Track for cleanup
                print(f"✓ Successfully saved file: {filepath}")
            except Exception as save_error:
                print(f"✗ Failed to save file {filename}: {save_error}")
                failed_files.append(f"{filename} (failed to save: {save_error})")
                continue

            file_extension = os.path.splitext(filename)[1].lower()
            if file_extension not in LOADER_MAPPING:
                print(f"Skipping unsupported file type: {filename}")
                failed_files.append(f"{filename} (unsupported format)")
                continue

            # File-specific error handling: one bad file must not fail the batch.
            try:
                file_docs = _load_uploaded_documents(filepath, file_extension)
            except Exception as file_error:
                error_msg = str(file_error)
                print(f"✗ Error processing {filename}: {error_msg}")
                failed_files.append(_describe_upload_failure(filename, error_msg))
                continue

            if file_docs:  # Only add if documents were successfully loaded
                all_docs.extend(file_docs)
                processed_files.append(filename)
                print(f"✓ Successfully processed: {filename} ({len(file_docs)} pages/sections)")
            else:
                failed_files.append(f"{filename} (no content extracted)")
                print(f"⚠ Warning: No content extracted from {filename}")

        # Check if any documents were successfully processed
        if not all_docs:
            if not failed_files:
                return jsonify({'status': 'error', 'message': 'No processable files were uploaded.'}), 400
            error_msg = "Failed to process all uploaded files:\n" + "\n".join(failed_files[:5])
            if len(failed_files) > 5:
                error_msg += f"\n...and {len(failed_files) - 5} more files"
            return jsonify({'status': 'error', 'message': error_msg}), 400

        # --- Process all documents together ---
        print(f"Successfully processed {len(processed_files)} files, creating knowledge base...")
        # Improved chunking: larger chunks preserve context better for complex documents
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1500,  # Increased from 1000 for better context preservation
            chunk_overlap=300,  # Increased from 200 for better continuity
            separators=["\n\n", "\n", ". ", " ", ""],  # Prioritize natural breaks
            length_function=len,
        )
        splits = text_splitter.split_documents(all_docs)
        print(f"✓ Created {len(splits)} text chunks from documents")

        compression_retriever = _build_compression_retriever(splits)

        session_id = str(uuid.uuid4())
        print(f"Creating RAG chain for session {session_id}...")
        try:
            # Create RAG chain with Query Expansion enabled
            rag_chain = create_rag_chain(
                retriever=compression_retriever,
                get_session_history_func=get_session_history,
                enable_query_expansion=ENABLE_QUERY_EXPANSION,
                expansion_strategy=QUERY_EXPANSION_STRATEGY,
            )
            session_manager.create_session(session_id, rag_chain)
            print(f"✓ RAG chain created successfully for session {session_id} with {len(processed_files)} documents.")
            print(f" Query Expansion: {'ENABLED' if ENABLE_QUERY_EXPANSION else 'DISABLED'} ({QUERY_EXPANSION_STRATEGY})")
            # Track uploaded files for cleanup
            for file_path in file_paths:
                file_manager.register_file(session_id, file_path)
        except Exception as rag_error:
            print(f"✗ Failed to create RAG chain: {rag_error}")
            import traceback
            traceback.print_exc()
            # Clean up session on error; cleanup stays best-effort but a
            # failure is logged instead of being silently discarded.
            try:
                session_manager.delete_session(session_id)
                file_manager.cleanup_session_files(session_id)
                logger.error(f"Cleaned up session {session_id} after error")
            except Exception:
                logger.exception(f"Failed to clean up session {session_id} after error")
            # Re-raise the original error with its traceback intact.
            raise

        session['session_id'] = session_id
        print(f"✓ Session stored in Flask session: {session_id}")

        # Prepare success message with file processing summary
        success_msg = f"Successfully processed: {', '.join(processed_files)}"
        if failed_files:
            success_msg += f"\n\nFailed to process {len(failed_files)} file(s):\n" + "\n".join(failed_files[:3])
            if len(failed_files) > 3:
                success_msg += f"\n...and {len(failed_files) - 3} more"
        return jsonify({
            'status': 'success',
            'filename': success_msg,
            'processed_count': len(processed_files),
            'failed_count': len(failed_files),
            'session_id': session_id,  # Include session_id in response for client to store
        })
    except Exception as e:
        print(f"Error creating RAG chain: {e}")
        return jsonify({'status': 'error', 'message': f'Failed to process files: {str(e)}'}), 500
@app.route('/chat', methods=['POST'])
def chat():
    """Answer a chat question using the session's RAG chain.

    Expects a JSON object with ``question`` and, optionally, ``session_id``
    (used when the Flask session cookie carries none). The answer is returned
    as a ``text/plain`` response produced by a generator that yields the
    chain's result as a single chunk.

    Returns:
        400 JSON when the question or session id is missing, 404 JSON when
        the session is unknown or expired, otherwise the plain-text answer.
    """
    # A missing, malformed or non-object JSON body is treated as empty so the
    # client gets the JSON 400 below instead of an unhandled error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    question = data.get('question')
    # Try to get session_id from multiple sources (Flask session or request body)
    session_id = session.get('session_id') or data.get('session_id')

    print(f"\n=== Chat Request Debug ===")
    print(f"Raw request data: {data}")
    print(f"Question: '{question}' (type: {type(question)})")
    print(f"Session ID from Flask session: {session.get('session_id')}")
    print(f"Session ID from request body: {data.get('session_id')}")
    print(f"Final session ID used: {session_id}")
    print(f"Active sessions: {session_manager.get_session_count()}")

    # Check each condition individually with detailed logging
    if not question:
        print("✗ FAILURE: No question provided")
        return jsonify({'status': 'error', 'message': 'No question provided.'}), 400
    print("✓ Question is valid")

    if not session_id:
        print("✗ FAILURE: No session ID in Flask session")
        return jsonify({'status': 'error', 'message': 'No session found. Please upload documents first.'}), 400
    print(f"✓ Session ID found: {session_id}")

    # Get RAG chain from session manager
    rag_chain = session_manager.get_session(session_id)
    if not rag_chain:
        print(f"✗ FAILURE: Session ID {session_id} not found or expired")
        return jsonify({'status': 'error', 'message': 'Session expired. Please upload documents again.'}), 404
    print(f"✓ RAG chain found for session: {session_id}")

    try:
        config = {"configurable": {"session_id": session_id}}

        def generate():
            """Yield the chain's answer, retrying the invocation once on failure."""
            import traceback
            # The chain receives the question together with the config.
            input_data = {
                "question": question,
                "config": config,  # Keep config in input_data for the lambda function
            }
            try:
                response = rag_chain.invoke(input_data)
                yield response
            except Exception as stream_error:
                print(f"Streaming error: {stream_error}")
                traceback.print_exc()
                # Second attempt: the same invocation is simply retried once.
                try:
                    response = rag_chain.invoke(input_data)
                    yield response
                except Exception as invoke_error:
                    print(f"Invoke error: {invoke_error}")
                    traceback.print_exc()
                    yield "I apologize, but I'm having trouble processing your question. Please try again or upload your documents again."

        return Response(stream_with_context(generate()), mimetype='text/plain')
    except Exception as e:
        print(f"Error during chat invocation: {e}")
        return Response("An error occurred while getting the answer.", status=500, mimetype='text/plain')
def clean_markdown_for_tts(text: str) -> str:
    """Strip markdown formatting so the text reads cleanly when spoken.

    Handles fenced-code markers, images/links (the label is kept, the URL is
    dropped), bold/italic emphasis, inline code, headings, list markers and
    blockquotes, then collapses line breaks into single spaces.

    Emphasis markers are only removed when they hug non-space text, so list
    bullets ("* item") and spaced asterisks ("5 * 3") are not mistaken for
    emphasis, and underscores inside words (snake_case) are left alone.

    Args:
        text: Markdown-formatted input.

    Returns:
        The cleaned text with leading/trailing whitespace removed.
    """
    # Drop the fence lines of fenced code blocks (``` or ~~~), keep the code.
    text = re.sub(r'^[ \t]*(?:```|~~~)[^\n`]*$', '', text, flags=re.MULTILINE)
    # Images and links: keep the alt text / label, drop the target.
    text = re.sub(r'!?\[([^\]]*)\]\([^)]*\)', r'\1', text)
    # Bold / italic with asterisks: *text*, **text**, ***text***
    text = re.sub(r'(\*{1,3})(?=\S)(.+?)(?<=\S)\1', r'\2', text)
    # Bold / italic with underscores, only at word boundaries: _text_, __text__
    text = re.sub(r'(?<!\w)(_{1,2})(?=\S)(.+?)(?<=\S)\1(?!\w)', r'\2', text)
    # Remove inline code (`code`)
    text = re.sub(r'`([^`\n]*)`', r'\1', text)
    # Remove headings (e.g., #, ##, ###)
    text = re.sub(r'^\s*#{1,6}\s+', '', text, flags=re.MULTILINE)
    # Remove list item markers (*, -, 1.)
    text = re.sub(r'^\s*[\*\-]\s+', '', text, flags=re.MULTILINE)
    text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
    # Remove blockquotes (>)
    text = re.sub(r'^\s*>\s?', '', text, flags=re.MULTILINE)
    # Replace multiple newlines with a single space
    text = re.sub(r'\n+', ' ', text)
    return text.strip()
@app.route('/tts', methods=['POST'])
def text_to_speech():
    """Generate speech for the posted text and return it as MP3 audio.

    Expects a JSON object with a string ``text`` field. Markdown formatting is
    stripped before synthesis.

    Returns:
        ``audio/mpeg`` bytes on success; a JSON error with status 400 when no
        usable text was supplied, or 500 when synthesis fails.
    """
    # Tolerate a missing, malformed or non-object JSON body.
    data = request.get_json(silent=True)
    text = data.get('text') if isinstance(data, dict) else None
    if not text or not isinstance(text, str):
        return jsonify({'status': 'error', 'message': 'No text provided.'}), 400

    # Clean the text before sending to gTTS
    clean_text = clean_markdown_for_tts(text)
    if not clean_text:
        # Nothing is left once formatting is stripped (e.g. the text was "**").
        return jsonify({'status': 'error', 'message': 'No speakable text provided.'}), 400

    try:
        tts = gTTS(clean_text, lang='en')
        mp3_fp = io.BytesIO()
        tts.write_to_fp(mp3_fp)
        # Send the complete buffer in one piece rather than iterating the
        # BytesIO object chunk by chunk.
        return Response(mp3_fp.getvalue(), mimetype='audio/mpeg')
    except Exception as e:
        print(f"Error in TTS generation: {e}")
        return jsonify({'status': 'error', 'message': 'Failed to generate audio.'}), 500
@app.route('/debug', methods=['GET'])
def debug_info():
    """Report runtime configuration as JSON for troubleshooting.

    Covers the detected environment, upload folder state, whether a GROQ API
    key is configured (never the key itself), session counts and the contents
    of the caller's Flask session.
    """
    placeholder_key = "your_groq_api_key_here"
    api_key = os.getenv("GROQ_API_KEY")
    space_id = os.getenv("SPACE_ID")
    is_hf_spaces = bool(space_id or os.getenv("SPACES_ZERO_GPU"))
    upload_folder = app.config['UPLOAD_FOLDER']

    # A key counts as "set" only when it is non-empty and not the template value.
    key_is_real = bool(api_key and api_key != placeholder_key)
    key_is_placeholder = bool(api_key) and api_key == placeholder_key

    if is_hf_spaces:
        environment_label = 'Hugging Face Spaces'
    else:
        environment_label = 'Local/Other'

    if session:
        session_keys = list(session.keys())
    else:
        session_keys = []

    return jsonify({
        'environment': environment_label,
        'upload_folder': upload_folder,
        'upload_folder_writable': os.access(upload_folder, os.W_OK),
        'groq_api_key_set': key_is_real,
        'groq_api_key_placeholder': key_is_placeholder,
        'sessions_count': session_manager.get_session_count(),
        'flask_session_id': session.get('session_id'),
        'flask_session_keys': session_keys,
        'embedding_model_loaded': 'EMBEDDING_MODEL' in globals(),
        'space_id': space_id,
        'hf_spaces_detected': is_hf_spaces
    })
@app.route('/cleanup', methods=['POST'])
def cleanup_session():
    """Manually clean up a session: its RAG chain, chat history and files.

    The session id is taken from the JSON body (``session_id``) or, when the
    body is absent or carries none, from the Flask session cookie.

    Returns:
        JSON ``status: success`` with the id and ``files_deleted`` when the
        session existed; 404 JSON when it did not (files registered under the
        id are still removed); 400 JSON when no session id is available.
    """
    # silent=True lets a POST without a JSON body fall through to the
    # cookie-based session id instead of being rejected while parsing.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    session_id = data.get('session_id') or session.get('session_id')
    if not session_id:
        return jsonify({'error': 'No session ID provided'}), 400

    # Delete session
    deleted = session_manager.delete_session(session_id)

    # Delete message history
    if message_histories.pop(session_id, None) is not None:
        logger.info(f"Deleted message history for session {session_id}")

    # Delete files
    files_deleted = file_manager.cleanup_session_files(session_id)

    if not deleted:
        return jsonify({
            'error': 'Session not found',
            'files_deleted': files_deleted
        }), 404
    return jsonify({
        'status': 'success',
        'session_id': session_id,
        'files_deleted': files_deleted
    })
@app.route('/stats', methods=['GET'])
def get_stats():
    """Return server statistics as JSON.

    Calling this endpoint also runs the orphaned-history cleanup, so the
    reported ``message_histories`` count is the post-cleanup value.
    """
    # Maintenance first: drop histories whose session no longer exists.
    orphans_removed = cleanup_expired_message_histories()

    usage = file_manager.get_disk_usage()
    memory_stats = session_manager.get_memory_stats()

    report = {
        'active_sessions': session_manager.get_session_count(),
        'message_histories': len(message_histories),
        'session_stats': memory_stats,
        'uploaded_files': usage['file_count'],
        'disk_usage_mb': usage['total_mb'],
        'orphaned_cleaned': orphans_removed,
        'timestamp': time.time()
    }
    return jsonify(report)
@app.route('/test-session', methods=['GET', 'POST'])
def test_session():
    """Debug helper for Flask session cookies.

    POST stores two marker values in the session and echoes them back;
    GET reports what the session currently contains.
    """
    if request.method != 'POST':
        # Read-only path: report the current session contents.
        return jsonify({
            'action': 'session_read',
            'test_key': session.get('test_key'),
            'timestamp': session.get('timestamp'),
            'session_id': session.get('session_id'),
            'session_keys': list(session.keys()),
            'has_session_data': bool(session)
        })

    # Write path: store marker values, then echo what the session now holds.
    session['test_key'] = 'test_value'
    session['timestamp'] = str(time.time())
    return jsonify({
        'action': 'session_set',
        'test_key': session.get('test_key'),
        'timestamp': session.get('timestamp'),
        'session_keys': list(session.keys())
    })
def cleanup_on_shutdown():
    """Release sessions, chat histories and old uploads at interpreter exit.

    Registered with ``atexit`` below.
    """
    logger.info("Server shutting down, cleaning up...")
    session_manager.cleanup_all()
    # Count the histories before clearing them; measuring afterwards would
    # always report 0.
    histories_cleared = len(message_histories)
    message_histories.clear()  # Clear all message histories
    cleaned = file_manager.cleanup_old_files()
    logger.info(f"Cleanup complete: {cleaned['deleted']} files deleted, {histories_cleared} message histories cleared")


atexit.register(cleanup_on_shutdown)
if __name__ == '__main__':
    # Startup banner: summarise the environment before the server starts.
    started_at = time.strftime('%Y-%m-%d %H:%M:%S')
    print(f"\n=== Application Startup Complete at {started_at} ===")

    # Check environment
    is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
    api_key = os.getenv('GROQ_API_KEY')
    # The template placeholder value counts as "not configured".
    key_configured = bool(api_key and api_key != 'your_groq_api_key_here')

    environment_name = 'Hugging Face Spaces' if is_hf_spaces else 'Local Development'
    print(f"Environment: {environment_name}")
    print(f"Upload folder: {app.config['UPLOAD_FOLDER']}")

    if is_hf_spaces:
        print(f"Space ID: {os.getenv('SPACE_ID', 'Not set')}")
        key_status = 'Set via HF Secrets' if key_configured else 'NOT SET - Add to Space Secrets'
    else:
        key_status = 'Set' if key_configured else 'NOT SET'
    print(f"GROQ API Key: {key_status}")

    model_status = 'Loaded' if 'EMBEDDING_MODEL' in globals() else 'NOT LOADED'
    print(f"Embedding model: {model_status}")

    if not key_configured:
        print("\n⚠️ WARNING: GROQ API KEY NOT CONFIGURED!")
        if is_hf_spaces:
            print(" → Go to your Space Settings > Repository Secrets")
            print(" → Add GROQ_API_KEY as a secret")
        else:
            print(" → Update .env file with your GROQ API key")

    # Listen on $PORT when set; otherwise default to 7860 (the port used for
    # Hugging Face Spaces) in every environment.
    port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=port, debug=False)