Spaces:
Build error
Build error
File size: 6,117 Bytes
d545f81 3cdce90 19b5af3 3cdce90 19b5af3 3cdce90 d545f81 3cdce90 d545f81 3cdce90 19b5af3 8f4c69e 8d369b8 0aa781d 3cdce90 2ce8410 03c0f5d 8f4c69e 03c0f5d 3cdce90 03c0f5d 3cdce90 e1eb7f3 ddbf2de 3cdce90 2ce8410 e11fe89 3cdce90 2ce8410 3cdce90 03c0f5d d545f81 3cdce90 03c0f5d 3cdce90 03c0f5d e11fe89 3cdce90 e11fe89 03c0f5d 3cdce90 03c0f5d 3cdce90 03c0f5d d545f81 3cdce90 03c0f5d 3cdce90 03c0f5d 3cdce90 03c0f5d 8d369b8 e11fe89 d545f81 03c0f5d 8d369b8 3cdce90 8d369b8 03c0f5d 8d369b8 03c0f5d 8d369b8 3cdce90 03c0f5d d545f81 3cdce90 03c0f5d 3cdce90 03c0f5d 3cdce90 03c0f5d 3cdce90 e11fe89 3cdce90 ddbf2de 03c0f5d 3cdce90 03c0f5d 3cdce90 03c0f5d 3cdce90 03c0f5d d545f81 3cdce90 e11fe89 fe6a7ce 3cdce90 d545f81 5615ee1 3cdce90 d545f81 3cdce90 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 | """
XENO Bot - AI-powered customer service assistant
Main application file with Gradio interface
"""
import logging
import os
import traceback
from src.config import (COLLECTION_NAME, EMBEDDING_MODEL, LLM_MODEL_NAME,
SERVER_NAME, SERVER_PORT, SIMILARITY_THRESHOLD)
from src.intent_classifier import IntentClassifier
from src.interface import create_interface
from src.knowledge_base import get_knowledge_base_data
from src.logger import log_response, log_timing_data
from src.memory import create_session_config, retrieve_memory, update_memory
from src.response_generator import generate_xeno_response
# Import custom modules
from src.utils import PipelineTimer
from src.vector_store import (generate_embeddings, initialize_vector_store,
process_context)
# === Configuration ===
# Warn (without aborting) when the Gemini API key is absent from the environment.
if os.environ.get("GEMINI_API_KEY") is None:
    print("WARNING: GEMINI_API_KEY environment variable not found.")

# Module-level aliases for the configured model and collection names.
embedding_model, llm_model_name, collection_name = (
    EMBEDDING_MODEL,
    LLM_MODEL_NAME,
    COLLECTION_NAME,
)

# === Intent Classification System ===
intent_classifier = IntentClassifier()

# === Load and Clean Knowledge Base ===
documents, metadatas, ids = get_knowledge_base_data()

# === Setup ChromaDB ===
collection, vector_store, retriever = initialize_vector_store()
# === Core Orchestration Logic ===
def _score_retrieved_documents(message, queried_results, timer):
    """Return the cosine similarity of the query against each retrieved document."""
    # Step 4: Embedding Generation
    query_embedding, doc_embeddings = generate_embeddings(
        message, queried_results, timer
    )

    # Imported lazily (only the RAG path needs them) and *before* the timed
    # step so the one-off import cost is not booked as similarity time.
    import sentence_transformers.util as util
    import torch

    # Step 5: Similarity Calculation
    with timer.time_step("similarity_calculation"):
        # cos_sim returns a score matrix; row 0 holds the scores for the
        # query (assumes query_embedding is a single vector - TODO confirm).
        return util.cos_sim(
            torch.tensor(query_embedding).float(),
            torch.tensor(doc_embeddings).float(),
        )[0].tolist()


def get_context_and_answer(
    message, history, session_id, intent_classifier, retriever
):
    """
    Core orchestration function that handles the RAG pipeline.

    The message is classified first. Non-"query" intents are answered with the
    classifier's direct response, very short queries get a request for more
    detail, and everything else goes through retrieval, similarity scoring,
    context processing and LLM generation. The exchange is then written to
    session memory and to the response/timing logs.

    Errors raised inside the pipeline are logged and converted into an
    apology message instead of being propagated to the caller.

    Args:
        message: User's message
        history: Chat history (accepted for interface compatibility; this
            function does not read it - prior turns come from session memory)
        session_id: Session identifier
        intent_classifier: IntentClassifier instance
        retriever: Vector store retriever instance
    Returns:
        Generated answer string
    """
    no_match_answer = "I'm sorry, I couldn't find specific information for your question. Could you try rephrasing it, or contact XENO support directly?"

    # Create timer per call so timings of different requests never mix
    timer = PipelineTimer()
    timer.reset()
    error_step = None
    notes = []

    try:
        # Create session memory config
        memory_config = create_session_config(session_id)

        # Step 1: Intent Classification
        intent, direct_response = intent_classifier.classify_intent(message)

        # Step 2: Memory Retrieval
        chat_history = retrieve_memory(memory_config)

        answer = ""
        source_ids = "N/A"
        knowledge_pairs = []

        if intent != "query":
            answer = direct_response
            notes.append(f"Simple intent: {intent}")
        elif len(message.strip()) < 3:
            answer = "I'd be happy to help! Could you please provide more details about what you'd like to know?"
            notes.append("Message too short")
        else:
            try:
                # Step 3: RAG Retrieval
                with timer.time_step("rag_retrieval"):
                    queried_results = retriever.invoke(message)

                if not queried_results:
                    # Nothing to score: answer as "no match" instead of
                    # letting the similarity step fail on empty embeddings.
                    answer = no_match_answer
                    notes.append("No documents retrieved")
                else:
                    cosine_scores = _score_retrieved_documents(
                        message, queried_results, timer
                    )
                    max_score = max(cosine_scores) if cosine_scores else 0

                    if max_score < SIMILARITY_THRESHOLD:
                        answer = no_match_answer
                        notes.append(f"Low similarity score: {max_score:.3f}")
                    else:
                        # Step 6: Context Processing
                        context, source_ids_list, knowledge_pairs = process_context(
                            queried_results, cosine_scores
                        )
                        # Step 7: LLM Generation
                        answer = generate_xeno_response(
                            context, message, chat_history
                        )
                        source_ids = ", ".join(source_ids_list)
                        notes.append(f"Max similarity: {max_score:.3f}")
            except Exception as e:
                # A RAG failure must not lose the turn: record where it
                # happened, log the traceback and fall back to an apology.
                error_step = timer.current_step or "rag_processing"
                logging.exception("Error during RAG processing: %s", e)
                answer = "I apologize, but I'm having a technical issue. Please try again shortly or contact XENO support."
                notes.append(f"Error: {str(e)}")

        # Step 8: Memory Update
        update_memory(memory_config, message, answer)

        # Step 9: Response Logging
        log_response(message, answer, source_ids, knowledge_pairs, session_id)

        # Log timing data
        timing_summary = timer.get_timing_summary()
        log_timing_data(
            message,
            session_id,
            timing_summary,
            error_step=error_step,
            notes="; ".join(notes) if notes else None,
        )
        return answer
    except Exception as e:
        # Anything outside the RAG branch (session config, intent
        # classification, memory, logging) ends up here.
        error_step = timer.current_step or "main_pipeline"
        logging.exception("Error in main pipeline: %s", e)
        timing_summary = timer.get_timing_summary()
        log_timing_data(
            message,
            session_id,
            timing_summary,
            error_step=error_step,
            notes=f"Pipeline error: {str(e)}",
        )
        return "I apologize, but I encountered an error processing your request. Please try again."
# === Main Interface Logic ===
if __name__ == "__main__":
    # Build the interface around the shared classifier and retriever, then
    # serve it on the configured host and port without a public share link.
    iface = create_interface(intent_classifier, retriever)
    launch_options = {
        "share": False,
        "server_name": SERVER_NAME,
        "server_port": SERVER_PORT,
        "ssr_mode": False,
    }
    iface.launch(**launch_options)
|