File size: 6,117 Bytes
d545f81
 
 
 
3cdce90
19b5af3
3cdce90
19b5af3
 
3cdce90
 
d545f81
3cdce90
 
d545f81
3cdce90
 
 
 
 
 
19b5af3
8f4c69e
8d369b8
 
 
0aa781d
 
3cdce90
 
 
2ce8410
03c0f5d
8f4c69e
 
03c0f5d
3cdce90
03c0f5d
 
3cdce90
e1eb7f3
ddbf2de
3cdce90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ce8410
e11fe89
 
3cdce90
2ce8410
3cdce90
 
 
03c0f5d
d545f81
3cdce90
03c0f5d
3cdce90
 
03c0f5d
 
 
e11fe89
3cdce90
e11fe89
03c0f5d
3cdce90
03c0f5d
 
 
 
 
 
 
 
3cdce90
03c0f5d
d545f81
 
 
3cdce90
03c0f5d
 
3cdce90
 
03c0f5d
3cdce90
 
03c0f5d
8d369b8
e11fe89
d545f81
03c0f5d
 
 
8d369b8
3cdce90
 
 
 
8d369b8
03c0f5d
 
 
 
 
 
 
8d369b8
03c0f5d
 
 
8d369b8
3cdce90
 
03c0f5d
d545f81
3cdce90
03c0f5d
 
 
3cdce90
 
 
03c0f5d
3cdce90
03c0f5d
3cdce90
e11fe89
3cdce90
ddbf2de
03c0f5d
 
 
3cdce90
03c0f5d
 
3cdce90
 
 
03c0f5d
3cdce90
03c0f5d
d545f81
3cdce90
e11fe89
fe6a7ce
3cdce90
d545f81
5615ee1
3cdce90
d545f81
3cdce90
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
"""
XENO Bot - AI-powered customer service assistant
Main application file with Gradio interface
"""

import logging
import os
import traceback

from src.config import (COLLECTION_NAME, EMBEDDING_MODEL, LLM_MODEL_NAME,
                        SERVER_NAME, SERVER_PORT, SIMILARITY_THRESHOLD)
from src.intent_classifier import IntentClassifier
from src.interface import create_interface
from src.knowledge_base import get_knowledge_base_data
from src.logger import log_response, log_timing_data
from src.memory import create_session_config, retrieve_memory, update_memory
from src.response_generator import generate_xeno_response
# Import custom modules
from src.utils import PipelineTimer
from src.vector_store import (generate_embeddings, initialize_vector_store,
                              process_context)

# === Configuration ===
# Warn (but keep starting up) when the Gemini API key is absent from the environment
if os.environ.get("GEMINI_API_KEY") is None:
    print("WARNING: GEMINI_API_KEY environment variable not found.")

# Lower-case module-level aliases for the imported configuration constants
embedding_model, llm_model_name, collection_name = (
    EMBEDDING_MODEL,
    LLM_MODEL_NAME,
    COLLECTION_NAME,
)

# === Intent Classification System ===
# Single shared classifier instance, handed to the interface at launch
intent_classifier = IntentClassifier()

# === Load and Clean Knowledge Base ===
# Knowledge-base payload as three parallel values: documents, metadatas, ids
documents, metadatas, ids = get_knowledge_base_data()

# === Setup ChromaDB ===
# Vector-store handles; the retriever is what the RAG pipeline queries
collection, vector_store, retriever = initialize_vector_store()


# === Core Orchestration Logic ===
def get_context_and_answer(
    message, history, session_id, intent_classifier, retriever
):
    """
    Core orchestration function that handles the RAG pipeline

    Flow for one user message:
      1. classify the intent; non-"query" intents are answered directly
         with the classifier's canned response
      2. load the session's chat history from memory
      3. for "query" intents: retrieve documents, embed, score by cosine
         similarity, and either return a "couldn't find" answer (no
         documents / best score below SIMILARITY_THRESHOLD) or ask the LLM
      4. store the exchange in memory, then log the response and timings

    Args:
        message: User's message
        history: Chat history passed by the caller (not read here; the
            history used for generation comes from ``retrieve_memory``)
        session_id: Session identifier
        intent_classifier: IntentClassifier instance
        retriever: Vector store retriever instance

    Returns:
        Generated answer string. Exceptions raised inside the pipeline are
        caught, logged, and replaced by an apology message.
    """
    # Fresh timer for this call
    timer = PipelineTimer()
    timer.reset()
    error_step = None
    notes = []

    # Shared by the "nothing retrieved" and "similarity too low" outcomes
    no_match_answer = "I'm sorry, I couldn't find specific information for your question. Could you try rephrasing it, or contact XENO support directly?"

    try:
        # Create session memory config
        memory_config = create_session_config(session_id)

        # Step 1: Intent Classification
        intent, direct_response = intent_classifier.classify_intent(message)

        # Step 2: Memory Retrieval
        chat_history = retrieve_memory(memory_config)

        answer = ""
        source_ids = "N/A"
        knowledge_pairs = []

        if intent != "query":
            answer = direct_response
            notes.append(f"Simple intent: {intent}")
        elif len(message.strip()) < 3:
            answer = "I'd be happy to help! Could you please provide more details about what you'd like to know?"
            notes.append("Message too short")
        else:
            try:
                # Step 3: RAG Retrieval
                with timer.time_step("rag_retrieval"):
                    queried_results = retriever.invoke(message)

                if not queried_results:
                    # Nothing to score: answer as a no-match instead of
                    # letting the similarity step fail on empty embeddings.
                    # NOTE(review): assumes the retriever returns a plain
                    # sequence (empty list when nothing matches) - confirm.
                    answer = no_match_answer
                    notes.append("No documents retrieved")
                else:
                    # Step 4: Embedding Generation
                    query_embedding, doc_embeddings = generate_embeddings(
                        message, queried_results, timer
                    )

                    # Imported outside the timed step so import cost is not
                    # counted as similarity-calculation time
                    import sentence_transformers.util as util
                    import torch

                    # Step 5: Similarity Calculation
                    with timer.time_step("similarity_calculation"):
                        cosine_scores = util.cos_sim(
                            torch.tensor(query_embedding).float(),
                            torch.tensor(doc_embeddings).float(),
                        )[0].tolist()
                        max_score = max(cosine_scores) if cosine_scores else 0

                    if max_score < SIMILARITY_THRESHOLD:
                        answer = no_match_answer
                        notes.append(f"Low similarity score: {max_score:.3f}")
                    else:
                        # Step 6: Context Processing
                        context, source_ids_list, knowledge_pairs = process_context(
                            queried_results, cosine_scores
                        )

                        # Step 7: LLM Generation
                        answer = generate_xeno_response(context, message, chat_history)
                        source_ids = ", ".join(source_ids_list)
                        notes.append(f"Max similarity: {max_score:.3f}")

            except Exception as e:
                # RAG failures are reported to the user as an apology; the
                # request still goes through memory update and logging below.
                error_step = timer.current_step or "rag_processing"
                logging.error(f"Error during RAG processing: {e}")
                logging.error(traceback.format_exc())
                answer = "I apologize, but I'm having a technical issue. Please try again shortly or contact XENO support."
                notes.append(f"Error: {str(e)}")

        # Step 8: Memory Update
        update_memory(memory_config, message, answer)

        # Step 9: Response Logging
        log_response(message, answer, source_ids, knowledge_pairs, session_id)

        # Log timing data
        timing_summary = timer.get_timing_summary()
        log_timing_data(
            message,
            session_id,
            timing_summary,
            error_step=error_step,
            notes="; ".join(notes) if notes else None,
        )

        return answer

    except Exception as e:
        # Anything outside the RAG section (intent, memory, logging) lands here
        error_step = timer.current_step or "main_pipeline"
        logging.error(f"Error in main pipeline: {e}")
        logging.error(traceback.format_exc())

        timing_summary = timer.get_timing_summary()
        log_timing_data(
            message,
            session_id,
            timing_summary,
            error_step=error_step,
            notes=f"Pipeline error: {str(e)}",
        )

        return "I apologize, but I encountered an error processing your request. Please try again."


# === Main Interface Logic ===

if __name__ == "__main__":
    # Build the Gradio interface around the shared classifier and retriever
    iface = create_interface(intent_classifier, retriever)

    # Launch settings: no public share link, host/port taken from config
    launch_options = {
        "share": False,
        "server_name": SERVER_NAME,
        "server_port": SERVER_PORT,
        "ssr_mode": False,
    }
    iface.launch(**launch_options)