Spaces:
Sleeping
Sleeping
Update llm_handler.py
Browse files- llm_handler.py +29 -10
llm_handler.py
CHANGED
|
@@ -106,15 +106,24 @@ def get_rag_response(query: str, session_id: str = None) -> tuple[str, str]:
|
|
| 106 |
if not all([encoder, chroma_collection, openrouter_client]):
|
| 107 |
return "Chatbot is not ready. Models or clients are not loaded.", session_id or create_chat_session()
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
session_id = create_chat_session()
|
| 112 |
print(f"π Created new chat session: {session_id}")
|
|
|
|
|
|
|
| 113 |
|
| 114 |
# Validate session exists, create if it doesn't
|
| 115 |
if session_id not in chat_sessions:
|
| 116 |
chat_sessions[session_id] = []
|
| 117 |
-
print(f"
|
|
|
|
|
|
|
| 118 |
|
| 119 |
# 1. Retrieve relevant documents from ChromaDB
|
| 120 |
query_embedding = encoder.encode([query])[0].tolist()
|
|
@@ -137,7 +146,11 @@ Rules:
|
|
| 137 |
- If asked for such info, politely refuse and redirect them to the official PM Internship portal.
|
| 138 |
- Keep answers clear, natural, and helpful — aim for short but complete responses (3–6 sentences).
|
| 139 |
- Use a friendly, encouraging tone while staying professional.
|
| 140 |
-
- Remember the conversation history and provide contextual responses.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
If the context doesn't have the answer, use your own general knowledge to provide a helpful response, even then if you are unable to answer the question, say: "I don't have that information, please check the official PM Internship portal.".
|
| 143 |
"""
|
|
@@ -145,13 +158,17 @@ If the context doesn't have the answer, use your own general knowledge to provid
|
|
| 145 |
# Build the conversation messages
|
| 146 |
messages = [{"role": "system", "content": system_prompt}]
|
| 147 |
|
| 148 |
-
# Add chat history
|
| 149 |
chat_history = get_chat_history(session_id)
|
| 150 |
-
|
|
|
|
| 151 |
|
| 152 |
-
# Add current query
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
try:
|
| 157 |
completion = openrouter_client.chat.completions.create(
|
|
@@ -163,10 +180,12 @@ If the context doesn't have the answer, use your own general knowledge to provid
|
|
| 163 |
|
| 164 |
answer = completion.choices[0].message.content
|
| 165 |
|
| 166 |
-
# Add the conversation to chat history
|
| 167 |
add_to_chat_history(session_id, "user", query)
|
| 168 |
add_to_chat_history(session_id, "assistant", answer)
|
| 169 |
|
|
|
|
|
|
|
| 170 |
return answer, session_id
|
| 171 |
|
| 172 |
except Exception as e:
|
|
|
|
| 106 |
if not all([encoder, chroma_collection, openrouter_client]):
|
| 107 |
return "Chatbot is not ready. Models or clients are not loaded.", session_id or create_chat_session()
|
| 108 |
|
| 109 |
+
def get_rag_response(query: str, session_id: str = None) -> tuple[str, str]:
|
| 110 |
+
"""Generates a response using Retrieval-Augmented Generation with chat memory."""
|
| 111 |
+
if not all([encoder, chroma_collection, openrouter_client]):
|
| 112 |
+
return "Chatbot is not ready. Models or clients are not loaded.", session_id or create_chat_session()
|
| 113 |
+
|
| 114 |
+
# Create a new session ONLY if none provided
|
| 115 |
+
if session_id is None or session_id == "":
|
| 116 |
session_id = create_chat_session()
|
| 117 |
print(f"π Created new chat session: {session_id}")
|
| 118 |
+
else:
|
| 119 |
+
print(f"π Using existing session: {session_id}")
|
| 120 |
|
| 121 |
# Validate session exists, create if it doesn't
|
| 122 |
if session_id not in chat_sessions:
|
| 123 |
chat_sessions[session_id] = []
|
| 124 |
+
print(f"β οΈ Session {session_id} not found in memory, creating new one")
|
| 125 |
+
else:
|
| 126 |
+
print(f"β
Found existing session with {len(chat_sessions[session_id])} messages")
|
| 127 |
|
| 128 |
# 1. Retrieve relevant documents from ChromaDB
|
| 129 |
query_embedding = encoder.encode([query])[0].tolist()
|
|
|
|
| 146 |
- If asked for such info, politely refuse and redirect them to the official PM Internship portal.
|
| 147 |
- Keep answers clear, natural, and helpful — aim for short but complete responses (3–6 sentences).
|
| 148 |
- Use a friendly, encouraging tone while staying professional.
|
| 149 |
+
- IMPORTANT: Remember the conversation history and provide contextual responses based on what was discussed earlier.
|
| 150 |
+
- When user says "the first one", "that internship", "it", etc., refer back to what was mentioned in the conversation history.
|
| 151 |
+
|
| 152 |
+
Available internship context for this query:
|
| 153 |
+
""" + context + """
|
| 154 |
|
| 155 |
If the context doesn't have the answer, use your own general knowledge to provide a helpful response, even then if you are unable to answer the question, say: "I don't have that information, please check the official PM Internship portal.".
|
| 156 |
"""
|
|
|
|
| 158 |
# Build the conversation messages
|
| 159 |
messages = [{"role": "system", "content": system_prompt}]
|
| 160 |
|
| 161 |
+
# Add chat history (this is the crucial part!)
|
| 162 |
chat_history = get_chat_history(session_id)
|
| 163 |
+
for msg in chat_history:
|
| 164 |
+
messages.append(msg)
|
| 165 |
|
| 166 |
+
# Add current user query (without the context prefix this time)
|
| 167 |
+
messages.append({"role": "user", "content": query})
|
| 168 |
+
|
| 169 |
+
print(f"π Debug - Messages being sent to LLM:")
|
| 170 |
+
for i, msg in enumerate(messages):
|
| 171 |
+
print(f" {i}: {msg['role']}: {msg['content'][:100]}...")
|
| 172 |
|
| 173 |
try:
|
| 174 |
completion = openrouter_client.chat.completions.create(
|
|
|
|
| 180 |
|
| 181 |
answer = completion.choices[0].message.content
|
| 182 |
|
| 183 |
+
# Add the conversation to chat history (store clean versions without context)
|
| 184 |
add_to_chat_history(session_id, "user", query)
|
| 185 |
add_to_chat_history(session_id, "assistant", answer)
|
| 186 |
|
| 187 |
+
print(f"πΎ Added to history - Session {session_id} now has {len(chat_sessions[session_id])} messages")
|
| 188 |
+
|
| 189 |
return answer, session_id
|
| 190 |
|
| 191 |
except Exception as e:
|