Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,10 @@ from sentence_transformers import util
|
|
| 7 |
import google.generativeai as genai
|
| 8 |
import chromadb
|
| 9 |
from langchain_chroma import Chroma
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# === Configuration ===
|
| 12 |
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
|
|
@@ -14,6 +18,40 @@ embedding_model = "models/embedding-001"
|
|
| 14 |
llm_model_name = "models/gemma-3-4b-it"
|
| 15 |
collection_name = "xeno_collection"
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# === Load and Clean Knowledge Base ===
|
| 18 |
df_kb = pd.read_json("XENO_Uganda_KnowledgeBase_Advisory.json")
|
| 19 |
df_kb.dropna(subset=['Content'], inplace=True)
|
|
@@ -28,7 +66,8 @@ def prepare_documents(data):
|
|
| 28 |
"section": item.get("Section", ""),
|
| 29 |
"source": item.get("Source", ""),
|
| 30 |
"owner": item.get("Owner", ""),
|
| 31 |
-
"tag": item.get("Tag", "")
|
|
|
|
| 32 |
})
|
| 33 |
ids.append(item["ID"])
|
| 34 |
return documents, metadatas, ids
|
|
@@ -37,7 +76,7 @@ xeno_data_list = df_kb.to_dict('records')
|
|
| 37 |
documents, metadatas, ids = prepare_documents(xeno_data_list)
|
| 38 |
|
| 39 |
# === Setup ChromaDB ===
|
| 40 |
-
client = chromadb.PersistentClient(path="
|
| 41 |
try:
|
| 42 |
collection = client.get_collection(name=collection_name)
|
| 43 |
except:
|
|
@@ -112,6 +151,7 @@ Remember: This is a single-turn interaction. You have no memory of previous conv
|
|
| 112 |
def process_context(results, cosine_scores, max_results=2):
|
| 113 |
sorted_indices = np.argsort(cosine_scores)[::-1][:max_results]
|
| 114 |
formatted_context = ""
|
|
|
|
| 115 |
for i, idx in enumerate(sorted_indices, 1):
|
| 116 |
result = results[idx]
|
| 117 |
score = cosine_scores[idx]
|
|
@@ -119,7 +159,8 @@ def process_context(results, cosine_scores, max_results=2):
|
|
| 119 |
formatted_context += f"Q: {result.metadata.get('question', 'N/A')}\n"
|
| 120 |
formatted_context += f"A: {result.metadata.get('content', 'N/A')}\n"
|
| 121 |
formatted_context += "-" * 40 + "\n"
|
| 122 |
-
|
|
|
|
| 123 |
|
| 124 |
# === LLM Generation ===
|
| 125 |
def generate_xeno_response(context, question):
|
|
@@ -137,6 +178,7 @@ def generate_xeno_response(context, question):
|
|
| 137 |
# === Main Interface Logic ===
|
| 138 |
def get_context_and_answer(message, history):
|
| 139 |
if message.lower().strip() in {"hi", "hello", "hey"}:
|
|
|
|
| 140 |
return "Hello! How can I assist you with XENO services today?"
|
| 141 |
|
| 142 |
queried_results = retriever.invoke(message)
|
|
@@ -153,10 +195,13 @@ def get_context_and_answer(message, history):
|
|
| 153 |
|
| 154 |
# If none of the results have sufficient similarity, fallback
|
| 155 |
if max(cosine_scores) < 0.4:
|
|
|
|
| 156 |
return "I'm sorry, I couldn't find the specific information you're looking for in my knowledge base."
|
| 157 |
|
| 158 |
-
context = process_context(queried_results, cosine_scores)
|
| 159 |
-
|
|
|
|
|
|
|
| 160 |
|
| 161 |
# === Gradio UI ===
|
| 162 |
iface = gr.ChatInterface(
|
|
@@ -167,4 +212,4 @@ iface = gr.ChatInterface(
|
|
| 167 |
)
|
| 168 |
|
| 169 |
if __name__ == "__main__":
|
| 170 |
-
iface.launch()
|
|
|
|
| 7 |
import google.generativeai as genai
|
| 8 |
import chromadb
|
| 9 |
from langchain_chroma import Chroma
|
| 10 |
+
import gspread
|
| 11 |
+
from google.oauth2.service_account import Credentials
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
import json
|
| 14 |
|
| 15 |
# === Configuration ===
|
| 16 |
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
|
|
|
|
| 18 |
llm_model_name = "models/gemma-3-4b-it"
|
| 19 |
collection_name = "xeno_collection"
|
| 20 |
|
| 21 |
+
# === Google Sheets Setup for Hugging Face ===
|
| 22 |
+
# Use environment variable for Google Sheets credentials
|
| 23 |
+
def get_google_sheets_credentials():
    """Build Google service-account credentials from the environment.

    Reads the ``GOOGLE_SHEETS_CREDENTIALS`` environment variable, parses it
    as JSON, and passes the resulting mapping to
    ``Credentials.from_service_account_info`` together with the Sheets feed
    and Drive scopes.

    Returns:
        The credentials object produced by
        ``Credentials.from_service_account_info``.

    Raises:
        ValueError: If the variable is unset or empty, is not valid JSON,
            or does not decode to a JSON object.
    """
    credentials_json = os.environ.get("GOOGLE_SHEETS_CREDENTIALS")
    if not credentials_json:
        raise ValueError("GOOGLE_SHEETS_CREDENTIALS environment variable not set.")

    # Surface a misconfigured secret with a message that names the variable
    # rather than a bare JSON parser error.
    try:
        credentials_dict = json.loads(credentials_json)
    except json.JSONDecodeError as err:
        raise ValueError(
            "GOOGLE_SHEETS_CREDENTIALS environment variable is not valid JSON."
        ) from err

    # Valid JSON that is not an object (e.g. a list or a quoted string)
    # cannot describe a service account; fail here instead of inside the
    # auth library.
    if not isinstance(credentials_dict, dict):
        raise ValueError(
            "GOOGLE_SHEETS_CREDENTIALS environment variable must contain a JSON object."
        )

    scope = [
        "https://spreadsheets.google.com/feeds",
        "https://www.googleapis.com/auth/drive",
    ]
    return Credentials.from_service_account_info(credentials_dict, scopes=scope)
|
| 31 |
+
|
| 32 |
+
# Authenticate with Google Sheets
|
| 33 |
+
client_gspread = gspread.authorize(get_google_sheets_credentials())
|
| 34 |
+
|
| 35 |
+
# Open the Google Sheet (replace 'Response_Log' with your Google Sheet name)
|
| 36 |
+
sheet = client_gspread.open("Response_Log").sheet1
|
| 37 |
+
|
| 38 |
+
def log_response(question, answer, source_ids):
    """Append one question/answer record to the module-level Google Sheet.

    The row written is ``[timestamp, question, answer, source_ids]``, where
    the timestamp is ``datetime.now()`` formatted as ``YYYY-MM-DD HH:MM:SS``.
    Logging is best-effort: any exception raised while appending is caught
    and reported with ``print`` so a logging failure never breaks the caller.

    Args:
        question (str): The question asked by the user.
        answer (str): The answer provided by the model.
        source_ids (str | list | tuple): Source IDs used for the answer,
            either already joined into one string or as a sequence of IDs.
            A sequence is flattened to a comma-separated string; its
            elements need not be strings.

    Returns:
        None
    """
    # A row must be a flat list of cell values; collapse a sequence of IDs
    # into a single cell and stringify each element so non-string IDs work.
    if isinstance(source_ids, (list, tuple)):
        source_ids = ", ".join(str(source_id) for source_id in source_ids)

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    row = [timestamp, question, answer, source_ids]
    try:
        sheet.append_row(row)
        print(f"Logged: {question} | Source IDs: {source_ids}")
    except Exception as e:
        # Deliberate catch-all: logging must not take the chat handler down.
        print(f"Failed to log to Google Sheet: {e}")
|
| 54 |
+
|
| 55 |
# === Load and Clean Knowledge Base ===
|
| 56 |
df_kb = pd.read_json("XENO_Uganda_KnowledgeBase_Advisory.json")
|
| 57 |
df_kb.dropna(subset=['Content'], inplace=True)
|
|
|
|
| 66 |
"section": item.get("Section", ""),
|
| 67 |
"source": item.get("Source", ""),
|
| 68 |
"owner": item.get("Owner", ""),
|
| 69 |
+
"tag": item.get("Tag", ""),
|
| 70 |
+
"id": item["ID"] # Ensure ID is included in metadata
|
| 71 |
})
|
| 72 |
ids.append(item["ID"])
|
| 73 |
return documents, metadatas, ids
|
|
|
|
| 76 |
documents, metadatas, ids = prepare_documents(xeno_data_list)
|
| 77 |
|
| 78 |
# === Setup ChromaDB ===
|
| 79 |
+
client = chromadb.PersistentClient(path="/tmp/xeno_db") # Use /tmp for Hugging Face Spaces
|
| 80 |
try:
|
| 81 |
collection = client.get_collection(name=collection_name)
|
| 82 |
except:
|
|
|
|
| 151 |
def process_context(results, cosine_scores, max_results=2):
|
| 152 |
sorted_indices = np.argsort(cosine_scores)[::-1][:max_results]
|
| 153 |
formatted_context = ""
|
| 154 |
+
source_ids = []
|
| 155 |
for i, idx in enumerate(sorted_indices, 1):
|
| 156 |
result = results[idx]
|
| 157 |
score = cosine_scores[idx]
|
|
|
|
| 159 |
formatted_context += f"Q: {result.metadata.get('question', 'N/A')}\n"
|
| 160 |
formatted_context += f"A: {result.metadata.get('content', 'N/A')}\n"
|
| 161 |
formatted_context += "-" * 40 + "\n"
|
| 162 |
+
source_ids.append(result.metadata.get('id', 'N/A'))
|
| 163 |
+
return formatted_context, source_ids
|
| 164 |
|
| 165 |
# === LLM Generation ===
|
| 166 |
def generate_xeno_response(context, question):
|
|
|
|
| 178 |
# === Main Interface Logic ===
|
| 179 |
def get_context_and_answer(message, history):
|
| 180 |
if message.lower().strip() in {"hi", "hello", "hey"}:
|
| 181 |
+
log_response(message, "Hello! How can I assist you with XENO services today?", "N/A")
|
| 182 |
return "Hello! How can I assist you with XENO services today?"
|
| 183 |
|
| 184 |
queried_results = retriever.invoke(message)
|
|
|
|
| 195 |
|
| 196 |
# If none of the results have sufficient similarity, fallback
|
| 197 |
if max(cosine_scores) < 0.4:
|
| 198 |
+
log_response(message, "I'm sorry, I couldn't find the specific information you're looking for in my knowledge base.", "N/A")
|
| 199 |
return "I'm sorry, I couldn't find the specific information you're looking for in my knowledge base."
|
| 200 |
|
| 201 |
+
context, source_ids = process_context(queried_results, cosine_scores)
|
| 202 |
+
answer = generate_xeno_response(context, message)
|
| 203 |
+
log_response(message, answer, ", ".join(source_ids))
|
| 204 |
+
return answer
|
| 205 |
|
| 206 |
# === Gradio UI ===
|
| 207 |
iface = gr.ChatInterface(
|
|
|
|
| 212 |
)
|
| 213 |
|
| 214 |
if __name__ == "__main__":
|
| 215 |
+
iface.launch(share=False) # Set share=False for Hugging Face Spaces
|