Sebunya committed on
Commit
25eb473
·
verified ·
1 Parent(s): ab0c161
Files changed (1) hide show
  1. app.py +81 -50
app.py CHANGED
@@ -1,64 +1,95 @@
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
8
 
 
 
9
 
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
 
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
 
 
 
 
 
 
 
 
 
25
 
26
- messages.append({"role": "user", "content": message})
 
27
 
28
- response = ""
 
 
29
 
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
 
39
- response += token
40
- yield response
41
 
 
 
42
 
 
 
 
 
43
  """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
60
- )
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  if __name__ == "__main__":
64
- demo.launch()
 
 
1
+ import os
2
  import gradio as gr
3
+ import pandas as pd
4
+ import torch
5
+ import numpy as np
6
+ from sentence_transformers import util
7
+ import google.generativeai as genai
8
+ import chromadb
9
+ from langchain_chroma import Chroma
10
 
11
# Authenticate the Gemini SDK with the key supplied through the environment.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Read the knowledge base and discard entries that have no 'Content' value.
df_kb = pd.read_json("XENO_Uganda_KnowledgeBase_Advisory.json").dropna(
    subset=["Content"]
)
 
 
 
 
 
 
17
 
18
def prepare_documents(data):
    """Convert knowledge-base records into parallel lists for a vector store.

    Args:
        data: Iterable of mappings. Each must provide "ID", "Question" and
            "Content"; "Section", "Source", "Owner" and "Tag" are optional.

    Returns:
        A ``(documents, metadatas, ids)`` tuple of equal-length lists:
        the text to index, the per-entry metadata dict, and the entry id
        as a string.

    Raises:
        KeyError: If a record lacks "ID", "Question" or "Content".
    """

    def _text_or_blank(value):
        # Records built with DataFrame.to_dict("records") carry NaN for
        # missing cells instead of omitting the key, so a plain
        # ``.get(key, "")`` default never kicks in. NaN is the only float
        # that is not equal to itself.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return value

    documents, metadatas, ids = [], [], []
    for item in data:
        question, content = item["Question"], item["Content"]
        documents.append(f"Question: {question}\nAnswer: {content}")
        metadatas.append({
            "question": question,
            "content": content,
            "section": _text_or_blank(item.get("Section")),
            "source": _text_or_blank(item.get("Source")),
            "owner": _text_or_blank(item.get("Owner")),
            "tag": _text_or_blank(item.get("Tag")),
        })
        # Vector-store ids must be strings; ids parsed from JSON may be ints.
        ids.append(str(item["ID"]))
    return documents, metadatas, ids
32
 
33
# Flatten the cleaned knowledge base into the parallel lists Chroma expects.
xeno_data_list = df_kb.to_dict('records')
documents, metadatas, ids = prepare_documents(xeno_data_list)

# ChromaDB setup
client = chromadb.PersistentClient(path="./xeno_db")
collection_name = "xeno_collection"

# get_or_create_collection avoids wrapping get_collection in a bare
# `except:`, which would also hide unrelated failures (and the exception
# raised for a missing collection differs between chromadb releases).
collection = client.get_or_create_collection(name=collection_name)

# Populate only when the collection is empty: this covers both a freshly
# created collection and one left empty by an earlier interrupted start-up,
# while not re-inserting ids that are already stored.
if collection.count() == 0:
    collection.add(documents=documents, metadatas=metadatas, ids=ids)
 
 
 
45
 
46
# Expose the persisted collection through LangChain; each query retrieves
# the four most similar entries.
vector_store = Chroma(client=client, collection_name=collection_name)
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

# Gemini model identifiers: one for embeddings, one for text generation.
embedding_model = "models/embedding-001"
llm_model_name = "models/gemma-3-4b-it"

# Instruction block placed at the top of every prompt sent to the LLM.
SYSTEM_PROMPT = """
You are XENO Support Assistant, an AI-powered helpful and professional customer service representative.
Use only the information provided in the knowledge base context to answer user queries.
Do not hallucinate. If context doesn't contain relevant info, say so.
"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
def generate_xeno_response(context, question):
    """Ask the Gemini-hosted model to answer *question* using *context*.

    The prompt is SYSTEM_PROMPT followed by the context and question under
    their own section headers.

    Args:
        context: Knowledge-base text the answer should be grounded in.
        question: The user's query.

    Returns:
        The model's reply with surrounding whitespace removed, or a short
        apology when the model returned no usable text.
    """
    user_prompt = f"""{SYSTEM_PROMPT}
### CONTEXT ###
{context}
### QUESTION ###
{question}"""
    model = genai.GenerativeModel(llm_model_name)
    response = model.generate_content(user_prompt)
    try:
        return response.text.strip()
    except ValueError:
        # `response.text` raises ValueError when the reply carries no text
        # part (for example a blocked or empty candidate). Answer politely
        # rather than surfacing a stack trace in the chat UI.
        return (
            "Sorry, I couldn't generate a response for that request. "
            "Please try rephrasing your question."
        )
67
+
68
def get_context_and_answer(query, history=None):
    """Answer *query* from the knowledge base (retrieve, re-rank, generate).

    The retriever's candidates are re-scored by cosine similarity between
    Gemini embeddings of the query and of each candidate; the two best
    entries form the context handed to the LLM.

    Args:
        query: The user's message.
        history: Prior chat turns. Unused, but accepted because
            gr.ChatInterface calls its function as ``fn(message, history)``;
            without this parameter every chat turn raises TypeError.

    Returns:
        The generated answer text.
    """
    queried_results = retriever.invoke(query)
    if not queried_results:
        # Nothing to re-rank: skip the embedding call and let the model
        # report that the context holds no relevant information.
        return generate_xeno_response("", query)

    # The query vector is the same for every candidate; build it once.
    query_embedding = genai.embed_content(model=embedding_model,
                                          content=query,
                                          task_type="retrieval_query")['embedding']
    query_vector = torch.tensor(query_embedding).float()

    scored_docs = []
    for doc in queried_results:
        doc_embedding = genai.embed_content(model=embedding_model,
                                            content=doc.page_content,
                                            task_type="retrieval_document")['embedding']
        cos_sim = util.cos_sim(query_vector, torch.tensor(doc_embedding).float())[0][0].item()
        scored_docs.append((doc, cos_sim))

    # Keep the two highest-scoring entries (stable sort preserves retriever
    # order between equal scores).
    top_docs = sorted(scored_docs, key=lambda pair: pair[1], reverse=True)[:2]
    context = "".join(
        f"Knowledge Entry {i}:\nQ: {result.metadata['question']}\nA: {result.metadata['content']}\n{'-'*40}\n"
        for i, (result, _score) in enumerate(top_docs, 1)
    )
    return generate_xeno_response(context, query)
85
+
86
# Chat front end: each user message is routed to get_context_and_answer.
iface = gr.ChatInterface(
    get_context_and_answer,
    title="XENO Support Assistant",
    description="Ask anything about XENO's financial services.",
    theme="soft",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
95
+