kaburia committed
Commit 8e8f061 · 1 Parent(s): f220545

log the data

Files changed (1): app.py (+80, -88)
app.py CHANGED
@@ -5,7 +5,10 @@ from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import json
+from utils.helpers import retrieve_context, upload_log_to_hf, log_interaction_hf
 
+turn_counter = 0
+UPLOAD_INTERVAL = 5
 
 with open("config.json") as f:
     config = json.load(f)
@@ -13,86 +16,12 @@ with open("config.json") as f:
 # load the results
 token = config["hf"] + config["token"]
 
-
-# === Step 0: Download FAISS index files if not present ===
-
-def download_faiss_index(repo_id="kaburia/epic-a-embeddings", local_folder="faiss_index"):
-    os.makedirs(local_folder, exist_ok=True)
-
-    index_faiss_path = os.path.join(local_folder, "index.faiss")
-    index_pkl_path = os.path.join(local_folder, "index.pkl")
-
-    if not os.path.exists(index_faiss_path):
-        print("Downloading index.faiss from Hugging Face Dataset...")
-        hf_hub_download(
-            repo_id=repo_id,
-            filename="index.faiss",
-            repo_type="dataset",  # 🛑 MUST add this line
-            local_dir=local_folder,
-            local_dir_use_symlinks=False,
-        )
-
-    if not os.path.exists(index_pkl_path):
-        print("Downloading index.pkl from Hugging Face Dataset...")
-        hf_hub_download(
-            repo_id=repo_id,
-            filename="index.pkl",
-            repo_type="dataset",  # 🛑 MUST add this line
-            local_dir=local_folder,
-            local_dir_use_symlinks=False,
-        )
-
-# === Step 1: Load Vectorstore ===
-
-def load_vectorstore(index_path="faiss_index"):
-    embedding_model = HuggingFaceEmbeddings(
-        model_name="sentence-transformers/all-MiniLM-L6-v2"
-    )
-    db = FAISS.load_local(
-        index_path,
-        embeddings=embedding_model,
-        allow_dangerous_deserialization=True
-    )
-    return db
-
-# Download FAISS index if needed
-download_faiss_index()
-
-vectorstore = load_vectorstore()
-
-
 # === Step 2: Setup HuggingFace Inference API ===
 
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta",
                          token=token)
 
 
-# === Step 3: Build Retrieval-Augmented Response Function ===
-
-# def retrieve_context(question, k=5):
-#     retriever = vectorstore.as_retriever(search_kwargs={"k": k})
-#     docs = retriever.get_relevant_documents(question)
-#     context = "\n\n".join(doc.page_content for doc in docs)
-#     return context
-
-def cosine_to_prob(score):
-    # Convert cosine similarity from [-1, 1] to [0, 1]
-    return (score + 1) / 2
-
-def retrieve_context(question, p=5, threshold=0.5):
-    # Get docs with raw scores
-    results = vectorstore.similarity_search_with_score(question, k=50)  # get more than needed
-
-    # Filter for "probability" above threshold
-    filtered = [(doc, score) for doc, score in results if cosine_to_prob(score) > threshold]
-
-    # Sort by score descending and take top-p
-    top_p_docs = sorted(filtered, key=lambda x: x[1], reverse=True)[:p]
-
-    # Join content for prompt
-    context = "\n\n".join(doc.page_content for doc, _ in top_p_docs)
-    return context
-
 def detect_intent(message: str) -> str:
     """Classify the message as 'small_talk' or 'info_query' using the model."""
     prompt = f"""You are a classifier. Categorize the following user message as either 'small_talk' or 'info_query'.
@@ -107,39 +36,43 @@ def detect_intent(message: str) -> str:
     return "info_query"
 
 def respond(message, history, system_message, max_tokens, temperature, top_p):
+    global turn_counter
     intent = detect_intent(message)
 
+    original_user_message = message  # preserve the real user input for logging
     messages = []
+
     if intent == "small_talk":
-        # Free conversation
         messages.append({"role": "system", "content": "You are a friendly assistant. Talk naturally and helpfully."})
     else:
-        # Retrieval + system constraints
-        context = retrieve_context(message, p=5, threshold=0.5)
+        context = retrieve_context(original_user_message, p=5, threshold=0.5)
         messages.append({"role": "system", "content": system_message})
-        message = f"""Use the following context to answer the question.
+        prompt = f"""Use the following context to answer the question.
 
-        - ONLY quote exact text from the context.
-        - Do NOT summarize, paraphrase, or infer anything.
-        - If no answer is found, respond: "The answer is not in the provided context."
+        - ONLY quote exact text from the context.
+        - Do NOT summarize, paraphrase, or infer anything.
+        - If no answer is found, respond: "The answer is not in the provided context."
 
-        Context:
-        {context}
+        Context:
+        {context}
 
-        Question: {message}"""
+        Question: {original_user_message}
+        """
+        original_user_message = prompt  # what gets sent to the model
 
-    # Add chat history
+    # Load prior chat memory
     for user, assistant in history:
         if user:
             messages.append({"role": "user", "content": user})
         if assistant:
             messages.append({"role": "assistant", "content": assistant})
 
-    # Final user input
-    messages.append({"role": "user", "content": message})
+    # Add current turn
+    messages.append({"role": "user", "content": original_user_message})
 
-    # Stream response
+    # Generate and stream response
     response = ""
+    full_response = ""
     for chunk in client.chat_completion(
         messages=messages,
         max_tokens=max_tokens,
@@ -150,8 +83,67 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
         token = chunk.choices[0].delta.content
         if token:
             response += token
+            full_response += token
         yield response
 
+    # Log this interaction
+    log_interaction_hf(message, full_response)
+
+    # Upload logs to Hugging Face every N turns
+    turn_counter += 1
+    if turn_counter % UPLOAD_INTERVAL == 0:
+        try:
+            upload_log_to_hf(token)
+        except Exception as e:
+            print(f"❌ Log upload failed: {e}")
+
+
+# def respond(message, history, system_message, max_tokens, temperature, top_p):
+#     intent = detect_intent(message)
+
+#     messages = []
+#     if intent == "small_talk":
+#         # Free conversation
+#         messages.append({"role": "system", "content": "You are a friendly assistant. Talk naturally and helpfully."})
+#     else:
+#         # Retrieval + system constraints
+#         context = retrieve_context(message, p=5, threshold=0.5)
+#         messages.append({"role": "system", "content": system_message})
+#         message = f"""Use the following context to answer the question.
+
+#         - ONLY quote exact text from the context.
+#         - Do NOT summarize, paraphrase, or infer anything.
+#         - If no answer is found, respond: "The answer is not in the provided context."
+
+#         Context:
+#         {context}
+
+#         Question: {message}"""
+
+#     # Add chat history
+#     for user, assistant in history:
+#         if user:
+#             messages.append({"role": "user", "content": user})
+#         if assistant:
+#             messages.append({"role": "assistant", "content": assistant})
+
+#     # Final user input
+#     messages.append({"role": "user", "content": message})
+
+#     # Stream response
+#     response = ""
+#     for chunk in client.chat_completion(
+#         messages=messages,
+#         max_tokens=max_tokens,
+#         temperature=temperature,
+#         top_p=top_p,
+#         stream=True,
+#     ):
+#         token = chunk.choices[0].delta.content
+#         if token:
+#             response += token
+#         yield response
+
 
 # def respond(message, history,
 #     system_message, max_tokens,
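The import added above pulls retrieve_context from utils.helpers, a module this commit does not show; the retrieval code deleted from app.py was presumably moved there along with the FAISS setup it depends on. A minimal sketch of that retrieval half, reassembled from the removed lines (the module path and exact layout are assumptions):

# utils/helpers.py (retrieval half): reconstructed from the code removed in this commit
import os

from huggingface_hub import hf_hub_download
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

def download_faiss_index(repo_id="kaburia/epic-a-embeddings", local_folder="faiss_index"):
    # Fetch the index files from the dataset repo if they are not cached locally
    os.makedirs(local_folder, exist_ok=True)
    for filename in ("index.faiss", "index.pkl"):
        if not os.path.exists(os.path.join(local_folder, filename)):
            hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                repo_type="dataset",  # the index lives in a dataset repo, not a model repo
                local_dir=local_folder,
                local_dir_use_symlinks=False,
            )

def load_vectorstore(index_path="faiss_index"):
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.load_local(
        index_path,
        embeddings=embedding_model,
        allow_dangerous_deserialization=True,
    )

download_faiss_index()
vectorstore = load_vectorstore()

def cosine_to_prob(score):
    # Map a cosine similarity in [-1, 1] to [0, 1]
    return (score + 1) / 2

def retrieve_context(question, p=5, threshold=0.5):
    # Over-fetch candidates, keep those whose mapped score clears the threshold,
    # then join the page contents of the top-p for the prompt
    results = vectorstore.similarity_search_with_score(question, k=50)
    filtered = [(doc, score) for doc, score in results if cosine_to_prob(score) > threshold]
    top_p_docs = sorted(filtered, key=lambda x: x[1], reverse=True)[:p]
    return "\n\n".join(doc.page_content for doc, _ in top_p_docs)

One caveat carried over from the original code: FAISS indexes built with L2 distance return scores where smaller is better, so cosine_to_prob and the descending sort are only correct if the index was built for cosine or inner-product similarity.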
 
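log_interaction_hf and upload_log_to_hf, the helpers behind this commit's "log the data" change, are also imported but not shown. A minimal sketch of what they could look like, assuming a local JSONL log pushed to a Hugging Face dataset repo via huggingface_hub; the file name, repo id, and record format here are invented for illustration:

# utils/helpers.py (logging half): a hypothetical sketch, not the author's implementation
import json
from datetime import datetime, timezone

from huggingface_hub import HfApi

LOG_PATH = "chat_log.jsonl"       # assumed local log file
LOG_REPO = "kaburia/epic-a-logs"  # assumed dataset repo for the logs

def log_interaction_hf(user_message, assistant_response):
    # Append one JSON record per completed turn
    record = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "user": user_message,
        "assistant": assistant_response,
    }
    with open(LOG_PATH, "a") as f:
        f.write(json.dumps(record) + "\n")

def upload_log_to_hf(token):
    # Push the accumulated log file to the dataset repo
    HfApi(token=token).upload_file(
        path_or_fileobj=LOG_PATH,
        path_in_repo=LOG_PATH,
        repo_id=LOG_REPO,
        repo_type="dataset",
    )

Note one bug in the diff itself: inside the streaming loop, token = chunk.choices[0].delta.content makes token a local variable of respond, so the later upload_log_to_hf(token) call passes the last streamed chunk rather than the module-level API token; renaming the loop variable would fix it.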
 
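respond is a generator that yields the growing response string, which matches the streaming contract of Gradio's ChatInterface as used in the stock Zephyr chat Space template; the UI wiring sits below the truncated context above. For orientation, a sketch of how such a handler is typically hooked up (none of this appears in the commit):

# Hypothetical UI wiring, mirroring the standard Zephyr chat Space template
import gradio as gr

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()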