oceddyyy committed
Commit 1473519 · verified · 1 Parent(s): b46142c

Update app.py

Files changed (1):
  app.py +25 -40
app.py CHANGED
@@ -6,10 +6,9 @@ import numpy as np
 from huggingface_hub import upload_file, hf_hub_download, InferenceClient
 from flask import Flask, request, jsonify
 import time
-import tempfile
 
 
-# === Setup cache directories ===
+# Setup caching and directories
 os.environ["HF_HOME"] = "/tmp/.cache"
 os.environ["HF_DATASETS_CACHE"] = "/tmp/.cache"
 os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/.cache"
@@ -17,7 +16,7 @@ os.makedirs("/tmp/.cache", exist_ok=True)
 os.makedirs("/tmp/outputs", exist_ok=True)
 
 
-# === Initialize models ===
+# Initialize models and clients
 embedding_model = SentenceTransformer('paraphrase-mpnet-base-v2')
 token = os.getenv("HF_TOKEN") or os.getenv("NEW_PUP_AI_Project")
 inference_client = InferenceClient(
@@ -26,7 +25,7 @@ inference_client = InferenceClient(
 )
 
 
-# === Load dataset ===
+# Load dataset
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 DATASET_PATH = os.path.join(BASE_DIR, "dataset.json")
 with open(DATASET_PATH, "r") as f:
@@ -37,7 +36,7 @@ answers = [item["answer"] for item in dataset]
 question_embeddings = embedding_model.encode(questions, convert_to_tensor=True)
 
 
-# === Feedback system setup ===
+# Feedback system setup
 feedback_data = []
 feedback_questions = []
 feedback_embeddings = None
@@ -71,9 +70,9 @@ except Exception as e:
     feedback_data = []
 
 
-# === Helper: upload file to HF ===
+# Upload helper
 def upload_file_to_hf(local_path, remote_filename):
-    """Upload a single file to Hugging Face dataset repo."""
+    """Helper to upload any file to Hugging Face dataset repo."""
     hf_token = os.getenv("NEW_PUP_AI_Project")
     if not hf_token:
         raise ValueError("Hugging Face token not found in environment variables!")
@@ -86,16 +85,16 @@ def upload_file_to_hf(local_path, remote_filename):
             repo_type="dataset",
             token=hf_token
         )
-        print(f"[UPLOAD] {remote_filename} uploaded successfully to Hugging Face.")
+        print(f"{remote_filename} uploaded to Hugging Face successfully.")
     except Exception as e:
-        print(f"[ERROR] Upload failed for {remote_filename}: {e}")
+        print(f"Error uploading {remote_filename} to HF: {e}")
 
 
-# === Chatbot core ===
+# Chatbot main logic
 def chatbot_response(query, dev_mode_flag):
     query_embedding = embedding_model.encode([query], convert_to_tensor=True)
 
-    # Check feedback-based matches first
+    # Check for feedback-based matches first
     if feedback_embeddings is not None:
         feedback_scores = cosine_similarity(query_embedding.cpu().numpy(), feedback_embeddings.cpu().numpy())[0]
         best_idx = int(np.argmax(feedback_scores))
@@ -111,7 +110,7 @@ def chatbot_response(query, dev_mode_flag):
         if best_score >= dynamic_threshold:
             return matched_feedback["response"], "Feedback", 0.0
 
-    # Otherwise, use main dataset
+    # Otherwise, match from dataset
     similarity_scores = cosine_similarity(query_embedding.cpu().numpy(), question_embeddings.cpu().numpy())[0]
     top_k = 3
     top_k_indices = np.argsort(similarity_scores)[-top_k:][::-1]
@@ -123,7 +122,7 @@ def chatbot_response(query, dev_mode_flag):
     matched_source = matched_item.get("source", "PUP Handbook")
     best_score = top_k_scores[0]
 
-    # Developer mode (LLM synthesis)
+    # Developer mode (with LLM generation)
    if dev_mode_flag:
        context = ""
        for i, item in enumerate(top_k_items):
@@ -180,7 +179,7 @@ def chatbot_response(query, dev_mode_flag):
            error_msg = f"[ERROR] HF inference failed: {e}"
            return f"(UnivAI+++ error: {error_msg})", matched_source, 0.0
 
-    # Normal retrieval mode
+    # Regular retrieval-based response
    if best_score < 0.4:
        response = "Sorry, but the PUP handbook does not contain such information."
    else:
@@ -188,12 +187,13 @@ def chatbot_response(query, dev_mode_flag):
            response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
        else:
            response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
+
    return response.strip(), matched_source, 0.0
 
 
-# === Improved feedback recording (uploads only new/updated entries) ===
+# FIXED FUNCTION: Records feedback correctly
 def record_feedback(feedback_type, user_query, chatbot_response_text, comment=None):
-    """Records feedback and uploads only new or updated entry."""
+    """Records user feedback and optional comment."""
    global feedback_embeddings, feedback_questions
    matched = False
    new_embedding = embedding_model.encode([user_query], convert_to_tensor=True)
@@ -205,32 +205,19 @@ def record_feedback(feedback_type, user_query, chatbot_response_text, comment=None):
            matched = True
            votes = {"positive": "upvotes", "negative": "downvotes"}
            item[votes[feedback_type]] = item.get(votes[feedback_type], 0) + 1
-            # Only upload the updated item (not full dataset)
-            with tempfile.NamedTemporaryFile("w", delete=False, suffix=".json") as tempf:
-                json.dump([item], tempf, indent=4)
-                tempf_path = tempf.name
-            upload_file_to_hf(tempf_path, "latest_feedback_update.json")
-            os.remove(tempf_path)
            break
 
    if not matched:
        entry = {
-            "question": user_query,
-            "response": chatbot_response_text,
+            "question": user_query,  # ✅ user’s question
+            "response": chatbot_response_text,  # ✅ chatbot’s answer
            "feedback": feedback_type,
            "upvotes": 1 if feedback_type == "positive" else 0,
            "downvotes": 1 if feedback_type == "negative" else 0
        }
        feedback_data.append(entry)
 
-        # Save only this new entry remotely
-        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".json") as tempf:
-            json.dump([entry], tempf, indent=4)
-            tempf_path = tempf.name
-        upload_file_to_hf(tempf_path, "latest_feedback_entry.json")
-        os.remove(tempf_path)
-
-    # Always update local JSON + embeddings
+    # Save locally
    with open(feedback_path, "w") as f:
        json.dump(feedback_data, f, indent=4)
@@ -238,7 +225,10 @@ def record_feedback(feedback_type, user_query, chatbot_response_text, comment=None):
    if feedback_questions:
        feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True)
 
-    # Comment saving (optional)
+    # Upload to HF
+    upload_file_to_hf(feedback_path, "feedback.json")
+
+    # Save optional comments
    if comment and comment.strip():
        try:
            with open(COMMENTS_PATH, "r") as f:
@@ -258,15 +248,10 @@ def record_feedback(feedback_type, user_query, chatbot_response_text, comment=None):
        with open(COMMENTS_PATH, "w") as f:
            json.dump(comments_list, f, indent=4)
 
-        # Upload only the latest comment
-        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".json") as tempf:
-            json.dump([comment_entry], tempf, indent=4)
-            tempf_path = tempf.name
-        upload_file_to_hf(tempf_path, "latest_comment_entry.json")
-        os.remove(tempf_path)
+        upload_file_to_hf(COMMENTS_PATH, "Comments.json")
 
 
-# === Flask API routes ===
+# Flask API setup
 app = Flask(__name__)
 
 @app.route("/api/chat", methods=["POST"])