oceddyyy committed
Commit a5f699e · verified · 1 Parent(s): 2b2b0ad

Update app.py

Files changed (1):
  1. app.py +122 -135
app.py CHANGED
@@ -1,30 +1,27 @@
-import gradio as gr
+import os
+os.environ["HF_HOME"] = "/tmp/.cache"
+os.environ["HF_DATASETS_CACHE"] = "/tmp/.cache"
+os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/.cache"
+os.makedirs("/tmp/.cache", exist_ok=True)
+
 import json
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np
 import os
 from huggingface_hub import upload_file, hf_hub_download, InferenceClient
-
-PUP_Themed_css = """
-html, body, .gradio-container, .gr-app {
-    height: 100% !important;
-    margin: 0 !important;
-    padding: 0 !important;
-    background: linear-gradient(to bottom right, #800000, #ff0000, #ffeb3b, #ffa500) !important;
-    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
-    color: #1b4332 !important;
-}
-"""
+from flask import Flask, request, jsonify

 embedding_model = SentenceTransformer('paraphrase-mpnet-base-v2')
-inference_token = os.getenv("HF_TOKEN") or os.getenv("PUP_AI_Chatbot_Token")
+inference_token = os.getenv("HF_TOKEN") or os.getenv("NEW_PUP_AI_Project")
 inference_client = InferenceClient(
     model="mistralai/Mixtral-8x7B-Instruct-v0.1",
     token=inference_token
 )

-with open("dataset.json", "r") as f:
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+DATASET_PATH = os.path.join(BASE_DIR, "dataset.json")
+with open(DATASET_PATH, "r") as f:
     dataset = json.load(f)

 questions = [item["question"] for item in dataset]
@@ -37,11 +34,11 @@ feedback_questions = []
 feedback_embeddings = None
 dev_mode = {"enabled": False}

-feedback_path = "outputs/feedback.json"
-os.makedirs("outputs", exist_ok=True)
+feedback_path = "/tmp/outputs/feedback.json"
+os.makedirs("/tmp/outputs", exist_ok=True)

 try:
-    hf_token = os.getenv("PUP_AI_Chatbot_Token")
+    hf_token = os.getenv("NEW_PUP_AI_Project")
     downloaded_path = hf_hub_download(
         repo_id="oceddyyy/University_Inquiries_Feedback",
         filename="feedback.json",
@@ -58,11 +55,11 @@ try:
         json.dump(feedback_data, f_local, indent=4)

 except Exception as e:
-    print(f"[Startup] No feedback loaded from HF: {e}")
+    print(f"[Startup] Feedback not loaded from Hugging Face. Using local only. Reason: {e}")
     feedback_data = []

 def upload_feedback_to_hf():
-    hf_token = os.getenv("PUP_AI_Chatbot_Token")
+    hf_token = os.getenv("NEW_PUP_AI_Project")
     if not hf_token:
         raise ValueError("Hugging Face token not found in environment variables!")

@@ -78,9 +75,10 @@ def upload_feedback_to_hf():
     except Exception as e:
         print(f"Error uploading feedback to HF: {e}")

-def chatbot_response(query, chat_history):
+def chatbot_response(query, dev_mode_flag):
     query_embedding = embedding_model.encode([query], convert_to_tensor=True)

+    # Feedback logic (optional, can keep as is)
     if feedback_embeddings is not None:
         feedback_scores = cosine_similarity(query_embedding.cpu().numpy(), feedback_embeddings.cpu().numpy())[0]
         best_idx = int(np.argmax(feedback_scores))
@@ -95,134 +93,123 @@ def chatbot_response(query, chat_history):

         if best_score >= dynamic_threshold:
             response = matched_feedback["response"]
-            chat_history.append((query, response))
-            return "", chat_history, gr.update(visible=True)
+            return response

+    # Find most relevant handbook answer
     similarity_scores = cosine_similarity(query_embedding.cpu().numpy(), question_embeddings.cpu().numpy())[0]
     best_idx = int(np.argmax(similarity_scores))
     best_score = similarity_scores[best_idx]
-    matched_item = dataset[best_idx] # Changed this to get full entry including month/year
+    matched_item = dataset[best_idx]
     matched_a = matched_item.get("answer", "")

-    if best_score < 0.4:
-        response = "Sorry, but the PUP handbook does not contain such information."
-    else:
-        if dev_mode["enabled"]:
-            prompt = (
-                f"A student asked:\n\"{query}\"\n\n"
-                f"Relevant handbook info:\n\"{matched_a}\"\n\n"
-                f"Please answer based only on this handbook content."
-            )
-            try:
-                response = inference_client.text_generation(prompt, max_new_tokens=200, temperature=0.7)
-            except Exception as e:
-                print(f"[ERROR] HF inference failed: {e}")
-                response = f"(Fallback) {matched_a}"
-        else:
+    # UnivAI+++ mode: always use Mistral LLM for response
+    if dev_mode_flag:
+        # Improved prompt: ask LLM to answer in its own words, based on handbook info
+        prompt = (
+            f"You are an expert university assistant. "
+            f"A student asked: \"{query}\"\n"
+            f"Here is the most relevant handbook information:\n\"{matched_a}\"\n"
+            f"Using only the information above, answer the student's question in your own words. "
+            f"If the handbook info is not relevant, say you don't know."
+        )
+        print("[DEBUG] Calling LLM with prompt:", prompt)  # Logging
+
+        try:
+            llm_response = inference_client.text_generation(prompt, max_new_tokens=200, temperature=0.7)
+            print("[DEBUG] LLM raw response:", llm_response)  # Logging
+
+            # Robust extraction of generated text
+            if hasattr(llm_response, "generated_text"):
+                response = llm_response.generated_text
+            elif isinstance(llm_response, dict) and "generated_text" in llm_response:
+                response = llm_response["generated_text"]
+            else:
+                response = str(llm_response)
+
+            # If LLM returns empty or just repeats handbook, fallback
+            if not response.strip() or response.strip() == matched_a.strip():
+                print("[DEBUG] LLM response empty or same as handbook, using fallback.")
+                if "month" in matched_item and "year" in matched_item:
+                    response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
+                else:
+                    response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
+        except Exception as e:
+            error_msg = f"[ERROR] HF inference failed: {e}"
+            print(error_msg)
+            # Fallback to handbook answer if LLM fails
             if "month" in matched_item and "year" in matched_item:
                 response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
             else:
                 response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
+        return response.strip()

-    chat_history.append((query, response.strip()))
-    return "", chat_history, gr.update(visible=True)
+    # UnivAI mode: use only sentence-transformers
+    if best_score < 0.4:
+        response = "Sorry, but the PUP handbook does not contain such information."
+    else:
+        if "month" in matched_item and "year" in matched_item:
+            response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
+        else:
+            response = f"According to 2019 Proposed PUP Handbook, {matched_a}"

+    return response.strip()

-def record_feedback(feedback, chat_history):
+def record_feedback(feedback, query, response):
     global feedback_embeddings, feedback_questions
-    if chat_history:
-        last_query, last_response = chat_history[-1]
-        matched = False
-        new_embedding = embedding_model.encode([last_query], convert_to_tensor=True)
-
-        for item in feedback_data:
-            existing_embedding = embedding_model.encode([item["question"]], convert_to_tensor=True)
-            similarity = cosine_similarity(existing_embedding.cpu().numpy(), new_embedding.cpu().numpy())[0][0]
-            if similarity >= 0.8 and item["response"] == last_response:
-                matched = True
-                votes = {"positive": "upvotes", "negative": "downvotes"}
-                item[votes[feedback]] = item.get(votes[feedback], 0) + 1
-                break
-
-        if not matched:
-            entry = {
-                "question": last_query,
-                "response": last_response,
-                "feedback": feedback,
-                "upvotes": 1 if feedback == "positive" else 0,
-                "downvotes": 1 if feedback == "negative" else 0
-            }
-            feedback_data.append(entry)
-
-        with open(feedback_path, "w") as f:
-            json.dump(feedback_data, f, indent=4)
-
-        feedback_questions = [item["question"] for item in feedback_data]
-        if feedback_questions:
-            feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True)
-
-        upload_feedback_to_hf()
-
-        return gr.update(visible=False)
-
-with gr.Blocks(css=PUP_Themed_css, title="University Handbook AI Chatbot") as demo:
-    gr.Markdown(
-        """
-        <div style='
-            background-color: var(--block-background-fill);
-            border-radius: 16px;
-            padding: 24px 16px;
-            margin-bottom: 24px;
-            box-shadow: 0 6px 16px rgba(0, 0, 0, 0.15);
-            max-width: 700px;
-            margin-left: auto;
-            margin-right: auto;
-            text-align: center;
-            color: var(--text-color);'>
-            <h1 style='font-size: 2.2rem; margin: 0;'>University Inquiries AI Chatbot</h1>
-        </div>
-        """
-    )
-
-    state = gr.State(chat_history)
-    chatbot_ui = gr.Chatbot(label="Chat", show_label=False)
-
-    with gr.Row():
-        dev_btn = gr.Button("DevMode 🔐")
-        password_box = gr.Textbox(placeholder="Enter Dev password", type="password", visible=False, show_label=False)
-        confirm_btn = gr.Button("Confirm", visible=False)
-
-    dev_pass = os.getenv("DEV_MODE_PASSWORD", "letmein")
-
-    def show_password_input():
-        return gr.update(visible=True), gr.update(visible=True)
-
-    def enable_devmode(password_input):
-        if password_input == dev_pass:
-            dev_mode["enabled"] = True
-            return gr.update(visible=False), gr.update(visible=False), gr.update(value="DevMode ✅", interactive=False)
-        return gr.update(visible=True), gr.update(visible=True), gr.update(value="Wrong password. Try again.")
-
-    dev_btn.click(show_password_input, outputs=[password_box, confirm_btn])
-    confirm_btn.click(enable_devmode, inputs=[password_box], outputs=[password_box, confirm_btn, dev_btn])
-
-    with gr.Row():
-        query_input = gr.Textbox(placeholder="Type your question here...", show_label=False)
-        submit_btn = gr.Button("Submit")
-
-    with gr.Row(visible=False) as feedback_row:
-        gr.Markdown("Was this helpful?")
-        thumbs_up = gr.Button("👍")
-        thumbs_down = gr.Button("👎")
-
-    def handle_submit(message, chat_state):
-        return chatbot_response(message, chat_state)
-
-    submit_btn.click(handle_submit, [query_input, state], [query_input, chatbot_ui, feedback_row])
-    query_input.submit(handle_submit, [query_input, state], [query_input, chatbot_ui, feedback_row])
-
-    thumbs_up.click(lambda state: record_feedback("positive", state), inputs=[state], outputs=[feedback_row])
-    thumbs_down.click(lambda state: record_feedback("negative", state), inputs=[state], outputs=[feedback_row])
+    matched = False
+    new_embedding = embedding_model.encode([query], convert_to_tensor=True)
+
+    for item in feedback_data:
+        existing_embedding = embedding_model.encode([item["question"]], convert_to_tensor=True)
+        similarity = cosine_similarity(existing_embedding.cpu().numpy(), new_embedding.cpu().numpy())[0][0]
+        if similarity >= 0.8 and item["response"] == response:
+            matched = True
+            votes = {"positive": "upvotes", "negative": "downvotes"}
+            item[votes[feedback]] = item.get(votes[feedback], 0) + 1
+            break
+
+    if not matched:
+        entry = {
+            "question": query,
+            "response": response,
+            "feedback": feedback,
+            "upvotes": 1 if feedback == "positive" else 0,
+            "downvotes": 1 if feedback == "negative" else 0
+        }
+        feedback_data.append(entry)
+
+    with open(feedback_path, "w") as f:
+        json.dump(feedback_data, f, indent=4)
+
+    feedback_questions = [item["question"] for item in feedback_data]
+    if feedback_questions:
+        feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True)
+
+    upload_feedback_to_hf()
+
+app = Flask(__name__)
+
+@app.route("/api/chat", methods=["POST"])
+def chat():
+    data = request.json
+    query = data.get("query", "")
+    dev = data.get("dev_mode", False)
+    dev_mode["enabled"] = dev
+    response = chatbot_response(query, dev)
+    return jsonify({"response": response})
+
+@app.route("/api/feedback", methods=["POST"])
+def feedback():
+    data = request.json
+    query = data.get("query", "")
+    response = data.get("response", "")
+    feedback_type = data.get("feedback", "")
+    record_feedback(feedback_type, query, response)
+    return jsonify({"status": "success"})
+
+@app.route("/", methods=["GET"])
+def index():
+    return "University Inquiries AI Chatbot API. Use POST /chat or /feedback.", 200

 if __name__ == "__main__":
-    demo.launch()
+    app.run(host="0.0.0.0", port=7861)
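
For reviewers who want to smoke-test the new API surface, here is a minimal client sketch (not part of the commit). It assumes app.py is running locally on the port hard-coded in app.run (7861), so BASE_URL and the sample question are illustrative assumptions; the request and response fields (query, dev_mode, response, feedback, status) come straight from the handlers above.

```python
# Hypothetical smoke test for the Flask API added in this commit.
# Assumptions: the server is reachable at BASE_URL (a local run of app.py);
# the sample question is made up and may not appear in dataset.json.
import requests

BASE_URL = "http://localhost:7861"  # assumed local address; adjust for a deployed Space

# POST /api/chat: dev_mode=True would route the query through the Mixtral LLM path.
chat_resp = requests.post(
    f"{BASE_URL}/api/chat",
    json={"query": "What is the grading system?", "dev_mode": False},
    timeout=60,
)
answer = chat_resp.json()["response"]
print(answer)

# POST /api/feedback: the server embeds the question, merges it into a
# near-duplicate entry (cosine similarity >= 0.8 with an identical response)
# or appends a new one, then re-uploads feedback.json to the Hub.
fb_resp = requests.post(
    f"{BASE_URL}/api/feedback",
    json={"query": "What is the grading system?", "response": answer, "feedback": "positive"},
    timeout=60,
)
print(fb_resp.json())  # expected: {"status": "success"}
```

One detail worth noting in review: /api/chat writes the per-request flag into the module-level dev_mode dict, but chatbot_response branches on the dev_mode_flag argument it is passed, so the dict write is effectively redundant for the response path.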