oceddyyy committed on
Commit
55b9a1f
·
verified ·
1 Parent(s): a5f699e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -122
app.py CHANGED
@@ -1,27 +1,30 @@
1
- import os
2
- os.environ["HF_HOME"] = "/tmp/.cache"
3
- os.environ["HF_DATASETS_CACHE"] = "/tmp/.cache"
4
- os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/.cache"
5
- os.makedirs("/tmp/.cache", exist_ok=True)
6
-
7
  import json
8
  from sentence_transformers import SentenceTransformer
9
  from sklearn.metrics.pairwise import cosine_similarity
10
  import numpy as np
11
  import os
12
  from huggingface_hub import upload_file, hf_hub_download, InferenceClient
13
- from flask import Flask, request, jsonify
 
 
 
 
 
 
 
 
 
 
14
 
15
  embedding_model = SentenceTransformer('paraphrase-mpnet-base-v2')
16
- inference_token = os.getenv("HF_TOKEN") or os.getenv("NEW_PUP_AI_Project")
17
  inference_client = InferenceClient(
18
  model="mistralai/Mixtral-8x7B-Instruct-v0.1",
19
  token=inference_token
20
  )
21
 
22
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
23
- DATASET_PATH = os.path.join(BASE_DIR, "dataset.json")
24
- with open(DATASET_PATH, "r") as f:
25
  dataset = json.load(f)
26
 
27
  questions = [item["question"] for item in dataset]
@@ -34,11 +37,11 @@ feedback_questions = []
34
  feedback_embeddings = None
35
  dev_mode = {"enabled": False}
36
 
37
- feedback_path = "/tmp/outputs/feedback.json"
38
- os.makedirs("/tmp/outputs", exist_ok=True)
39
 
40
  try:
41
- hf_token = os.getenv("NEW_PUP_AI_Project")
42
  downloaded_path = hf_hub_download(
43
  repo_id="oceddyyy/University_Inquiries_Feedback",
44
  filename="feedback.json",
@@ -55,11 +58,11 @@ try:
55
  json.dump(feedback_data, f_local, indent=4)
56
 
57
  except Exception as e:
58
- print(f"[Startup] Feedback not loaded from Hugging Face. Using local only. Reason: {e}")
59
  feedback_data = []
60
 
61
  def upload_feedback_to_hf():
62
- hf_token = os.getenv("NEW_PUP_AI_Project")
63
  if not hf_token:
64
  raise ValueError("Hugging Face token not found in environment variables!")
65
 
@@ -75,10 +78,9 @@ def upload_feedback_to_hf():
75
  except Exception as e:
76
  print(f"Error uploading feedback to HF: {e}")
77
 
78
- def chatbot_response(query, dev_mode_flag):
79
  query_embedding = embedding_model.encode([query], convert_to_tensor=True)
80
 
81
- # Feedback logic (optional, can keep as is)
82
  if feedback_embeddings is not None:
83
  feedback_scores = cosine_similarity(query_embedding.cpu().numpy(), feedback_embeddings.cpu().numpy())[0]
84
  best_idx = int(np.argmax(feedback_scores))
@@ -93,123 +95,134 @@ def chatbot_response(query, dev_mode_flag):
93
 
94
  if best_score >= dynamic_threshold:
95
  response = matched_feedback["response"]
96
- return response
 
97
 
98
- # Find most relevant handbook answer
99
  similarity_scores = cosine_similarity(query_embedding.cpu().numpy(), question_embeddings.cpu().numpy())[0]
100
  best_idx = int(np.argmax(similarity_scores))
101
  best_score = similarity_scores[best_idx]
102
- matched_item = dataset[best_idx]
103
  matched_a = matched_item.get("answer", "")
104
 
105
- # UnivAI+++ mode: always use Mistral LLM for response
106
- if dev_mode_flag:
107
- # Improved prompt: ask LLM to answer in its own words, based on handbook info
108
- prompt = (
109
- f"You are an expert university assistant. "
110
- f"A student asked: \"{query}\"\n"
111
- f"Here is the most relevant handbook information:\n\"{matched_a}\"\n"
112
- f"Using only the information above, answer the student's question in your own words. "
113
- f"If the handbook info is not relevant, say you don't know."
114
- )
115
- print("[DEBUG] Calling LLM with prompt:", prompt) # Logging
116
-
117
- try:
118
- llm_response = inference_client.text_generation(prompt, max_new_tokens=200, temperature=0.7)
119
- print("[DEBUG] LLM raw response:", llm_response) # Logging
120
-
121
- # Robust extraction of generated text
122
- if hasattr(llm_response, "generated_text"):
123
- response = llm_response.generated_text
124
- elif isinstance(llm_response, dict) and "generated_text" in llm_response:
125
- response = llm_response["generated_text"]
126
- else:
127
- response = str(llm_response)
128
-
129
- # If LLM returns empty or just repeats handbook, fallback
130
- if not response.strip() or response.strip() == matched_a.strip():
131
- print("[DEBUG] LLM response empty or same as handbook, using fallback.")
132
- if "month" in matched_item and "year" in matched_item:
133
- response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
134
- else:
135
- response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
136
- except Exception as e:
137
- error_msg = f"[ERROR] HF inference failed: {e}"
138
- print(error_msg)
139
- # Fallback to handbook answer if LLM fails
140
  if "month" in matched_item and "year" in matched_item:
141
  response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
142
  else:
143
  response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
144
- return response.strip()
145
 
146
- # UnivAI mode: use only sentence-transformers
147
- if best_score < 0.4:
148
- response = "Sorry, but the PUP handbook does not contain such information."
149
- else:
150
- if "month" in matched_item and "year" in matched_item:
151
- response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
152
- else:
153
- response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
154
 
155
- return response.strip()
156
 
157
def record_feedback(feedback, query, response):
    """Record a positive/negative vote for a (query, response) pair.

    The first stored feedback entry whose response is identical to
    ``response`` and whose question has cosine similarity >= 0.8 to ``query``
    gets its vote counter incremented; if there is none, a new entry is
    appended. The feedback list is then written to ``feedback_path``, the
    feedback question embeddings are rebuilt, and an upload to the Hugging
    Face Hub is attempted.

    Args:
        feedback: ``"positive"`` or ``"negative"``.
        query: The question the user asked.
        response: The answer the user is rating.
    """
    global feedback_embeddings, feedback_questions

    # Only entries with an identical response can match, so embed just those
    # questions, in one batched call instead of one model call per entry.
    candidates = [item for item in feedback_data if item["response"] == response]
    matched_item = None
    if candidates:
        query_vec = embedding_model.encode([query], convert_to_tensor=True)
        candidate_vecs = embedding_model.encode(
            [item["question"] for item in candidates], convert_to_tensor=True
        )
        similarities = cosine_similarity(
            candidate_vecs.cpu().numpy(), query_vec.cpu().numpy()
        )[:, 0]
        # Keep the original "first match wins" order.
        matched_item = next(
            (item for item, score in zip(candidates, similarities) if score >= 0.8),
            None,
        )

    if matched_item is not None:
        vote_key = {"positive": "upvotes", "negative": "downvotes"}[feedback]
        matched_item[vote_key] = matched_item.get(vote_key, 0) + 1
    else:
        feedback_data.append({
            "question": query,
            "response": response,
            "feedback": feedback,
            "upvotes": 1 if feedback == "positive" else 0,
            "downvotes": 1 if feedback == "negative" else 0,
        })

    with open(feedback_path, "w") as f:
        json.dump(feedback_data, f, indent=4)

    feedback_questions = [item["question"] for item in feedback_data]
    if feedback_questions:
        feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True)

    # upload_feedback_to_hf raises ValueError when no token is configured.
    # The vote is already saved locally at this point, so report and continue
    # rather than failing the whole request.
    try:
        upload_feedback_to_hf()
    except ValueError as e:
        print(f"[Feedback] Saved locally; upload to HF skipped: {e}")
189
-
190
app = Flask(__name__)


def _json_body():
    """Return the request's JSON object, or an empty dict if absent/invalid."""
    data = request.get_json(silent=True)
    return data if isinstance(data, dict) else {}


@app.route("/api/chat", methods=["POST"])
def chat():
    """Answer a question.

    Expects a JSON object with ``query`` (str) and optional ``dev_mode``
    (bool). Responds with ``{"response": <answer>}``.
    """
    data = _json_body()
    query = data.get("query", "")
    dev = bool(data.get("dev_mode", False))
    dev_mode["enabled"] = dev
    response = chatbot_response(query, dev)
    return jsonify({"response": response})


@app.route("/api/feedback", methods=["POST"])
def feedback():
    """Store a vote for a previous answer.

    Expects a JSON object with ``query``, ``response`` and ``feedback``
    (``"positive"`` or ``"negative"``). Responds with ``{"status": "success"}``,
    or HTTP 400 when ``feedback`` is not one of the two accepted values.
    """
    data = _json_body()
    query = data.get("query", "")
    response = data.get("response", "")
    feedback_type = data.get("feedback", "")
    # record_feedback only understands these two values; reject anything else
    # up front instead of failing inside it or storing an unusable entry.
    if feedback_type not in ("positive", "negative"):
        return jsonify({
            "status": "error",
            "message": "feedback must be 'positive' or 'negative'",
        }), 400
    record_feedback(feedback_type, query, response)
    return jsonify({"status": "success"})


@app.route("/", methods=["GET"])
def index():
    """Return a plain-text hint naming the API endpoints."""
    return "University Inquiries AI Chatbot API. Use POST /api/chat or /api/feedback.", 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  if __name__ == "__main__":
215
- app.run(host="0.0.0.0", port=7861)
 
1
+ import gradio as gr
 
 
 
 
 
2
  import json
3
  from sentence_transformers import SentenceTransformer
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import numpy as np
6
  import os
7
  from huggingface_hub import upload_file, hf_hub_download, InferenceClient
8
+
9
+ PUP_Themed_css = """
10
+ html, body, .gradio-container, .gr-app {
11
+ height: 100% !important;
12
+ margin: 0 !important;
13
+ padding: 0 !important;
14
+ background: linear-gradient(to bottom right, #800000, #ff0000, #ffeb3b, #ffa500) !important;
15
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
16
+ color: #1b4332 !important;
17
+ }
18
+ """
19
 
20
  embedding_model = SentenceTransformer('paraphrase-mpnet-base-v2')
21
+ inference_token = os.getenv("HF_TOKEN") or os.getenv("PUP_AI_Chatbot_Token")
22
  inference_client = InferenceClient(
23
  model="mistralai/Mixtral-8x7B-Instruct-v0.1",
24
  token=inference_token
25
  )
26
 
27
+ with open("dataset.json", "r") as f:
 
 
28
  dataset = json.load(f)
29
 
30
  questions = [item["question"] for item in dataset]
 
37
  feedback_embeddings = None
38
  dev_mode = {"enabled": False}
39
 
40
+ feedback_path = "outputs/feedback.json"
41
+ os.makedirs("outputs", exist_ok=True)
42
 
43
  try:
44
+ hf_token = os.getenv("PUP_AI_Chatbot_Token")
45
  downloaded_path = hf_hub_download(
46
  repo_id="oceddyyy/University_Inquiries_Feedback",
47
  filename="feedback.json",
 
58
  json.dump(feedback_data, f_local, indent=4)
59
 
60
  except Exception as e:
61
+ print(f"[Startup] No feedback loaded from HF: {e}")
62
  feedback_data = []
63
 
64
  def upload_feedback_to_hf():
65
+ hf_token = os.getenv("PUP_AI_Chatbot_Token")
66
  if not hf_token:
67
  raise ValueError("Hugging Face token not found in environment variables!")
68
 
 
78
  except Exception as e:
79
  print(f"Error uploading feedback to HF: {e}")
80
 
81
+ def chatbot_response(query, chat_history):
82
  query_embedding = embedding_model.encode([query], convert_to_tensor=True)
83
 
 
84
  if feedback_embeddings is not None:
85
  feedback_scores = cosine_similarity(query_embedding.cpu().numpy(), feedback_embeddings.cpu().numpy())[0]
86
  best_idx = int(np.argmax(feedback_scores))
 
95
 
96
  if best_score >= dynamic_threshold:
97
  response = matched_feedback["response"]
98
+ chat_history.append((query, response))
99
+ return "", chat_history, gr.update(visible=True)
100
 
 
101
  similarity_scores = cosine_similarity(query_embedding.cpu().numpy(), question_embeddings.cpu().numpy())[0]
102
  best_idx = int(np.argmax(similarity_scores))
103
  best_score = similarity_scores[best_idx]
104
+ matched_item = dataset[best_idx] # Changed this to get full entry including month/year
105
  matched_a = matched_item.get("answer", "")
106
 
107
+ if best_score < 0.4:
108
+ response = "Sorry, but the PUP handbook does not contain such information."
109
+ else:
110
+ if dev_mode["enabled"]:
111
+ prompt = (
112
+ f"A student asked:\n\"{query}\"\n\n"
113
+ f"Relevant handbook info:\n\"{matched_a}\"\n\n"
114
+ f"Please answer based only on this handbook content."
115
+ )
116
+ try:
117
+ response = inference_client.text_generation(prompt, max_new_tokens=200, temperature=0.7)
118
+ except Exception as e:
119
+ print(f"[ERROR] HF inference failed: {e}")
120
+ response = f"(Fallback) {matched_a}"
121
+ else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  if "month" in matched_item and "year" in matched_item:
123
  response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
124
  else:
125
  response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
 
126
 
127
+ chat_history.append((query, response.strip()))
128
+ return "", chat_history, gr.update(visible=True)
 
 
 
 
 
 
129
 
 
130
 
131
def record_feedback(feedback, chat_history):
    """Record a thumbs-up/down vote for the most recent chat exchange.

    The last ``(query, response)`` pair in ``chat_history`` is compared with
    the stored feedback entries: the first entry whose response is identical
    and whose question has cosine similarity >= 0.8 to the query gets its vote
    counter incremented; otherwise a new entry is appended. The feedback list
    is then written to ``feedback_path``, the feedback question embeddings are
    rebuilt, and an upload to the Hugging Face Hub is attempted.

    Args:
        feedback: ``"positive"`` or ``"negative"``.
        chat_history: List of ``(query, response)`` pairs; nothing is recorded
            when it is empty.

    Returns:
        A Gradio update with ``visible=False`` (wired to the feedback row).
    """
    global feedback_embeddings, feedback_questions

    if chat_history:
        last_query, last_response = chat_history[-1]

        # Only entries with an identical response can match, so embed just
        # those questions, in one batched call instead of one per entry.
        candidates = [item for item in feedback_data if item["response"] == last_response]
        matched_item = None
        if candidates:
            query_vec = embedding_model.encode([last_query], convert_to_tensor=True)
            candidate_vecs = embedding_model.encode(
                [item["question"] for item in candidates], convert_to_tensor=True
            )
            similarities = cosine_similarity(
                candidate_vecs.cpu().numpy(), query_vec.cpu().numpy()
            )[:, 0]
            # Keep the original "first match wins" order.
            matched_item = next(
                (item for item, score in zip(candidates, similarities) if score >= 0.8),
                None,
            )

        if matched_item is not None:
            vote_key = {"positive": "upvotes", "negative": "downvotes"}[feedback]
            matched_item[vote_key] = matched_item.get(vote_key, 0) + 1
        else:
            feedback_data.append({
                "question": last_query,
                "response": last_response,
                "feedback": feedback,
                "upvotes": 1 if feedback == "positive" else 0,
                "downvotes": 1 if feedback == "negative" else 0,
            })

        with open(feedback_path, "w") as f:
            json.dump(feedback_data, f, indent=4)

        feedback_questions = [item["question"] for item in feedback_data]
        if feedback_questions:
            feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True)

        # upload_feedback_to_hf raises ValueError when no token is configured.
        # The vote is already saved locally, so report and continue instead
        # of breaking the UI callback.
        try:
            upload_feedback_to_hf()
        except ValueError as e:
            print(f"[Feedback] Saved locally; upload to HF skipped: {e}")

    return gr.update(visible=False)
167
+
168
# Gradio UI: header, chat window, password-gated DevMode toggle, question
# input, and a feedback row that appears after each answer.
with gr.Blocks(css=PUP_Themed_css, title="University Handbook AI Chatbot") as demo:
    gr.Markdown(
        """
        <div style='
        background-color: var(--block-background-fill);
        border-radius: 16px;
        padding: 24px 16px;
        margin-bottom: 24px;
        box-shadow: 0 6px 16px rgba(0, 0, 0, 0.15);
        max-width: 700px;
        margin-left: auto;
        margin-right: auto;
        text-align: center;
        color: var(--text-color);'>
        <h1 style='font-size: 2.2rem; margin: 0;'>University Inquiries AI Chatbot</h1>
        </div>
        """
    )

    state = gr.State(chat_history)
    chatbot_ui = gr.Chatbot(label="Chat", show_label=False)

    with gr.Row():
        dev_btn = gr.Button("DevMode 🔐")
        password_box = gr.Textbox(placeholder="Enter Dev password", type="password", visible=False, show_label=False)
        confirm_btn = gr.Button("Confirm", visible=False)

    # No built-in fallback password: if DEV_MODE_PASSWORD is unset, DevMode
    # cannot be unlocked. A default committed to the repo would let anyone in.
    dev_pass = os.getenv("DEV_MODE_PASSWORD")

    def show_password_input():
        """Reveal the password box and the Confirm button."""
        return gr.update(visible=True), gr.update(visible=True)

    def enable_devmode(password_input):
        """Enable DevMode when the entered password matches DEV_MODE_PASSWORD.

        Returns updates for (password_box, confirm_btn, dev_btn).
        """
        import hmac  # local import: only needed for the constant-time comparison

        supplied = (password_input or "").encode("utf-8")
        expected = (dev_pass or "").encode("utf-8")
        # compare_digest avoids leaking the match position through timing;
        # comparing bytes keeps it safe for non-ASCII passwords.
        if dev_pass and hmac.compare_digest(supplied, expected):
            # NOTE: dev_mode is a module-level dict, so this flag is shared by
            # every session of the app, not just the one that unlocked it.
            dev_mode["enabled"] = True
            return gr.update(visible=False), gr.update(visible=False), gr.update(value="DevMode ✅", interactive=False)
        return gr.update(visible=True), gr.update(visible=True), gr.update(value="Wrong password. Try again.")

    dev_btn.click(show_password_input, outputs=[password_box, confirm_btn])
    confirm_btn.click(enable_devmode, inputs=[password_box], outputs=[password_box, confirm_btn, dev_btn])

    with gr.Row():
        query_input = gr.Textbox(placeholder="Type your question here...", show_label=False)
        submit_btn = gr.Button("Submit")

    with gr.Row(visible=False) as feedback_row:
        gr.Markdown("Was this helpful?")
        thumbs_up = gr.Button("👍")
        thumbs_down = gr.Button("👎")

    def handle_submit(message, chat_state):
        """Answer ``message`` and return updates for the input box, the stored
        history, the chat window and the feedback row."""
        if not (message or "").strip():
            # Nothing to answer: leave history and feedback row untouched.
            return "", chat_state, chat_state, gr.update()
        cleared_text, history, feedback_update = chatbot_response(message, chat_state)
        # Write the history back to `state` explicitly rather than relying on
        # chatbot_response mutating the list in place.
        return cleared_text, history, history, feedback_update

    submit_outputs = [query_input, state, chatbot_ui, feedback_row]
    submit_btn.click(handle_submit, [query_input, state], submit_outputs)
    query_input.submit(handle_submit, [query_input, state], submit_outputs)

    thumbs_up.click(lambda history: record_feedback("positive", history), inputs=[state], outputs=[feedback_row])
    thumbs_down.click(lambda history: record_feedback("negative", history), inputs=[state], outputs=[feedback_row])

if __name__ == "__main__":
    demo.launch()