Spaces:

UniversityAIChatbot
/

University_Inquiries_AI_Chatbot

Sleeping

App Files Community

oceddyyy committed on Sep 7

Commit

55b9a1f

verified ·

1 Parent(s): a5f699e

Update app.py

Browse files

Files changed (1) hide show

app.py +135 -122

app.py CHANGED Viewed

@@ -1,27 +1,30 @@
-import os
-os.environ["HF_HOME"] = "/tmp/.cache"
-os.environ["HF_DATASETS_CACHE"] = "/tmp/.cache"
-os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/.cache"
-os.makedirs("/tmp/.cache", exist_ok=True)
 import json
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np
 import os
 from huggingface_hub import upload_file, hf_hub_download, InferenceClient
-from flask import Flask, request, jsonify
 embedding_model = SentenceTransformer('paraphrase-mpnet-base-v2')
-inference_token = os.getenv("HF_TOKEN") or os.getenv("NEW_PUP_AI_Project")
 inference_client = InferenceClient(
     model="mistralai/Mixtral-8x7B-Instruct-v0.1",
     token=inference_token
 )
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATASET_PATH = os.path.join(BASE_DIR, "dataset.json")
-with open(DATASET_PATH, "r") as f:
     dataset = json.load(f)
 questions = [item["question"] for item in dataset]
@@ -34,11 +37,11 @@ feedback_questions = []
 feedback_embeddings = None
 dev_mode = {"enabled": False}
-feedback_path = "/tmp/outputs/feedback.json"
-os.makedirs("/tmp/outputs", exist_ok=True)
 try:
-    hf_token = os.getenv("NEW_PUP_AI_Project")
     downloaded_path = hf_hub_download(
         repo_id="oceddyyy/University_Inquiries_Feedback",
         filename="feedback.json",
@@ -55,11 +58,11 @@ try:
         json.dump(feedback_data, f_local, indent=4)
 except Exception as e:
-    print(f"[Startup] Feedback not loaded from Hugging Face. Using local only. Reason: {e}")
     feedback_data = []
 def upload_feedback_to_hf():
-    hf_token = os.getenv("NEW_PUP_AI_Project")
     if not hf_token:
         raise ValueError("Hugging Face token not found in environment variables!")
@@ -75,10 +78,9 @@ def upload_feedback_to_hf():
     except Exception as e:
         print(f"Error uploading feedback to HF: {e}")
-def chatbot_response(query, dev_mode_flag):
     query_embedding = embedding_model.encode([query], convert_to_tensor=True)
-    # Feedback logic (optional, can keep as is)
     if feedback_embeddings is not None:
         feedback_scores = cosine_similarity(query_embedding.cpu().numpy(), feedback_embeddings.cpu().numpy())[0]
         best_idx = int(np.argmax(feedback_scores))
@@ -93,123 +95,134 @@ def chatbot_response(query, dev_mode_flag):
         if best_score >= dynamic_threshold:
             response = matched_feedback["response"]
-            return response
-    # Find most relevant handbook answer
     similarity_scores = cosine_similarity(query_embedding.cpu().numpy(), question_embeddings.cpu().numpy())[0]
     best_idx = int(np.argmax(similarity_scores))
     best_score = similarity_scores[best_idx]
-    matched_item = dataset[best_idx]
     matched_a = matched_item.get("answer", "")
-    # UnivAI+++ mode: always use Mistral LLM for response
-    if dev_mode_flag:
-        # Improved prompt: ask LLM to answer in its own words, based on handbook info
-        prompt = (
-            f"You are an expert university assistant. "
-            f"A student asked: \"{query}\"\n"
-            f"Here is the most relevant handbook information:\n\"{matched_a}\"\n"
-            f"Using only the information above, answer the student's question in your own words. "
-            f"If the handbook info is not relevant, say you don't know."
-        )
-        print("[DEBUG] Calling LLM with prompt:", prompt)  # Logging
-        try:
-            llm_response = inference_client.text_generation(prompt, max_new_tokens=200, temperature=0.7)
-            print("[DEBUG] LLM raw response:", llm_response)  # Logging
-            # Robust extraction of generated text
-            if hasattr(llm_response, "generated_text"):
-                response = llm_response.generated_text
-            elif isinstance(llm_response, dict) and "generated_text" in llm_response:
-                response = llm_response["generated_text"]
-            else:
-                response = str(llm_response)
-            # If LLM returns empty or just repeats handbook, fallback
-            if not response.strip() or response.strip() == matched_a.strip():
-                print("[DEBUG] LLM response empty or same as handbook, using fallback.")
-                if "month" in matched_item and "year" in matched_item:
-                    response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
-                else:
-                    response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
-        except Exception as e:
-            error_msg = f"[ERROR] HF inference failed: {e}"
-            print(error_msg)
-            # Fallback to handbook answer if LLM fails
             if "month" in matched_item and "year" in matched_item:
                 response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
             else:
                 response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
-        return response.strip()
-    # UnivAI mode: use only sentence-transformers
-    if best_score < 0.4:
-        response = "Sorry, but the PUP handbook does not contain such information."
-    else:
-        if "month" in matched_item and "year" in matched_item:
-            response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
-        else:
-            response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
-    return response.strip()
-def record_feedback(feedback, query, response):
     global feedback_embeddings, feedback_questions
-    matched = False
-    new_embedding = embedding_model.encode([query], convert_to_tensor=True)
-    for item in feedback_data:
-        existing_embedding = embedding_model.encode([item["question"]], convert_to_tensor=True)
-        similarity = cosine_similarity(existing_embedding.cpu().numpy(), new_embedding.cpu().numpy())[0][0]
-        if similarity >= 0.8 and item["response"] == response:
-            matched = True
-            votes = {"positive": "upvotes", "negative": "downvotes"}
-            item[votes[feedback]] = item.get(votes[feedback], 0) + 1
-            break
-    if not matched:
-        entry = {
-            "question": query,
-            "response": response,
-            "feedback": feedback,
-            "upvotes": 1 if feedback == "positive" else 0,
-            "downvotes": 1 if feedback == "negative" else 0
-        }
-        feedback_data.append(entry)
-    with open(feedback_path, "w") as f:
-        json.dump(feedback_data, f, indent=4)
-    feedback_questions = [item["question"] for item in feedback_data]
-    if feedback_questions:
-        feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True)
-    upload_feedback_to_hf()
-app = Flask(__name__)
-@app.route("/api/chat", methods=["POST"])
-def chat():
-    data = request.json
-    query = data.get("query", "")
-    dev = data.get("dev_mode", False)
-    dev_mode["enabled"] = dev
-    response = chatbot_response(query, dev)
-    return jsonify({"response": response})
-@app.route("/api/feedback", methods=["POST"])
-def feedback():
-    data = request.json
-    query = data.get("query", "")
-    response = data.get("response", "")
-    feedback_type = data.get("feedback", "")
-    record_feedback(feedback_type, query, response)
-    return jsonify({"status": "success"})
-@app.route("/", methods=["GET"])
-def index():
-    return "University Inquiries AI Chatbot API. Use POST /chat or /feedback.", 200
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7861)

+import gradio as gr
 import json
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np
 import os
 from huggingface_hub import upload_file, hf_hub_download, InferenceClient
+PUP_Themed_css = """
+html, body, .gradio-container, .gr-app {
+    height: 100% !important;
+    margin: 0 !important;
+    padding: 0 !important;
+    background: linear-gradient(to bottom right, #800000, #ff0000, #ffeb3b, #ffa500) !important;
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
+    color: #1b4332 !important;
+}
+"""
 embedding_model = SentenceTransformer('paraphrase-mpnet-base-v2')
+inference_token = os.getenv("HF_TOKEN") or os.getenv("PUP_AI_Chatbot_Token")
 inference_client = InferenceClient(
     model="mistralai/Mixtral-8x7B-Instruct-v0.1",
     token=inference_token
 )
+with open("dataset.json", "r") as f:
     dataset = json.load(f)
 questions = [item["question"] for item in dataset]
 feedback_embeddings = None
 dev_mode = {"enabled": False}
+feedback_path = "outputs/feedback.json"
+os.makedirs("outputs", exist_ok=True)
 try:
+    hf_token = os.getenv("PUP_AI_Chatbot_Token")
     downloaded_path = hf_hub_download(
         repo_id="oceddyyy/University_Inquiries_Feedback",
         filename="feedback.json",
         json.dump(feedback_data, f_local, indent=4)
 except Exception as e:
+    print(f"[Startup] No feedback loaded from HF: {e}")
     feedback_data = []
 def upload_feedback_to_hf():
+    hf_token = os.getenv("PUP_AI_Chatbot_Token")
     if not hf_token:
         raise ValueError("Hugging Face token not found in environment variables!")
     except Exception as e:
         print(f"Error uploading feedback to HF: {e}")
+def chatbot_response(query, chat_history):
     query_embedding = embedding_model.encode([query], convert_to_tensor=True)
     if feedback_embeddings is not None:
         feedback_scores = cosine_similarity(query_embedding.cpu().numpy(), feedback_embeddings.cpu().numpy())[0]
         best_idx = int(np.argmax(feedback_scores))
         if best_score >= dynamic_threshold:
             response = matched_feedback["response"]
+            chat_history.append((query, response))
+            return "", chat_history, gr.update(visible=True)
     similarity_scores = cosine_similarity(query_embedding.cpu().numpy(), question_embeddings.cpu().numpy())[0]
     best_idx = int(np.argmax(similarity_scores))
     best_score = similarity_scores[best_idx]
+    matched_item = dataset[best_idx]  # Changed this to get full entry including month/year
     matched_a = matched_item.get("answer", "")
+    if best_score < 0.4:
+        response = "Sorry, but the PUP handbook does not contain such information."
+    else:
+        if dev_mode["enabled"]:
+            prompt = (
+                f"A student asked:\n\"{query}\"\n\n"
+                f"Relevant handbook info:\n\"{matched_a}\"\n\n"
+                f"Please answer based only on this handbook content."
+            )
+            try:
+                response = inference_client.text_generation(prompt, max_new_tokens=200, temperature=0.7)
+            except Exception as e:
+                print(f"[ERROR] HF inference failed: {e}")
+                response = f"(Fallback) {matched_a}"
+        else:
             if "month" in matched_item and "year" in matched_item:
                 response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
             else:
                 response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
+    chat_history.append((query, response.strip()))
+    return "", chat_history, gr.update(visible=True)
+def record_feedback(feedback, chat_history):
     global feedback_embeddings, feedback_questions
+    if chat_history:
+        last_query, last_response = chat_history[-1]
+        matched = False
+        new_embedding = embedding_model.encode([last_query], convert_to_tensor=True)
+        for item in feedback_data:
+            existing_embedding = embedding_model.encode([item["question"]], convert_to_tensor=True)
+            similarity = cosine_similarity(existing_embedding.cpu().numpy(), new_embedding.cpu().numpy())[0][0]
+            if similarity >= 0.8 and item["response"] == last_response:
+                matched = True
+                votes = {"positive": "upvotes", "negative": "downvotes"}
+                item[votes[feedback]] = item.get(votes[feedback], 0) + 1
+                break
+        if not matched:
+            entry = {
+                "question": last_query,
+                "response": last_response,
+                "feedback": feedback,
+                "upvotes": 1 if feedback == "positive" else 0,
+                "downvotes": 1 if feedback == "negative" else 0
+            }
+            feedback_data.append(entry)
+        with open(feedback_path, "w") as f:
+            json.dump(feedback_data, f, indent=4)
+        feedback_questions = [item["question"] for item in feedback_data]
+        if feedback_questions:
+            feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True)
+        upload_feedback_to_hf()
+    return gr.update(visible=False)
+with gr.Blocks(css=PUP_Themed_css, title="University Handbook AI Chatbot") as demo:
+    gr.Markdown(
+    """
+    <div style='
+        background-color: var(--block-background-fill);
+        border-radius: 16px;
+        padding: 24px 16px;
+        margin-bottom: 24px;
+        box-shadow: 0 6px 16px rgba(0, 0, 0, 0.15);
+        max-width: 700px;
+        margin-left: auto;
+        margin-right: auto;
+        text-align: center;
+        color: var(--text-color);'>
+        <h1 style='font-size: 2.2rem; margin: 0;'>University Inquiries AI Chatbot</h1>
+    </div>
+    """
+)
+    state = gr.State(chat_history)
+    chatbot_ui = gr.Chatbot(label="Chat", show_label=False)
+    with gr.Row():
+        dev_btn = gr.Button("DevMode 🔐")
+        password_box = gr.Textbox(placeholder="Enter Dev password", type="password", visible=False, show_label=False)
+        confirm_btn = gr.Button("Confirm", visible=False)
+    dev_pass = os.getenv("DEV_MODE_PASSWORD", "letmein")
+    def show_password_input():
+        return gr.update(visible=True), gr.update(visible=True)
+    def enable_devmode(password_input):
+        if password_input == dev_pass:
+            dev_mode["enabled"] = True
+            return gr.update(visible=False), gr.update(visible=False), gr.update(value="DevMode ✅", interactive=False)
+        return gr.update(visible=True), gr.update(visible=True), gr.update(value="Wrong password. Try again.")
+    dev_btn.click(show_password_input, outputs=[password_box, confirm_btn])
+    confirm_btn.click(enable_devmode, inputs=[password_box], outputs=[password_box, confirm_btn, dev_btn])
+    with gr.Row():
+        query_input = gr.Textbox(placeholder="Type your question here...", show_label=False)
+        submit_btn = gr.Button("Submit")
+    with gr.Row(visible=False) as feedback_row:
+        gr.Markdown("Was this helpful?")
+        thumbs_up = gr.Button("👍")
+        thumbs_down = gr.Button("👎")
+    def handle_submit(message, chat_state):
+        return chatbot_response(message, chat_state)
+    submit_btn.click(handle_submit, [query_input, state], [query_input, chatbot_ui, feedback_row])
+    query_input.submit(handle_submit, [query_input, state], [query_input, chatbot_ui, feedback_row])
+    thumbs_up.click(lambda state: record_feedback("positive", state), inputs=[state], outputs=[feedback_row])
+    thumbs_down.click(lambda state: record_feedback("negative", state), inputs=[state], outputs=[feedback_row])
 if __name__ == "__main__":
+    demo.launch()