Spaces:

UniversityAIChatbot
/

UnivAI_Inquiries_Chatbot

Sleeping

App Files Community

oceddyyy committed on Oct 19, 2025

Commit

1473519

verified ·

1 Parent(s): b46142c

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -40

app.py CHANGED Viewed

@@ -6,10 +6,9 @@ import numpy as np
 from huggingface_hub import upload_file, hf_hub_download, InferenceClient
 from flask import Flask, request, jsonify
 import time
-import tempfile
-# === Setup cache directories ===
 os.environ["HF_HOME"] = "/tmp/.cache"
 os.environ["HF_DATASETS_CACHE"] = "/tmp/.cache"
 os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/.cache"
@@ -17,7 +16,7 @@ os.makedirs("/tmp/.cache", exist_ok=True)
 os.makedirs("/tmp/outputs", exist_ok=True)
-# === Initialize models ===
 embedding_model = SentenceTransformer('paraphrase-mpnet-base-v2')
 token = os.getenv("HF_TOKEN") or os.getenv("NEW_PUP_AI_Project")
 inference_client = InferenceClient(
@@ -26,7 +25,7 @@ inference_client = InferenceClient(
 )
-# === Load dataset ===
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 DATASET_PATH = os.path.join(BASE_DIR, "dataset.json")
 with open(DATASET_PATH, "r") as f:
@@ -37,7 +36,7 @@ answers = [item["answer"] for item in dataset]
 question_embeddings = embedding_model.encode(questions, convert_to_tensor=True)
-# === Feedback system setup ===
 feedback_data = []
 feedback_questions = []
 feedback_embeddings = None
@@ -71,9 +70,9 @@ except Exception as e:
     feedback_data = []
-# === Helper: upload file to HF ===
 def upload_file_to_hf(local_path, remote_filename):
-    """Upload a single file to Hugging Face dataset repo."""
     hf_token = os.getenv("NEW_PUP_AI_Project")
     if not hf_token:
         raise ValueError("Hugging Face token not found in environment variables!")
@@ -86,16 +85,16 @@ def upload_file_to_hf(local_path, remote_filename):
             repo_type="dataset",
             token=hf_token
         )
-        print(f"[UPLOAD] {remote_filename} uploaded successfully to Hugging Face.")
     except Exception as e:
-        print(f"[ERROR] Upload failed for {remote_filename}: {e}")
-# === Chatbot core ===
 def chatbot_response(query, dev_mode_flag):
     query_embedding = embedding_model.encode([query], convert_to_tensor=True)
-    # Check feedback-based matches first
     if feedback_embeddings is not None:
         feedback_scores = cosine_similarity(query_embedding.cpu().numpy(), feedback_embeddings.cpu().numpy())[0]
         best_idx = int(np.argmax(feedback_scores))
@@ -111,7 +110,7 @@ def chatbot_response(query, dev_mode_flag):
         if best_score >= dynamic_threshold:
             return matched_feedback["response"], "Feedback", 0.0
-    # Otherwise, use main dataset
     similarity_scores = cosine_similarity(query_embedding.cpu().numpy(), question_embeddings.cpu().numpy())[0]
     top_k = 3
     top_k_indices = np.argsort(similarity_scores)[-top_k:][::-1]
@@ -123,7 +122,7 @@ def chatbot_response(query, dev_mode_flag):
     matched_source = matched_item.get("source", "PUP Handbook")
     best_score = top_k_scores[0]
-    # Developer mode (LLM synthesis)
     if dev_mode_flag:
         context = ""
         for i, item in enumerate(top_k_items):
@@ -180,7 +179,7 @@ def chatbot_response(query, dev_mode_flag):
             error_msg = f"[ERROR] HF inference failed: {e}"
             return f"(UnivAI+++ error: {error_msg})", matched_source, 0.0
-    # Normal retrieval mode
     if best_score < 0.4:
         response = "Sorry, but the PUP handbook does not contain such information."
     else:
@@ -188,12 +187,13 @@ def chatbot_response(query, dev_mode_flag):
             response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
         else:
             response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
     return response.strip(), matched_source, 0.0
-# === Improved feedback recording (uploads only new/updated entries) ===
 def record_feedback(feedback_type, user_query, chatbot_response_text, comment=None):
-    """Records feedback and uploads only new or updated entry."""
     global feedback_embeddings, feedback_questions
     matched = False
     new_embedding = embedding_model.encode([user_query], convert_to_tensor=True)
@@ -205,32 +205,19 @@ def record_feedback(feedback_type, user_query, chatbot_response_text, comment=No
             matched = True
             votes = {"positive": "upvotes", "negative": "downvotes"}
             item[votes[feedback_type]] = item.get(votes[feedback_type], 0) + 1
-            # Only upload the updated item (not full dataset)
-            with tempfile.NamedTemporaryFile("w", delete=False, suffix=".json") as tempf:
-                json.dump([item], tempf, indent=4)
-                tempf_path = tempf.name
-            upload_file_to_hf(tempf_path, "latest_feedback_update.json")
-            os.remove(tempf_path)
             break
     if not matched:
         entry = {
-            "question": user_query,
-            "response": chatbot_response_text,
             "feedback": feedback_type,
             "upvotes": 1 if feedback_type == "positive" else 0,
             "downvotes": 1 if feedback_type == "negative" else 0
         }
         feedback_data.append(entry)
-        # Save only this new entry remotely
-        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".json") as tempf:
-            json.dump([entry], tempf, indent=4)
-            tempf_path = tempf.name
-        upload_file_to_hf(tempf_path, "latest_feedback_entry.json")
-        os.remove(tempf_path)
-    # Always update local JSON + embeddings
     with open(feedback_path, "w") as f:
         json.dump(feedback_data, f, indent=4)
@@ -238,7 +225,10 @@ def record_feedback(feedback_type, user_query, chatbot_response_text, comment=No
     if feedback_questions:
         feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True)
-    # Comment saving (optional)
     if comment and comment.strip():
         try:
             with open(COMMENTS_PATH, "r") as f:
@@ -258,15 +248,10 @@ def record_feedback(feedback_type, user_query, chatbot_response_text, comment=No
         with open(COMMENTS_PATH, "w") as f:
             json.dump(comments_list, f, indent=4)
-        # Upload only the latest comment
-        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".json") as tempf:
-            json.dump([comment_entry], tempf, indent=4)
-            tempf_path = tempf.name
-        upload_file_to_hf(tempf_path, "latest_comment_entry.json")
-        os.remove(tempf_path)
-# === Flask API routes ===
 app = Flask(__name__)
 @app.route("/api/chat", methods=["POST"])

 from huggingface_hub import upload_file, hf_hub_download, InferenceClient
 from flask import Flask, request, jsonify
 import time
+# Setup caching and directories
 os.environ["HF_HOME"] = "/tmp/.cache"
 os.environ["HF_DATASETS_CACHE"] = "/tmp/.cache"
 os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/.cache"
 os.makedirs("/tmp/outputs", exist_ok=True)
+# Initialize models and clients
 embedding_model = SentenceTransformer('paraphrase-mpnet-base-v2')
 token = os.getenv("HF_TOKEN") or os.getenv("NEW_PUP_AI_Project")
 inference_client = InferenceClient(
 )
+# Load dataset
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 DATASET_PATH = os.path.join(BASE_DIR, "dataset.json")
 with open(DATASET_PATH, "r") as f:
 question_embeddings = embedding_model.encode(questions, convert_to_tensor=True)
+# Feedback system setup
 feedback_data = []
 feedback_questions = []
 feedback_embeddings = None
     feedback_data = []
+# Upload helper
 def upload_file_to_hf(local_path, remote_filename):
+    """Helper to upload any file to Hugging Face dataset repo."""
     hf_token = os.getenv("NEW_PUP_AI_Project")
     if not hf_token:
         raise ValueError("Hugging Face token not found in environment variables!")
             repo_type="dataset",
             token=hf_token
         )
+        print(f"{remote_filename} uploaded to Hugging Face successfully.")
     except Exception as e:
+        print(f"Error uploading {remote_filename} to HF: {e}")
+# Chatbot main logic
 def chatbot_response(query, dev_mode_flag):
     query_embedding = embedding_model.encode([query], convert_to_tensor=True)
+    # Check for feedback-based matches first
     if feedback_embeddings is not None:
         feedback_scores = cosine_similarity(query_embedding.cpu().numpy(), feedback_embeddings.cpu().numpy())[0]
         best_idx = int(np.argmax(feedback_scores))
         if best_score >= dynamic_threshold:
             return matched_feedback["response"], "Feedback", 0.0
+    # Otherwise, match from dataset
     similarity_scores = cosine_similarity(query_embedding.cpu().numpy(), question_embeddings.cpu().numpy())[0]
     top_k = 3
     top_k_indices = np.argsort(similarity_scores)[-top_k:][::-1]
     matched_source = matched_item.get("source", "PUP Handbook")
     best_score = top_k_scores[0]
+    # Developer mode (with LLM generation)
     if dev_mode_flag:
         context = ""
         for i, item in enumerate(top_k_items):
             error_msg = f"[ERROR] HF inference failed: {e}"
             return f"(UnivAI+++ error: {error_msg})", matched_source, 0.0
+    # Regular retrieval-based response
     if best_score < 0.4:
         response = "Sorry, but the PUP handbook does not contain such information."
     else:
             response = f"As of {matched_item['month']}, {matched_item['year']}, {matched_a}"
         else:
             response = f"According to 2019 Proposed PUP Handbook, {matched_a}"
     return response.strip(), matched_source, 0.0
+# ✅ FIXED FUNCTION: Records feedback correctly
 def record_feedback(feedback_type, user_query, chatbot_response_text, comment=None):
+    """Records user feedback and optional comment."""
     global feedback_embeddings, feedback_questions
     matched = False
     new_embedding = embedding_model.encode([user_query], convert_to_tensor=True)
             matched = True
             votes = {"positive": "upvotes", "negative": "downvotes"}
             item[votes[feedback_type]] = item.get(votes[feedback_type], 0) + 1
             break
     if not matched:
         entry = {
+            "question": user_query,  # ✅ user’s question
+            "response": chatbot_response_text,  # ✅ chatbot’s answer
             "feedback": feedback_type,
             "upvotes": 1 if feedback_type == "positive" else 0,
             "downvotes": 1 if feedback_type == "negative" else 0
         }
         feedback_data.append(entry)
+    # Save locally
     with open(feedback_path, "w") as f:
         json.dump(feedback_data, f, indent=4)
     if feedback_questions:
         feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True)
+    # Upload to HF
+    upload_file_to_hf(feedback_path, "feedback.json")
+    # Save optional comments
     if comment and comment.strip():
         try:
             with open(COMMENTS_PATH, "r") as f:
         with open(COMMENTS_PATH, "w") as f:
             json.dump(comments_list, f, indent=4)
+        upload_file_to_hf(COMMENTS_PATH, "Comments.json")
+# Flask API setup
 app = Flask(__name__)
 @app.route("/api/chat", methods=["POST"])