Spaces:

midrees2806
/

Chatbot

Running

App Files Files Community

midrees2806 committed on Jun 4, 2025

Commit

dba6f87

verified ·

1 Parent(s): 9e69b9a

Update rag.py

Browse files

Files changed (1) hide show

rag.py +51 -37

rag.py CHANGED Viewed

@@ -2,14 +2,10 @@ import json
 from sentence_transformers import SentenceTransformer, util
 from groq import Groq
 from datetime import datetime
-import requests
-from datasets import load_dataset, Dataset
-from io import BytesIO
-from PIL import Image, ImageDraw, ImageFont
-import numpy as np
-from dotenv import load_dotenv
 import os
 import pandas as pd
 # Load environment variables
 load_dotenv()
@@ -17,16 +13,22 @@ load_dotenv()
 # Initialize Groq client
 groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
-# Load models and dataset
 similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
 # Config
 HF_DATASET_REPO = "midrees2806/unmatched_queries"
 HF_TOKEN = os.getenv("HF_TOKEN")
-# Load dataset (automatically using the path)
-with open('dataset.json', 'r') as f:
-    dataset = json.load(f)
 # Precompute embeddings
 dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
@@ -50,6 +52,7 @@ def manage_unmatched_queries(query: str):
     except Exception as e:
         print(f"Failed to save query: {e}")
 def query_groq_llm(prompt, model_name="llama3-70b-8192"):
     try:
         chat_completion = groq_client.chat.completions.create(
@@ -66,24 +69,23 @@ def query_groq_llm(prompt, model_name="llama3-70b-8192"):
         print(f"Error querying Groq API: {e}")
         return ""
 def get_best_answer(user_input):
     if not user_input.strip():
         return "Please enter a valid question."
     user_input_lower = user_input.lower().strip()
-    if len(user_input_lower.split()) < 3:
         return "Please ask your question properly with at least 3 words."
-    # 👉 Check if question is about fee
-    if any(keyword in user_input_lower for keyword in ["fee structure", "fees structure"]):
         return (
             "💰 For complete and up-to-date fee details for this program, we recommend visiting the official University of Education fee structure page.\n"
-            "You’ll find comprehensive information regarding tuition, admission charges, and other applicable fees there.\n"
             "🔗 https://ue.edu.pk/allfeestructure.php"
         )
-    # 🔁 Continue with normal similarity-based logic
     user_embedding = similarity_model.encode(user_input_lower, convert_to_tensor=True)
     similarities = util.pytorch_cos_sim(user_embedding, dataset_embeddings)[0]
     best_match_idx = similarities.argmax().item()
@@ -91,33 +93,45 @@ def get_best_answer(user_input):
     if best_score < 0.65:
         manage_unmatched_queries(user_input)
     if best_score >= 0.65:
         original_answer = dataset_answers[best_match_idx]
-        prompt = f"""As an official assistant for University of Education Lahore, provide a clear response:
-        Question: {user_input}
-        Original Answer: {original_answer}
-        Improved Answer:"""
     else:
-        prompt = f"""As an official assistant for University of Education Lahore, provide a helpful response:
-        Include relevant details about university policies.
-        If unsure, direct to official channels.
-        Question: {user_input}
-        Official Answer:"""
     llm_response = query_groq_llm(prompt)
     if llm_response:
-        for marker in ["Improved Answer:", "Official Answer:"]:
             if marker in llm_response:
-                response = llm_response.split(marker)[-1].strip()
-                break
-        else:
-            response = llm_response
     else:
-        response = dataset_answers[best_match_idx] if best_score >= 0.65 else """For official information:
-        📞 +92-42-99262231-33
-        ✉️ info@ue.edu.pk
-        🌐 ue.edu.pk"""
-    return response

 from sentence_transformers import SentenceTransformer, util
 from groq import Groq
 from datetime import datetime
 import os
 import pandas as pd
+from datasets import load_dataset, Dataset
+from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 # Initialize Groq client
 groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+# Load similarity model
 similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
 # Config
 HF_DATASET_REPO = "midrees2806/unmatched_queries"
 HF_TOKEN = os.getenv("HF_TOKEN")
+# Load local dataset
+try:
+    with open('dataset.json', 'r') as f:
+        dataset = json.load(f)
+    if not all(isinstance(item, dict) and 'Question' in item and 'Answer' in item for item in dataset):
+        raise ValueError("Invalid dataset structure")
+except Exception as e:
+    print(f"Error loading dataset: {e}")
+    dataset = []
 # Precompute embeddings
 dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
     except Exception as e:
         print(f"Failed to save query: {e}")
+# Query Groq LLM
 def query_groq_llm(prompt, model_name="llama3-70b-8192"):
     try:
         chat_completion = groq_client.chat.completions.create(
         print(f"Error querying Groq API: {e}")
         return ""
+# Main logic function to be called from Gradio
 def get_best_answer(user_input):
     if not user_input.strip():
         return "Please enter a valid question."
     user_input_lower = user_input.lower().strip()
+    if len(user_input_lower.split()) < 3 and not any(greet in user_input_lower for greet in GREETINGS):
         return "Please ask your question properly with at least 3 words."
+    if any(keyword in user_input_lower for keyword in ["fee structure", "fees structure", "semester fees", "semester fee"]):
         return (
             "💰 For complete and up-to-date fee details for this program, we recommend visiting the official University of Education fee structure page.\n"
+            "You'll find comprehensive information regarding tuition, admission charges, and other applicable fees there.\n"
             "🔗 https://ue.edu.pk/allfeestructure.php"
         )
     user_embedding = similarity_model.encode(user_input_lower, convert_to_tensor=True)
     similarities = util.pytorch_cos_sim(user_embedding, dataset_embeddings)[0]
     best_match_idx = similarities.argmax().item()
     if best_score < 0.65:
         manage_unmatched_queries(user_input)
     if best_score >= 0.65:
         original_answer = dataset_answers[best_match_idx]
+        prompt = f"""Name is UOE AI Assistant! You are an official assistant for the University of Education Lahore.
+Rephrase the following official answer clearly and professionally.
+Use structured formatting (like headings, bullet points, or numbered lists) where appropriate.
+DO NOT add any new or extra information. ONLY rephrase and improve the clarity and formatting of the original answer.
+### Question:
+{user_input}
+### Original Answer:
+{original_answer}
+### Rephrased Answer:
+"""
     else:
+        prompt = f"""Name is UOE AI Assistant! As an official assistant for University of Education Lahore, provide a helpful response:
+Include relevant details about university policies.
+If unsure, direct to official channels.
+### Question:
+{user_input}
+### Official Answer:
+"""
     llm_response = query_groq_llm(prompt)
     if llm_response:
+        for marker in ["Improved Answer:", "Official Answer:", "Rephrased Answer:"]:
             if marker in llm_response:
+                return llm_response.split(marker)[-1].strip()
+        return llm_response
     else:
+        return dataset_answers[best_match_idx] if best_score >= 0.65 else (
+            "For official information:\n"
+            "📞 +92-42-99262231-33\n"
+            "✉️ info@ue.edu.pk\n"
+            "🌐 https://ue.edu.pk"
+        )