Spaces:

midrees2806
/

UoeChatbot

Sleeping

App Files Community

midrees2806 committed on Jan 9

Commit

9765f7f

verified ·

1 Parent(s): 1756bf4

Update rag.py

Browse files

Files changed (1) hide show

rag.py +36 -30

rag.py CHANGED Viewed

@@ -25,10 +25,7 @@ HF_DATASET_REPO = "midrees2806/unmatched_queries"
 HF_TOKEN = os.getenv("HF_TOKEN")
 # Greeting list
-GREETINGS = [
-    "hi", "hello", "hey", "good morning", "good afternoon", "good evening",
-    "assalam o alaikum", "salam", "aoa", "hi there", "hey there", "greetings"
-]
 # Load multiple JSON datasets
 dataset = []
@@ -38,9 +35,7 @@ try:
         with open(file_path, 'r', encoding='utf-8') as f:
             data = json.load(f)
             if isinstance(data, list):
-                for item in data:
-                    if isinstance(item, dict) and 'Question' in item and 'Answer' in item:
-                        dataset.append(item)
 except Exception as e:
     print(f"Error loading datasets: {e}")
@@ -68,16 +63,19 @@ def manage_unmatched_queries(query: str):
 def query_groq_llm(prompt):
     try:
         chat_completion = groq_client.chat.completions.create(
             messages=[{"role": "user", "content": prompt}],
             model="llama3-70b-8192",
             temperature=0.7,
-            max_tokens=600
         )
         return chat_completion.choices[0].message.content.strip()
     except Exception as e:
         print(f"Error querying Groq API: {e}")
-        return ""
 def get_best_answer(user_input):
     if not user_input.strip():
@@ -85,14 +83,14 @@ def get_best_answer(user_input):
     user_input_lower = user_input.lower().strip()
-    # Basic Validation
     if len(user_input_lower.split()) < 3 and not any(greet in user_input_lower for greet in GREETINGS):
         return "Please ask your question properly with at least 3 words."
     # Fee Check
     if any(keyword in user_input_lower for keyword in ["fee structure", "fees structure", "semester fees", "semester fee"]):
         return (
-            "💰 For complete and up-to-date fee details for this program, we recommend visiting the official University of Education fee structure page.\n"
             "🔗 https://ue.edu.pk/allfeestructure.php"
         )
@@ -103,36 +101,44 @@ def get_best_answer(user_input):
     best_score = similarities[best_match_idx].item()
     if best_score >= 0.65:
-        # PATH 1: Dataset Match (Rephrase with LLM)
         original_answer = dataset_answers[best_match_idx]
-        prompt = f"""Name is UOE AI Assistant! You are an official assistant for the University of Education Lahore.
-        Rephrase the following official answer clearly and professionally using bullet points or headings where needed.
-        DO NOT add extra information.
         Question: {user_input}
-        Original Answer: {original_answer}
-        Rephrased Answer:"""
     else:
-        # PATH 2: No Dataset Match (Use LLM Knowledge + Logging)
         manage_unmatched_queries(user_input)
-        prompt = f"""You are the UOE AI Assistant for University of Education Lahore.
         The user asked: "{user_input}".
-        1. Answer this question based on your general knowledge about University of Education Lahore.
-        2. After the answer, strictly include a note saying that this specific query has been forwarded to the support team for verification and will be added to our verified database soon.
-        3. Mention that for 100% confirmed information, they should visit the official website (https://ue.edu.pk) or contact:
-           - Phone: +92-42-99262231-33
-           - Email: info@ue.edu.pk
-        Make the response professional and formatted with headings/points."""
     llm_response = query_groq_llm(prompt)
-    # Cleaning up response labels if any
     if llm_response:
-        for marker in ["Improved Answer:", "Official Answer:", "Rephrased Answer:"]:
-            if marker in llm_response:
-                return llm_response.split(marker)[-1].strip()
         return llm_response
     else:
-        return dataset_answers[best_match_idx] if best_score >= 0.65 else "Please contact info@ue.edu.pk for assistance."

 HF_TOKEN = os.getenv("HF_TOKEN")
 # Greeting list
+GREETINGS = ["hi", "hello", "hey", "good morning", "good afternoon", "good evening", "assalam o alaikum", "salam", "aoa", "hi there", "hey there", "greetings"]
 # Load multiple JSON datasets
 dataset = []
         with open(file_path, 'r', encoding='utf-8') as f:
             data = json.load(f)
             if isinstance(data, list):
+                dataset.extend([item for item in data if isinstance(item, dict) and 'Question' in item and 'Answer' in item])
 except Exception as e:
     print(f"Error loading datasets: {e}")
 def query_groq_llm(prompt):
     try:
+        # Temperature 0.7 rakha hai taake har baar response rephrase ho kar aaye
         chat_completion = groq_client.chat.completions.create(
             messages=[{"role": "user", "content": prompt}],
             model="llama3-70b-8192",
             temperature=0.7,
+            max_tokens=800
         )
         return chat_completion.choices[0].message.content.strip()
     except Exception as e:
         print(f"Error querying Groq API: {e}")
+        return None # None return karega agar API fail hui
 def get_best_answer(user_input):
     if not user_input.strip():
     user_input_lower = user_input.lower().strip()
     if len(user_input_lower.split()) < 3 and not any(greet in user_input_lower for greet in GREETINGS):
         return "Please ask your question properly with at least 3 words."
     # Fee Check
     if any(keyword in user_input_lower for keyword in ["fee structure", "fees structure", "semester fees", "semester fee"]):
         return (
+            "💰 **Fee Structure Information**\n\n"
+            "University of Education Lahore ki up-to-date fee maloomat ke liye niche diye gaye official link par click karen:\n"
             "🔗 https://ue.edu.pk/allfeestructure.php"
         )
     best_score = similarities[best_match_idx].item()
     if best_score >= 0.65:
+        # PATH 1: Dataset Match
         original_answer = dataset_answers[best_match_idx]
+        prompt = f"""You are the official UOE AI Assistant. Rephrase the following verified answer into a professional and attractive format.
+        Use headings and bullet points. Do not add external facts.
         Question: {user_input}
+        Verified Answer: {original_answer}"""
     else:
+        # PATH 2: No Dataset Match - LLM Knowledge + Precise Instruction
         manage_unmatched_queries(user_input)
+        prompt = f"""You are the UOE AI Assistant for University of Education (UE) Lahore.
         The user asked: "{user_input}".
+        Task:
+        1. Answer the question using your knowledge about University of Education Lahore.
+        2. At the end, add this exact notice:
+           "📢 *Note: Aapki ye query hamari support team ko forward kar di gayi hai kyunke hamare pas abhi users ki queries zyada hain. Support team isay jald verified database mein shamil kar degi taake next time aapko mazeed behtar jawab mil sakay.*"
+        3. Provide official contact details:
+           🌐 Website: https://ue.edu.pk
+           📞 Phone: +92-42-99262231-33
+           ✉️ Email: info@ue.edu.pk
+        Format the response with professional headings and bold text."""
     llm_response = query_groq_llm(prompt)
+    # Agar Groq ne jawab diya to wo dikhao
     if llm_response:
         return llm_response
+    # Bilkul aakhri fallback agar Groq API down ho
+    if best_score >= 0.65:
+        return f"Verified Answer: {dataset_answers[best_match_idx]}"
     else:
+        return (
+            "I'm sorry, I'm having trouble connecting to my brain right now. 😅\n\n"
+            "Lekin maine aapki query support team ko bhej di hai. Official maloomat ke liye:\n"
+            "📞 +92-42-99262231-33\n"
+            "✉️ info@ue.edu.pk"
+        )