Spaces:
Sleeping
Sleeping
Update rag.py
Browse files
rag.py
CHANGED
|
@@ -25,10 +25,7 @@ HF_DATASET_REPO = "midrees2806/unmatched_queries"
|
|
| 25 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 26 |
|
| 27 |
# Greeting list
|
| 28 |
-
GREETINGS = [
|
| 29 |
-
"hi", "hello", "hey", "good morning", "good afternoon", "good evening",
|
| 30 |
-
"assalam o alaikum", "salam", "aoa", "hi there", "hey there", "greetings"
|
| 31 |
-
]
|
| 32 |
|
| 33 |
# Load multiple JSON datasets
|
| 34 |
dataset = []
|
|
@@ -38,9 +35,7 @@ try:
|
|
| 38 |
with open(file_path, 'r', encoding='utf-8') as f:
|
| 39 |
data = json.load(f)
|
| 40 |
if isinstance(data, list):
|
| 41 |
-
for item in data
|
| 42 |
-
if isinstance(item, dict) and 'Question' in item and 'Answer' in item:
|
| 43 |
-
dataset.append(item)
|
| 44 |
except Exception as e:
|
| 45 |
print(f"Error loading datasets: {e}")
|
| 46 |
|
|
@@ -68,16 +63,19 @@ def manage_unmatched_queries(query: str):
|
|
| 68 |
|
| 69 |
def query_groq_llm(prompt):
|
| 70 |
try:
|
|
|
|
| 71 |
chat_completion = groq_client.chat.completions.create(
|
| 72 |
messages=[{"role": "user", "content": prompt}],
|
| 73 |
model="llama3-70b-8192",
|
| 74 |
temperature=0.7,
|
| 75 |
-
max_tokens=
|
| 76 |
)
|
| 77 |
return chat_completion.choices[0].message.content.strip()
|
| 78 |
except Exception as e:
|
| 79 |
print(f"Error querying Groq API: {e}")
|
| 80 |
-
return
|
|
|
|
|
|
|
| 81 |
|
| 82 |
def get_best_answer(user_input):
|
| 83 |
if not user_input.strip():
|
|
@@ -85,14 +83,14 @@ def get_best_answer(user_input):
|
|
| 85 |
|
| 86 |
user_input_lower = user_input.lower().strip()
|
| 87 |
|
| 88 |
-
# Basic Validation
|
| 89 |
if len(user_input_lower.split()) < 3 and not any(greet in user_input_lower for greet in GREETINGS):
|
| 90 |
return "Please ask your question properly with at least 3 words."
|
| 91 |
|
| 92 |
# Fee Check
|
| 93 |
if any(keyword in user_input_lower for keyword in ["fee structure", "fees structure", "semester fees", "semester fee"]):
|
| 94 |
return (
|
| 95 |
-
"💰
|
|
|
|
| 96 |
"🔗 https://ue.edu.pk/allfeestructure.php"
|
| 97 |
)
|
| 98 |
|
|
@@ -103,36 +101,44 @@ def get_best_answer(user_input):
|
|
| 103 |
best_score = similarities[best_match_idx].item()
|
| 104 |
|
| 105 |
if best_score >= 0.65:
|
| 106 |
-
# PATH 1: Dataset Match
|
| 107 |
original_answer = dataset_answers[best_match_idx]
|
| 108 |
-
prompt = f"""
|
| 109 |
-
|
| 110 |
-
DO NOT add extra information.
|
| 111 |
|
| 112 |
Question: {user_input}
|
| 113 |
-
|
| 114 |
-
Rephrased Answer:"""
|
| 115 |
else:
|
| 116 |
-
# PATH 2: No Dataset Match
|
| 117 |
manage_unmatched_queries(user_input)
|
| 118 |
|
| 119 |
-
prompt = f"""You are the UOE AI Assistant for University of Education Lahore.
|
| 120 |
The user asked: "{user_input}".
|
| 121 |
-
1. Answer this question based on your general knowledge about University of Education Lahore.
|
| 122 |
-
2. After the answer, strictly include a note saying that this specific query has been forwarded to the support team for verification and will be added to our verified database soon.
|
| 123 |
-
3. Mention that for 100% confirmed information, they should visit the official website (https://ue.edu.pk) or contact:
|
| 124 |
-
- Phone: +92-42-99262231-33
|
| 125 |
-
- Email: info@ue.edu.pk
|
| 126 |
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
llm_response = query_groq_llm(prompt)
|
| 130 |
|
| 131 |
-
#
|
| 132 |
if llm_response:
|
| 133 |
-
for marker in ["Improved Answer:", "Official Answer:", "Rephrased Answer:"]:
|
| 134 |
-
if marker in llm_response:
|
| 135 |
-
return llm_response.split(marker)[-1].strip()
|
| 136 |
return llm_response
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
else:
|
| 138 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 26 |
|
| 27 |
# Greeting list
|
| 28 |
+
GREETINGS = ["hi", "hello", "hey", "good morning", "good afternoon", "good evening", "assalam o alaikum", "salam", "aoa", "hi there", "hey there", "greetings"]
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# Load multiple JSON datasets
|
| 31 |
dataset = []
|
|
|
|
| 35 |
with open(file_path, 'r', encoding='utf-8') as f:
|
| 36 |
data = json.load(f)
|
| 37 |
if isinstance(data, list):
|
| 38 |
+
dataset.extend([item for item in data if isinstance(item, dict) and 'Question' in item and 'Answer' in item])
|
|
|
|
|
|
|
| 39 |
except Exception as e:
|
| 40 |
print(f"Error loading datasets: {e}")
|
| 41 |
|
|
|
|
| 63 |
|
| 64 |
def query_groq_llm(prompt):
|
| 65 |
try:
|
| 66 |
+
# Temperature is kept at 0.7 so that the response comes back rephrased each time
|
| 67 |
chat_completion = groq_client.chat.completions.create(
|
| 68 |
messages=[{"role": "user", "content": prompt}],
|
| 69 |
model="llama3-70b-8192",
|
| 70 |
temperature=0.7,
|
| 71 |
+
max_tokens=800
|
| 72 |
)
|
| 73 |
return chat_completion.choices[0].message.content.strip()
|
| 74 |
except Exception as e:
|
| 75 |
print(f"Error querying Groq API: {e}")
|
| 76 |
+
return None # Returns None if the API call fails
|
| 77 |
+
|
| 78 |
+
|
| 79 |
|
| 80 |
def get_best_answer(user_input):
|
| 81 |
if not user_input.strip():
|
|
|
|
| 83 |
|
| 84 |
user_input_lower = user_input.lower().strip()
|
| 85 |
|
|
|
|
| 86 |
if len(user_input_lower.split()) < 3 and not any(greet in user_input_lower for greet in GREETINGS):
|
| 87 |
return "Please ask your question properly with at least 3 words."
|
| 88 |
|
| 89 |
# Fee Check
|
| 90 |
if any(keyword in user_input_lower for keyword in ["fee structure", "fees structure", "semester fees", "semester fee"]):
|
| 91 |
return (
|
| 92 |
+
"💰 **Fee Structure Information**\n\n"
|
| 93 |
+
"University of Education Lahore ki up-to-date fee maloomat ke liye niche diye gaye official link par click karen:\n"
|
| 94 |
"🔗 https://ue.edu.pk/allfeestructure.php"
|
| 95 |
)
|
| 96 |
|
|
|
|
| 101 |
best_score = similarities[best_match_idx].item()
|
| 102 |
|
| 103 |
if best_score >= 0.65:
|
| 104 |
+
# PATH 1: Dataset Match
|
| 105 |
original_answer = dataset_answers[best_match_idx]
|
| 106 |
+
prompt = f"""You are the official UOE AI Assistant. Rephrase the following verified answer into a professional and attractive format.
|
| 107 |
+
Use headings and bullet points. Do not add external facts.
|
|
|
|
| 108 |
|
| 109 |
Question: {user_input}
|
| 110 |
+
Verified Answer: {original_answer}"""
|
|
|
|
| 111 |
else:
|
| 112 |
+
# PATH 2: No Dataset Match - LLM Knowledge + Precise Instruction
|
| 113 |
manage_unmatched_queries(user_input)
|
| 114 |
|
| 115 |
+
prompt = f"""You are the UOE AI Assistant for University of Education (UE) Lahore.
|
| 116 |
The user asked: "{user_input}".
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
+
Task:
|
| 119 |
+
1. Answer the question using your knowledge about University of Education Lahore.
|
| 120 |
+
2. At the end, add this exact notice:
|
| 121 |
+
"📢 *Note: Aapki ye query hamari support team ko forward kar di gayi hai kyunke hamare pas abhi users ki queries zyada hain. Support team isay jald verified database mein shamil kar degi taake next time aapko mazeed behtar jawab mil sakay.*"
|
| 122 |
+
3. Provide official contact details:
|
| 123 |
+
🌐 Website: https://ue.edu.pk
|
| 124 |
+
📞 Phone: +92-42-99262231-33
|
| 125 |
+
✉️ Email: info@ue.edu.pk
|
| 126 |
+
|
| 127 |
+
Format the response with professional headings and bold text."""
|
| 128 |
|
| 129 |
llm_response = query_groq_llm(prompt)
|
| 130 |
|
| 131 |
+
# If Groq returned a response, show it
|
| 132 |
if llm_response:
|
|
|
|
|
|
|
|
|
|
| 133 |
return llm_response
|
| 134 |
+
|
| 135 |
+
# Final fallback in case the Groq API is down
|
| 136 |
+
if best_score >= 0.65:
|
| 137 |
+
return f"Verified Answer: {dataset_answers[best_match_idx]}"
|
| 138 |
else:
|
| 139 |
+
return (
|
| 140 |
+
"I'm sorry, I'm having trouble connecting to my brain right now. 😅\n\n"
|
| 141 |
+
"Lekin maine aapki query support team ko bhej di hai. Official maloomat ke liye:\n"
|
| 142 |
+
"📞 +92-42-99262231-33\n"
|
| 143 |
+
"✉️ info@ue.edu.pk"
|
| 144 |
+
)
|