midrees2806 committed on
Commit
9765f7f
·
verified ·
1 Parent(s): 1756bf4

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +36 -30
rag.py CHANGED
@@ -25,10 +25,7 @@ HF_DATASET_REPO = "midrees2806/unmatched_queries"
25
  HF_TOKEN = os.getenv("HF_TOKEN")
26
 
27
  # Greeting list
28
- GREETINGS = [
29
- "hi", "hello", "hey", "good morning", "good afternoon", "good evening",
30
- "assalam o alaikum", "salam", "aoa", "hi there", "hey there", "greetings"
31
- ]
32
 
33
  # Load multiple JSON datasets
34
  dataset = []
@@ -38,9 +35,7 @@ try:
38
  with open(file_path, 'r', encoding='utf-8') as f:
39
  data = json.load(f)
40
  if isinstance(data, list):
41
- for item in data:
42
- if isinstance(item, dict) and 'Question' in item and 'Answer' in item:
43
- dataset.append(item)
44
  except Exception as e:
45
  print(f"Error loading datasets: {e}")
46
 
@@ -68,16 +63,19 @@ def manage_unmatched_queries(query: str):
68
 
69
  def query_groq_llm(prompt):
70
  try:
 
71
  chat_completion = groq_client.chat.completions.create(
72
  messages=[{"role": "user", "content": prompt}],
73
  model="llama3-70b-8192",
74
  temperature=0.7,
75
- max_tokens=600
76
  )
77
  return chat_completion.choices[0].message.content.strip()
78
  except Exception as e:
79
  print(f"Error querying Groq API: {e}")
80
- return ""
 
 
81
 
82
  def get_best_answer(user_input):
83
  if not user_input.strip():
@@ -85,14 +83,14 @@ def get_best_answer(user_input):
85
 
86
  user_input_lower = user_input.lower().strip()
87
 
88
- # Basic Validation
89
  if len(user_input_lower.split()) < 3 and not any(greet in user_input_lower for greet in GREETINGS):
90
  return "Please ask your question properly with at least 3 words."
91
 
92
  # Fee Check
93
  if any(keyword in user_input_lower for keyword in ["fee structure", "fees structure", "semester fees", "semester fee"]):
94
  return (
95
- "💰 For complete and up-to-date fee details for this program, we recommend visiting the official University of Education fee structure page.\n"
 
96
  "🔗 https://ue.edu.pk/allfeestructure.php"
97
  )
98
 
@@ -103,36 +101,44 @@ def get_best_answer(user_input):
103
  best_score = similarities[best_match_idx].item()
104
 
105
  if best_score >= 0.65:
106
- # PATH 1: Dataset Match (Rephrase with LLM)
107
  original_answer = dataset_answers[best_match_idx]
108
- prompt = f"""Name is UOE AI Assistant! You are an official assistant for the University of Education Lahore.
109
- Rephrase the following official answer clearly and professionally using bullet points or headings where needed.
110
- DO NOT add extra information.
111
 
112
  Question: {user_input}
113
- Original Answer: {original_answer}
114
- Rephrased Answer:"""
115
  else:
116
- # PATH 2: No Dataset Match (Use LLM Knowledge + Logging)
117
  manage_unmatched_queries(user_input)
118
 
119
- prompt = f"""You are the UOE AI Assistant for University of Education Lahore.
120
  The user asked: "{user_input}".
121
- 1. Answer this question based on your general knowledge about University of Education Lahore.
122
- 2. After the answer, strictly include a note saying that this specific query has been forwarded to the support team for verification and will be added to our verified database soon.
123
- 3. Mention that for 100% confirmed information, they should visit the official website (https://ue.edu.pk) or contact:
124
- - Phone: +92-42-99262231-33
125
- - Email: info@ue.edu.pk
126
 
127
- Make the response professional and formatted with headings/points."""
 
 
 
 
 
 
 
 
 
128
 
129
  llm_response = query_groq_llm(prompt)
130
 
131
- # Cleaning up response labels if any
132
  if llm_response:
133
- for marker in ["Improved Answer:", "Official Answer:", "Rephrased Answer:"]:
134
- if marker in llm_response:
135
- return llm_response.split(marker)[-1].strip()
136
  return llm_response
 
 
 
 
137
  else:
138
- return dataset_answers[best_match_idx] if best_score >= 0.65 else "Please contact info@ue.edu.pk for assistance."
 
 
 
 
 
 
25
  HF_TOKEN = os.getenv("HF_TOKEN")
26
 
27
  # Greeting list
28
+ GREETINGS = ["hi", "hello", "hey", "good morning", "good afternoon", "good evening", "assalam o alaikum", "salam", "aoa", "hi there", "hey there", "greetings"]
 
 
 
29
 
30
  # Load multiple JSON datasets
31
  dataset = []
 
35
  with open(file_path, 'r', encoding='utf-8') as f:
36
  data = json.load(f)
37
  if isinstance(data, list):
38
+ dataset.extend([item for item in data if isinstance(item, dict) and 'Question' in item and 'Answer' in item])
 
 
39
  except Exception as e:
40
  print(f"Error loading datasets: {e}")
41
 
 
63
 
64
  def query_groq_llm(prompt):
65
  try:
66
+ # Temperature 0.7 rakha hai taake har baar response rephrase ho kar aaye
67
  chat_completion = groq_client.chat.completions.create(
68
  messages=[{"role": "user", "content": prompt}],
69
  model="llama3-70b-8192",
70
  temperature=0.7,
71
+ max_tokens=800
72
  )
73
  return chat_completion.choices[0].message.content.strip()
74
  except Exception as e:
75
  print(f"Error querying Groq API: {e}")
76
+ return None # None return karega agar API fail hui
77
+
78
+
79
 
80
  def get_best_answer(user_input):
81
  if not user_input.strip():
 
83
 
84
  user_input_lower = user_input.lower().strip()
85
 
 
86
  if len(user_input_lower.split()) < 3 and not any(greet in user_input_lower for greet in GREETINGS):
87
  return "Please ask your question properly with at least 3 words."
88
 
89
  # Fee Check
90
  if any(keyword in user_input_lower for keyword in ["fee structure", "fees structure", "semester fees", "semester fee"]):
91
  return (
92
+ "💰 **Fee Structure Information**\n\n"
93
+ "University of Education Lahore ki up-to-date fee maloomat ke liye niche diye gaye official link par click karen:\n"
94
  "🔗 https://ue.edu.pk/allfeestructure.php"
95
  )
96
 
 
101
  best_score = similarities[best_match_idx].item()
102
 
103
  if best_score >= 0.65:
104
+ # PATH 1: Dataset Match
105
  original_answer = dataset_answers[best_match_idx]
106
+ prompt = f"""You are the official UOE AI Assistant. Rephrase the following verified answer into a professional and attractive format.
107
+ Use headings and bullet points. Do not add external facts.
 
108
 
109
  Question: {user_input}
110
+ Verified Answer: {original_answer}"""
 
111
  else:
112
+ # PATH 2: No Dataset Match - LLM Knowledge + Precise Instruction
113
  manage_unmatched_queries(user_input)
114
 
115
+ prompt = f"""You are the UOE AI Assistant for University of Education (UE) Lahore.
116
  The user asked: "{user_input}".
 
 
 
 
 
117
 
118
+ Task:
119
+ 1. Answer the question using your knowledge about University of Education Lahore.
120
+ 2. At the end, add this exact notice:
121
+ "📢 *Note: Aapki ye query hamari support team ko forward kar di gayi hai kyunke hamare pas abhi users ki queries zyada hain. Support team isay jald verified database mein shamil kar degi taake next time aapko mazeed behtar jawab mil sakay.*"
122
+ 3. Provide official contact details:
123
+ 🌐 Website: https://ue.edu.pk
124
+ 📞 Phone: +92-42-99262231-33
125
+ ✉️ Email: info@ue.edu.pk
126
+
127
+ Format the response with professional headings and bold text."""
128
 
129
  llm_response = query_groq_llm(prompt)
130
 
131
+ # Agar Groq ne jawab diya to wo dikhao
132
  if llm_response:
 
 
 
133
  return llm_response
134
+
135
+ # Bilkul aakhri fallback agar Groq API down ho
136
+ if best_score >= 0.65:
137
+ return f"Verified Answer: {dataset_answers[best_match_idx]}"
138
  else:
139
+ return (
140
+ "I'm sorry, I'm having trouble connecting to my brain right now. 😅\n\n"
141
+ "Lekin maine aapki query support team ko bhej di hai. Official maloomat ke liye:\n"
142
+ "📞 +92-42-99262231-33\n"
143
+ "✉️ info@ue.edu.pk"
144
+ )