Spaces:
Sleeping
Sleeping
File size: 6,116 Bytes
31bbe51 1756bf4 729f39a 1756bf4 729f39a 1756bf4 729f39a 1756bf4 31bbe51 729f39a 31bbe51 729f39a 9765f7f 729f39a 728639e 9765f7f 728639e 31bbe51 729f39a 31bbe51 1756bf4 31bbe51 1756bf4 31bbe51 bc54961 58dc6f1 bc54961 58dc6f1 20ecec9 58dc6f1 31bbe51 9765f7f 58dc6f1 9765f7f 58dc6f1 31bbe51 729f39a 31bbe51 bc54961 20ecec9 bc54961 20ecec9 bc54961 729f39a bc54961 47f4327 bc54961 31bbe51 bc54961 31bbe51 1756bf4 bc54961 1756bf4 20ecec9 bc54961 20ecec9 bc54961 20ecec9 bc54961 1756bf4 bc54961 171c15b bc54961 47f4327 bc54961 a9e3d91 58dc6f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import json
import glob
import os
import random
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv
# Core AI Libraries
from sentence_transformers import SentenceTransformer, util
from groq import Groq
from datasets import load_dataset, Dataset
# Populate the process environment (python-dotenv) before any key is read
load_dotenv()

# Groq client, authenticated with the GROQ_API_KEY environment variable
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Sentence-embedding model used to compare user questions with the dataset
similarity_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")

# Hugging Face dataset repo that collects unanswered queries, and its token
HF_DATASET_REPO = "midrees2806/unmatched_queries"
HF_TOKEN = os.environ.get("HF_TOKEN")

# Inputs treated as greetings (compared against lower-cased user text)
GREETINGS = [
    "hi",
    "hello",
    "hey",
    "good morning",
    "good afternoon",
    "good evening",
    "assalam o alaikum",
    "salam",
    "aoa",
    "hi there",
    "hey there",
    "greetings",
]
# Load every JSON dataset under datasets/ into one list of Q/A records.
dataset = []
# Sorted so records (and therefore similarity tie-breaks) are ordered the
# same way on every run; glob order is otherwise unspecified.
json_files = sorted(glob.glob('datasets/*.json'))
for file_path in json_files:
    # Failures are handled per file so that one unreadable or malformed file
    # does not discard the files that come after it.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error loading dataset {file_path}: {e}")
        continue
    # Only a top-level JSON list is accepted; other shapes are ignored.
    if isinstance(data, list):
        dataset.extend(
            item
            for item in data
            # Keep records that have a textual question and an answer; a
            # non-string question would break the .lower() call below.
            if isinstance(item, dict)
            and isinstance(item.get('Question'), str)
            and 'Answer' in item
        )

if not dataset:
    print("Warning: no question/answer records were loaded from datasets/*.json")

# Precompute embeddings once at import time; the three sequences below are
# index-aligned (question i <-> answer i <-> embedding row i).
dataset_questions = [item["Question"].lower().strip() for item in dataset]
dataset_answers = [item["Answer"] for item in dataset]
dataset_embeddings = similarity_model.encode(dataset_questions, convert_to_tensor=True)
def manage_unmatched_queries(query: str):
    """Record *query* in the Hugging Face dataset of unmatched queries.

    The current contents of ``HF_DATASET_REPO`` are loaded, a new row
    (``Query``, ``Timestamp``, ``Processed=False``) is appended unless the
    exact query text is already present, and the result is pushed back to
    the hub.

    This is best-effort: any failure is printed and swallowed so that the
    caller's request is never interrupted. Returns ``None``.
    """
    try:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            ds = load_dataset(HF_DATASET_REPO, token=HF_TOKEN)
            df = ds["train"].to_pandas()
        except Exception as load_error:
            # Start from an empty table when the existing data cannot be
            # loaded (e.g. nothing has been pushed yet).
            # NOTE(review): this also triggers on transient load errors; if
            # the push below then succeeds, previously stored rows are
            # replaced by this single entry. Confirm whether that is
            # acceptable or whether only "not found" errors should reset.
            print(f"Could not load existing unmatched queries, starting fresh: {load_error}")
            df = pd.DataFrame(columns=["Query", "Timestamp", "Processed"])

        # Exact-text de-duplication: the same query is stored only once.
        if query not in df["Query"].values:
            new_entry = {"Query": query, "Timestamp": timestamp, "Processed": False}
            df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True)
            updated_ds = Dataset.from_pandas(df)
            updated_ds.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN)
    except Exception as e:
        print(f"Failed to save query: {e}")
def query_groq_llm(
    prompt,
    *,
    model="llama-3.3-70b-versatile",
    temperature=1,
    max_completion_tokens=1024,
):
    """Send *prompt* to the Groq chat API and return the full reply text.

    The completion is requested in streaming mode and the streamed pieces
    are concatenated before returning.

    Args:
        prompt: User-role message content.
        model: Groq model identifier.
        temperature: Sampling temperature passed to the API.
        max_completion_tokens: Upper bound on generated tokens.

    Returns:
        The stripped response text, or ``None`` if the request or the
        stream fails for any reason (the error is printed).
    """
    try:
        completion = groq_client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": "You are the official UOE AI Assistant. Your goal is to provide highly structured, professional, and easy-to-read information about the University of Education Lahore."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=1,
            stream=True,
            stop=None,
        )
        pieces = []
        for chunk in completion:
            # Skip chunks that carry no choices; indexing them would raise
            # and throw away everything received so far.
            if not chunk.choices:
                continue
            # delta.content may be None, hence the "or" fallback.
            pieces.append(chunk.choices[0].delta.content or "")
        return "".join(pieces).strip()
    except Exception as e:
        print(f"Groq API Error: {e}")
        return None
def _normalized_phrase(text):
    """Return *text* lower-cased, punctuation removed, whitespace collapsed."""
    spaced = "".join(ch if ch.isalnum() else " " for ch in text.lower())
    return " ".join(spaced.split())


def _build_verified_prompt(user_input, original_answer):
    """Build the prompt that asks the LLM to reformat a verified answer."""
    return f"""You are the official University of Education (UOE) Assistant.
I have found a verified answer in our database. Please rephrase it to make it extremely professional and user-friendly.
STRICT GUIDELINES:
1. Use clear **Headings**.
2. Use **Bullet Points** for lists.
3. If there is data like timing, criteria, or contact info, present it in a **Markdown Table**.
4. Bold important keywords.
5. Maintain a polite and welcoming tone.
User Question: {user_input}
Verified Data: {original_answer}
Enhanced Response:"""


def _build_general_prompt(user_input):
    """Build the prompt used when no verified answer matches the question."""
    return f"""You are the UOE AI Assistant for University of Education Lahore.
The user asked: "{user_input}". This query is not in our verified database.
Instructions:
1. Answer based on your general knowledge about UOE Lahore.
2. Format the response with headings and bold text.
3. At the end, you MUST add this exact text:
"📢 *Note: Your query has been forwarded to our support team. We are currently updating our verified database to provide a verified answer next time.*"
4. List official contacts: Website: https://ue.edu.pk | Phone: +92-42-99262231-33"""


def get_best_answer(user_input):
    """Answer a user question, preferring verified dataset content.

    Flow:
      1. Empty / missing input -> fixed validation message.
      2. Input that is exactly a greeting -> fixed welcome message.
      3. Fewer than 3 words and no greeting phrase -> ask for more detail.
      4. Best cosine similarity against the dataset questions >= 0.65 ->
         the matching answer is reformatted by the LLM.
      5. Otherwise the query is logged via ``manage_unmatched_queries`` and
         the LLM answers from general knowledge.

    Args:
        user_input: Raw text typed by the user (``None`` is tolerated).

    Returns:
        The response text. On the verified path the stored answer itself is
        returned if the LLM call fails; on the general-knowledge path the
        result of ``query_groq_llm`` is returned unchanged, which is
        ``None`` when that call fails.
    """
    if not user_input or not user_input.strip():
        return "Please enter a valid question."

    user_input_lower = user_input.lower().strip()
    # Punctuation-free form so "Hi!" or "hello." are still recognised.
    normalized = _normalized_phrase(user_input_lower)

    # Greeting Handling
    if normalized in GREETINGS:
        return "Hello! I am the UOE AI Assistant. How can I help you regarding admissions, fees, or programs today?"

    # Length check: short inputs are only let through when they contain a
    # greeting as whole words (a plain substring test would let "hi" match
    # "which" or "this").
    if len(user_input_lower.split()) < 3:
        padded = f" {normalized} "
        if not any(f" {greet} " in padded for greet in GREETINGS):
            return "Please provide more details. I need at least 3 words to understand your query properly."

    # # Direct Fee Link (currently disabled)
    # if any(kw in user_input_lower for kw in ["fee structure", "fees structure", "semester fees", "semester fee"]):
    #     return (
    #         "💰 **Official Fee Structure**\n\n"
    #         "For the most accurate and up-to-date fee details, please visit the official University of Education link:\n"
    #         "🔗 https://ue.edu.pk/allfeestructure.php"
    #     )

    # Similarity Calculation (skipped when no dataset was loaded, because
    # argmax over an empty score vector raises).
    best_match_idx = None
    best_score = float("-inf")
    if dataset_answers:
        user_embedding = similarity_model.encode(user_input_lower, convert_to_tensor=True)
        similarities = util.pytorch_cos_sim(user_embedding, dataset_embeddings)[0]
        best_match_idx = similarities.argmax().item()
        best_score = similarities[best_match_idx].item()

    if best_score >= 0.65:
        # --- PATH 1: DATASET MATCH (Enhanced Formatting) ---
        original_answer = dataset_answers[best_match_idx]
        prompt = _build_verified_prompt(user_input, original_answer)
        # If the LLM is unavailable, still serve the verified answer as-is.
        return query_groq_llm(prompt) or original_answer

    # --- PATH 2: GENERAL KNOWLEDGE (with Forwarding Note) ---
    manage_unmatched_queries(user_input)
    return query_groq_llm(_build_general_prompt(user_input))