# nex / app.py
# byteforcegokul's picture
# Update app.py
# e4fb7a3 verified
import pandas as pd
import gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Core logic to retrieve top MCQs
def get_top_mcqs(user_input, domain, subdomain, df, top_n=10):
    """Return the MCQs whose keywords best match ``user_input``.

    Rows of ``df`` are first restricted to the requested domain/subdomain,
    then ranked by TF-IDF cosine similarity between ``user_input`` and each
    row's ``keywords`` text.

    Args:
        user_input: Free-text query; ``None`` is treated as an empty string.
        domain: Domain name; stripped and lower-cased before matching.
        subdomain: Subdomain name; stripped (case is preserved).
        df: DataFrame with at least ``domain``, ``subdomain`` and
            ``keywords`` columns.
        top_n: Maximum number of rows to return.

    Returns:
        A DataFrame of up to ``top_n`` rows, best match first, with an added
        ``similarity_score`` column and a fresh 0-based index. An empty
        DataFrame is returned when nothing can be ranked.
    """
    # Nothing selected / nothing requested: avoid calling .strip() on None and
    # avoid the ``[-0:]`` slice, which would select every row instead of none.
    if domain is None or subdomain is None or top_n <= 0:
        return pd.DataFrame()

    domain = str(domain).strip().lower()
    subdomain = str(subdomain).strip()

    filtered_df = df[(df['domain'] == domain) & (df['subdomain'] == subdomain)]
    if filtered_df.empty:
        return pd.DataFrame()

    query = "" if user_input is None else str(user_input)
    # Missing (NaN) keyword cells are not valid text documents; treat them as
    # empty strings so one bad row does not abort the whole ranking.
    keyword_docs = filtered_df['keywords'].fillna("").astype(str).tolist()
    # The query goes first so that row 0 of the matrix is the user's input.
    documents = [query, *keyword_docs]

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(documents)
    except ValueError:
        # Raised when no document yields any token (empty vocabulary).
        return pd.DataFrame()

    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    # argsort is ascending: take the last ``top_n`` and reverse for best-first.
    top_indices = cosine_sim.argsort()[-top_n:][::-1]

    top_questions = filtered_df.iloc[top_indices].copy()
    top_questions['similarity_score'] = cosine_sim[top_indices]
    return top_questions.reset_index(drop=True)
# Quiz generation logic
def run_quiz(domain, subdomain, keyword_input, df):
    """Build the quiz text for the selected domain/subdomain and keywords.

    Args:
        domain: Selected domain name.
        subdomain: Selected subdomain name.
        keyword_input: Free-text keywords used to rank the questions.
        df: DataFrame holding the MCQ dataset.

    Returns:
        A string with one numbered block per question (four options plus the
        correct answer), or a warning message when no dataset is loaded or
        no question matches.
    """
    # Without a loaded dataset there is nothing to index into.
    if not isinstance(df, pd.DataFrame):
        return "⚠️ No dataset loaded. Please upload a valid MCQ CSV first."

    mcq_df = get_top_mcqs(keyword_input, domain, subdomain, df)
    if mcq_df.empty:
        return "⚠️ No questions found for the selected domain/subdomain."

    sections = []
    # Number from 1 by position rather than relying on the frame's index labels.
    for number, (_, row) in enumerate(mcq_df.iterrows(), start=1):
        sections.append(
            f"Q{number}: {row['question']}\n"
            f"A. {row['option1']}\n"
            f"B. {row['option2']}\n"
            f"C. {row['option3']}\n"
            f"D. {row['option4']}\n"
            f"(✅ Correct Answer: {row['correct_answer']})\n\n"
        )
    return "".join(sections)
# Dynamic subdomain update
def update_subdomains(domain, df):
    """Return a dropdown update listing the subdomains of ``domain``.

    Args:
        domain: Selected domain name; stripped and lower-cased before matching.
        df: DataFrame with ``domain`` and ``subdomain`` columns.

    Returns:
        A Gradio update that replaces the dropdown's choices with the sorted,
        de-duplicated subdomains of ``domain`` and clears its current value.
        The choices are empty when no domain is selected or no dataset is
        available.
    """
    # A cleared dropdown delivers None; there is nothing to look up then.
    if not domain or not isinstance(df, pd.DataFrame):
        return gr.update(choices=[], value=None)

    domain = domain.strip().lower()
    subdomains = df[df["domain"] == domain]["subdomain"].dropna().unique().tolist()
    # gr.update is used instead of the component-class ``update`` method so the
    # same code runs on Gradio releases that no longer ship the latter.
    return gr.update(choices=sorted(subdomains), value=None)
# Load and handle file upload
def load_and_update(file):
    """Load an uploaded MCQ CSV and normalise its domain columns.

    Args:
        file: Value delivered by the upload component: an object exposing the
            file path as ``.name``, a plain path string, or ``None``.

    Returns:
        The loaded DataFrame on success; a human-readable message (``str``)
        when the file is empty, lacks required columns or cannot be read; or
        ``None`` when no file was provided.
    """
    if file is None:
        return None

    # Accept both upload objects (path in ``.name``) and plain path strings.
    path = getattr(file, "name", file)
    try:
        df = pd.read_csv(path)  # Read the CSV file
        if df.empty:
            return "⚠️ The uploaded dataset is empty."

        # Ensure expected columns exist
        required_columns = ['domain', 'subdomain', 'keywords', 'question', 'option1', 'option2', 'option3', 'option4', 'correct_answer']
        # Keep the declared order so the message is deterministic.
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            return f"⚠️ Missing required columns in the dataset: {', '.join(missing_columns)}"

        # Normalize text to avoid case/whitespace mismatches
        df['domain'] = df['domain'].str.strip().str.lower()
        df['subdomain'] = df['subdomain'].str.strip()
        return df
    except Exception as e:
        # UI boundary: report the problem to the user instead of raising.
        return f"Error loading file: {str(e)}"


# Gradio UI
def launch_interface():
    """Build the Gradio Blocks UI, wire its events and launch the app."""

    def _empty_dropdown():
        # A fresh update object per output, so no dict is shared between them.
        return gr.update(choices=[], value=None)

    def _on_upload(file):
        """Translate load_and_update's result into UI outputs.

        Returns (dataset, domain dropdown update, subdomain dropdown update,
        status text).
        """
        result = load_and_update(file)
        if isinstance(result, pd.DataFrame):
            domains = sorted(result['domain'].dropna().unique().tolist())
            status = f"✅ Loaded {len(result)} questions."
            return result, gr.update(choices=domains, value=None), _empty_dropdown(), status
        # None (no file) or a warning/error message: drop any previous dataset.
        return None, _empty_dropdown(), _empty_dropdown(), result or ""

    def _on_domain_change(domain, df):
        """Refresh the subdomain choices for the selected domain."""
        if not domain or df is None:
            return _empty_dropdown()
        return update_subdomains(domain, df)

    def _on_quiz(domain, subdomain, keywords, df):
        """Validate the selections, then delegate to run_quiz."""
        if df is None:
            return "⚠️ Please upload a valid MCQ dataset first."
        if not domain or not subdomain:
            return "⚠️ Please select a domain and a subdomain."
        return run_quiz(domain, subdomain, keywords, df)

    with gr.Blocks() as demo:
        gr.Markdown("## 🧠 Domain-Based MCQ Quiz System")
        # Holds the parsed dataset between events; the handlers receive the
        # DataFrame from here rather than the raw upload value.
        dataset_state = gr.State(None)

        with gr.Row():
            domain_dropdown = gr.Dropdown(label="Select Domain", choices=[])
            subdomain_dropdown = gr.Dropdown(label="Select Subdomain", choices=[])

        # No explicit ``type``: load_and_update handles both upload objects
        # and plain path strings, whichever the installed Gradio delivers.
        file_upload = gr.File(label="Upload MCQ Dataset (CSV)")
        upload_status = gr.Markdown()

        # Update dataset, domain list and status when a file is uploaded
        file_upload.change(
            fn=_on_upload,
            inputs=file_upload,
            outputs=[dataset_state, domain_dropdown, subdomain_dropdown, upload_status],
        )
        domain_dropdown.change(
            fn=_on_domain_change,
            inputs=[domain_dropdown, dataset_state],
            outputs=subdomain_dropdown,
        )

        keyword_input = gr.Textbox(label="Enter keywords or topic")
        quiz_button = gr.Button("Get Top MCQs")
        quiz_output = gr.Textbox(label="Quiz Questions", lines=20)
        quiz_button.click(
            fn=_on_quiz,
            inputs=[domain_dropdown, subdomain_dropdown, keyword_input, dataset_state],
            outputs=quiz_output,
        )

    demo.launch()
# Run the interface. This call sits at module level, so executing (or
# importing) this file builds the UI and calls demo.launch().
launch_interface()