"""Domain-based MCQ quiz app.

Loads a CSV of multiple-choice questions, lets the user pick a domain and
subdomain, and ranks the matching questions by TF-IDF cosine similarity
between the user's keywords and each question's ``keywords`` column.
"""

import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Columns the uploaded CSV must provide.
REQUIRED_COLUMNS = [
    "domain",
    "subdomain",
    "keywords",
    "question",
    "option1",
    "option2",
    "option3",
    "option4",
    "correct_answer",
]


def _clean_text(value, lower=False):
    """Return *value* as stripped text; ``None`` becomes an empty string."""
    text = "" if value is None else str(value).strip()
    return text.lower() if lower else text


# Core logic to retrieve top MCQs
def get_top_mcqs(user_input, domain, subdomain, df, top_n=10):
    """Return the ``top_n`` questions most similar to ``user_input``.

    Args:
        user_input: Free-text keywords or topic typed by the user.
        domain: Domain to filter on (compared stripped and lower-cased).
        subdomain: Subdomain to filter on (compared stripped).
        df: DataFrame with at least ``domain``, ``subdomain`` and
            ``keywords`` columns.
        top_n: Maximum number of questions to return.

    Returns:
        A DataFrame of the best matches, most similar first, with an added
        ``similarity_score`` column and a fresh 0-based index. An empty
        DataFrame is returned when nothing matches the domain/subdomain or
        when ``top_n`` is not positive.
    """
    # A non-positive top_n must not fall through: ``[-0:]`` selects every row.
    if top_n <= 0:
        return pd.DataFrame()

    domain = _clean_text(domain, lower=True)
    subdomain = _clean_text(subdomain)

    filtered_df = df[(df["domain"] == domain) & (df["subdomain"] == subdomain)]
    if filtered_df.empty:
        return pd.DataFrame()

    # Missing keywords would make the vectorizer fail on a float NaN.
    keywords = filtered_df["keywords"].fillna("").astype(str).tolist()
    # The query goes first so row 0 of the matrix is the user's input.
    documents = [_clean_text(user_input), *keywords]

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(documents)
    except ValueError:
        # Raised when no document yields a token (empty vocabulary); nothing
        # can be ranked, so return the first rows with a zero score.
        top_questions = filtered_df.head(top_n).copy()
        top_questions["similarity_score"] = 0.0
        return top_questions.reset_index(drop=True)

    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    top_indices = cosine_sim.argsort()[-top_n:][::-1]

    top_questions = filtered_df.iloc[top_indices].copy()
    top_questions["similarity_score"] = cosine_sim[top_indices]
    return top_questions.reset_index(drop=True)


# Quiz generation logic
def run_quiz(domain, subdomain, keyword_input, df):
    """Format the best-matching questions as a plain-text quiz.

    Args:
        domain: Selected domain (may be ``None`` when nothing is selected).
        subdomain: Selected subdomain (may be ``None``).
        keyword_input: Keywords or topic typed by the user.
        df: The loaded question DataFrame.

    Returns:
        The quiz text, or a warning message when no dataset is loaded or no
        question matches the selection.
    """
    if not isinstance(df, pd.DataFrame):
        return "⚠️ Please upload a valid MCQ dataset first."

    mcq_df = get_top_mcqs(keyword_input, domain, subdomain, df)
    if mcq_df.empty:
        return "⚠️ No questions found for the selected domain/subdomain."

    return "".join(
        f"Q{number}: {row['question']}\n"
        f"A. {row['option1']}\n"
        f"B. {row['option2']}\n"
        f"C. {row['option3']}\n"
        f"D. {row['option4']}\n"
        f"(✅ Correct Answer: {row['correct_answer']})\n\n"
        for number, row in enumerate(mcq_df.to_dict("records"), start=1)
    )


# Dynamic subdomain update
def update_subdomains(domain, df):
    """Return a dropdown update listing the subdomains of ``domain``.

    Args:
        domain: Selected domain (may be ``None``).
        df: The loaded question DataFrame, or ``None`` if none is loaded.

    Returns:
        A ``gr.update`` payload with the sorted subdomain choices and the
        current selection cleared.
    """
    if not isinstance(df, pd.DataFrame):
        return gr.update(choices=[], value=None)

    domain = _clean_text(domain, lower=True)
    subdomains = df[df["domain"] == domain]["subdomain"].dropna().unique().tolist()
    return gr.update(choices=sorted(subdomains), value=None)


# Load and handle file upload
def load_and_update(file):
    """Read and validate the uploaded CSV.

    Args:
        file: The upload value from ``gr.File``: a filesystem path, an object
            exposing the path as ``.name``, or ``None`` when cleared.

    Returns:
        The normalized DataFrame on success, a human-readable error string on
        failure, or ``None`` when no file was provided.
    """
    if file is None:
        return None

    # Depending on the Gradio version the upload arrives as a path string or
    # as a temp-file wrapper whose ``.name`` is the path.
    path = getattr(file, "name", file)

    try:
        df = pd.read_csv(path)
        if df.empty:
            return "⚠️ The uploaded dataset is empty."

        # Ensure expected columns exist
        missing_columns = set(REQUIRED_COLUMNS) - set(df.columns)
        if missing_columns:
            return (
                "⚠️ Missing required columns in the dataset: "
                f"{', '.join(sorted(missing_columns))}"
            )

        # Normalize text to avoid case/whitespace mismatches
        df["domain"] = df["domain"].str.strip().str.lower()
        df["subdomain"] = df["subdomain"].str.strip()
        return df
    except Exception as e:  # UI boundary: report any load failure as text.
        return f"Error loading file: {str(e)}"


def _on_file_upload(file):
    """Translate an upload into values for the state, dropdowns and status."""
    cleared = gr.update(choices=[], value=None)
    result = load_and_update(file)

    if not isinstance(result, pd.DataFrame):
        # ``result`` is an error message, or None when the file was cleared.
        return None, cleared, cleared, result or ""

    domains = sorted(result["domain"].dropna().unique().tolist())
    status = f"✅ Loaded {len(result)} questions across {len(domains)} domain(s)."
    return result, gr.update(choices=domains, value=None), cleared, status


# Gradio UI
def launch_interface():
    """Build the Gradio Blocks UI and start the app."""
    with gr.Blocks() as demo:
        gr.Markdown("## 🧠 Domain-Based MCQ Quiz System")

        # Holds the parsed DataFrame so handlers receive data, not the file.
        dataset_state = gr.State(None)

        with gr.Row():
            domain_dropdown = gr.Dropdown(label="Select Domain", choices=[])
            subdomain_dropdown = gr.Dropdown(label="Select Subdomain", choices=[])

        file_upload = gr.File(label="Upload MCQ Dataset (CSV)")
        upload_status = gr.Markdown()

        # Update dataset and domain list when a file is uploaded
        file_upload.change(
            fn=_on_file_upload,
            inputs=file_upload,
            outputs=[dataset_state, domain_dropdown, subdomain_dropdown, upload_status],
        )
        domain_dropdown.change(
            fn=update_subdomains,
            inputs=[domain_dropdown, dataset_state],
            outputs=subdomain_dropdown,
        )

        keyword_input = gr.Textbox(label="Enter keywords or topic")
        quiz_button = gr.Button("Get Top MCQs")
        quiz_output = gr.Textbox(label="Quiz Questions", lines=20)

        quiz_button.click(
            fn=run_quiz,
            inputs=[domain_dropdown, subdomain_dropdown, keyword_input, dataset_state],
            outputs=quiz_output,
        )

    demo.launch()


# Run the interface
if __name__ == "__main__":
    launch_interface()