import gradio as gr
import os
from datetime import datetime
import json
import re
import zipfile
import docx
from docx import Document
import pdfplumber
from langchain_groq import ChatGroq
from langchain_community.utilities import SerpAPIWrapper
from langchain_core.messages import HumanMessage
import warnings

warnings.filterwarnings('ignore')


def init_models():
    """Build the three Groq chat models and the SerpAPI search wrapper.

    Returns:
        (question_model, answer_model, trend_model, serp). All four are
        ``None`` when GROQ_API_KEY or SERPAPI_API_KEY is missing from the
        environment; callers treat that as "not configured".
    """
    groq_key = os.getenv("GROQ_API_KEY")
    serp_key = os.getenv("SERPAPI_API_KEY")
    if not groq_key or not serp_key:
        return None, None, None, None
    model_question_gen = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_key)
    model_answer_gen = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_key)
    model_trend_analyzer = ChatGroq(model="groq/compound", api_key=groq_key)
    serp = SerpAPIWrapper(serpapi_api_key=serp_key)
    return model_question_gen, model_answer_gen, model_trend_analyzer, serp


def extract_docx(path):
    """Return all non-blank paragraph text of a .docx file, newline-joined."""
    document = docx.Document(path)
    return "\n".join(p.text for p in document.paragraphs if p.text.strip())


def extract_pdf(path):
    """Return the concatenated text of every PDF page (image-only pages skipped)."""
    pages = []
    with pdfplumber.open(path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if text:
                pages.append(text)
    return "\n".join(pages)


def extract_text(path):
    """Extract plain text from a .pdf, .docx, or plain-text file, chosen by extension."""
    if path.endswith(".pdf"):
        return extract_pdf(path)
    if path.endswith(".docx"):
        return extract_docx(path)
    # FIX: use a context manager so the handle is always closed
    # (the original `open(path).read()` leaked the file object).
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


def sanitize_json(text):
    """Pull the first {...} blob out of an LLM reply and parse it as JSON.

    Strips Markdown code fences and trailing commas before closing
    brackets (a common LLM artifact), then delegates to ``json.loads``.

    Raises:
        RuntimeError: when no JSON object can be found in ``text``.
        json.JSONDecodeError: when the extracted blob is still invalid.
    """
    text = text.replace("```json", "").replace("```", "")
    m = re.search(r"(\{.*\})", text, flags=re.DOTALL)
    if not m:
        # FIX: was an f-string with no placeholders.
        raise RuntimeError("ERROR: LLM did NOT return JSON.")
    blob = m.group(1)
    # Drop trailing commas such as `{"a": 1,}` which json.loads rejects.
    blob = re.sub(r",\s*(\}|\])", r"\1", blob)
    return json.loads(blob)


# Roman-numeral unit headers ("UNIT IV") mapped to their integer index.
_ROMAN_UNITS = {"I": 1, "II": 2, "III": 3, "IV": 4, "V": 5,
                "VI": 6, "VII": 7, "VIII": 8, "IX": 9, "X": 10}


def extract_units(syllabus_text, unit_range):
    """Return only the requested units' text from a full syllabus.

    ``unit_range`` is either a span ("1-3") or a comma list ("1,3,5").
    Unit blocks are located by headers matching "UNIT <digits|roman>";
    each block runs until the next header (or end of text). Falls back
    to the full syllabus when no headers (or none of the requested
    units) are found, so generation can still proceed.

    Raises:
        ValueError: when ``unit_range`` contains non-numeric tokens.
    """
    unit_range = unit_range.replace(" ", "")
    if "-" in unit_range:
        start, end = map(int, unit_range.split("-"))
        units = list(range(start, end + 1))
    else:
        units = [int(u) for u in unit_range.split(",")]

    unit_header_pattern = r"(UNIT[\s\-—:]*([0-9IVX]+))"
    matches = list(re.finditer(unit_header_pattern, syllabus_text, flags=re.I))
    if not matches:
        return syllabus_text

    # (unit_number, start_offset, end_offset) for each recognised header.
    unit_blocks = []
    for i, m in enumerate(matches):
        raw_num = m.group(2).strip()
        if raw_num.isdigit():
            num = int(raw_num)
        else:
            # FIX: the original wrapped this in a bare `except:` that could
            # never fire; dict.get already yields None for unknown numerals.
            num = _ROMAN_UNITS.get(raw_num.upper())
        if num:
            start = m.start()
            end = matches[i + 1].start() if i + 1 < len(matches) else len(syllabus_text)
            unit_blocks.append((num, start, end))

    extracted = ""
    for u in units:
        for num, start, end in unit_blocks:
            if num == u:
                extracted += syllabus_text[start:end].strip() + "\n\n"
    return extracted.strip() if extracted else syllabus_text


def apply_MAANGO_BIG15_framework(base_prompt):
    """Prepend the MAANGO BIG15 question-generation rules to ``base_prompt``."""
    maango_block = """
=== MAANGO BIG15 ADVANCED QUESTION ENGINE FRAMEWORK ===
You MUST follow ALL 15 pillars while generating the question paper:

1. M — Multi-Cognitive Bloom Alignment
   – Strict distribution of Remember/Understand/Apply/Analyze/Evaluate/Create.
   – 40% minimum higher-order (Apply/Analyze/Evaluate/Create).

2. A — Apply–Analyze Weightage Boost
   – At least 2 questions in Part B and 1 in Part C MUST be scenario/analytical.

3. A — Adaptive Difficulty Index
   – Maintain difficulty index between 1.8 and 2.5 for every question.

4. N — Non-Repetitive Deep Coverage
   – No overlap in concepts, no repeated phrasing.

5. G — Granular Unit Balancing
   – Ensure questions cover all units proportionately.

6. O — Outcome Mapping Discipline
   – Every question must be CO-aligned using strong engineering verbs.

7. B — BIG15 Industry Integration
   – Each question must include TCS/Infosys/Wipro/Accenture/Generic relevance.

8. I — Industry Application Layer
   – At least 20% questions include real-world industry scenarios.

9. G — GATE Layer Injection
   – Higher-order questions must reflect GATE-style design/analysis depth.

10. 1 — First-Half / Second-Half Coverage Integrity
   – Early questions from Unit 1–2, later from Unit 3–5.

11. 5 — Five-Unit Symmetry
   – Maintain balance across all parts (A/B/C).

12. S — Structured Output Discipline
   – Follow exact JSON schema, no deviations.

13. E — Exam-Mode Smart Switching
   – Auto-adjust tone depending on CA / ESE / GATE mode.

14. T — Technical Depth Enforcement
   – Strong technical keywords, no vague verbs.

15. H — Holistic Coherence
   – Ensure clarity, correctness, industry relevance, and zero ambiguity.

=== END OF MAANGO BIG15 FRAMEWORK ===
"""
    return maango_block + "\n\n" + base_prompt


def build_question_prompt(subject, syllabus, numA, numB, numC, exam_mode):
    """Build the full question-generation prompt (MAANGO rules + JSON schema).

    ``numA``/``numB``/``numC`` are the exact counts of Part A questions and
    Part B/C either-or pairs the model must emit.
    """
    base_prompt = f"""
You are an exam generator for {exam_mode} mode. Output ONLY VALID JSON.

STRICT JSON SCHEMA:
{{
 "metadata": {{
   "subject": "{subject}",
   "date": "{datetime.now().strftime('%Y-%m-%d')}"
 }},
 "partA": [
   {{
     "question_text": "string",
     "marks": 2 or 3,
     "unit": "number",
     "bloom_level": "Remember/Understand/Apply/Analyze/Evaluate/Create",
     "company_tag": "TCS/Infosys/Wipro/Generic"
   }}
 ],
 "partB": [
   {{
     "either": {{
       "question_text": "string",
       "marks": 10,
       "unit": "number",
       "bloom_level": "Analyze/Evaluate/Create",
       "company_tag": "TCS/Infosys/Wipro/Generic"
     }},
     "or": {{
       "question_text": "string",
       "marks": 10,
       "unit": "number",
       "bloom_level": "Analyze/Evaluate/Create",
       "company_tag": "TCS/Infosys/Wipro/Generic"
     }}
   }}
 ],
 "partC": [
   {{
     "either": {{
       "question_text": "string",
       "marks": 15,
       "unit": "number",
       "bloom_level": "Create/Evaluate",
       "company_tag": "TCS/Infosys/Wipro/Generic"
     }},
     "or": {{
       "question_text": "string",
       "marks": 15,
       "unit": "number",
       "bloom_level": "Create/Evaluate",
       "company_tag": "TCS/Infosys/Wipro/Generic"
     }}
   }}
 ]
}}

REQUIREMENTS:
- Generate EXACTLY {numA} questions in partA.
- Generate EXACTLY {numB} pairs in partB.
- Generate EXACTLY {numC} pairs in partC.
- All questions must map correctly to the syllabus.
- Maintain unit coverage balance.
- Use industry relevance when appropriate.

Syllabus:
{syllabus}

Return ONLY pure JSON. No commentary.
"""
    return apply_MAANGO_BIG15_framework(base_prompt)


def apply_MAANGO_BIG15_answerkey(base_prompt):
    """Prepend the MAANGO BIG15 answer-key rules to ``base_prompt``."""
    maango_key_block = """
=== MAANGO BIG15 ADVANCED ANSWER-KEY FRAMEWORK ===
All answers MUST follow the 15 MAANGO pillars. Specifically:

1. Bloom Alignment
   – Answer must match Bloom level of the original question.
   – Higher-order – include reasoning, evaluation, derivation, design logic.

2. Industry Integration
   – Wherever possible, include short 1–2 line relevance to
     TCS/Infosys/Wipro/Accenture/Generic IT.

3. Correctness & Precision
   – No vague words. Use technical definitions only.

4. Depth Control
   – Part A: concise (3–6 lines)
   – Part B: structured multi-step reasoning
   – Part C: full analytical/creative design solution

5. Non-Repetition
   – Do not reuse the same explanation style across answers.

6. Difficulty Index Match
   – Keep answer complexity aligned with the question's difficulty.

7. Structured JSON Schema Enforcement
   – Must follow the answer-key schema EXACTLY.

8. GATE Style Reinforcement
   – For higher-order questions, include formulas, assumptions, models.

9. Holistic Clarity
   – Avoid ambiguities. Provide crisp, exam-ready answers.

=== END OF MAANGO BIG15 ANSWER-KEY FRAMEWORK ===
"""
    return maango_key_block + "\n\n" + base_prompt


def build_answer_prompt(syllabus, questions_json):
    """Build the answer-key prompt from the already-generated question JSON.

    NOTE(review): ``syllabus`` is accepted but not interpolated into the
    prompt — kept in the signature for caller compatibility.
    """
    base_prompt = f"""
You are an expert answer key generator. Generate ANSWERS ONLY IN VALID JSON.

STRICT JSON FORMAT:
{{
 "partA": [
   {{
     "question_text": "same as input",
     "answer": "detailed but concise answer",
     "marks": number
   }}
 ],
 "partB": [
   {{
     "question_text": "either OR question merged or handled individually",
     "answer": "model answer",
     "marks": 10
   }}
 ],
 "partC": [
   {{
     "question_text": "either OR question merged or handled individually",
     "answer": "model answer",
     "marks": 15
   }}
 ]
}}

INPUT QUESTIONS:
{json.dumps(questions_json, indent=2)}

RULES:
- DO NOT change question text.
- Provide complete, correct answers.
- Return ONLY CLEAN JSON.
"""
    return apply_MAANGO_BIG15_answerkey(base_prompt)


def apply_MAANGO_BIG15_trendanalysis(base_prompt):
    """Prepend the MAANGO BIG15 trend-analysis rules to ``base_prompt``."""
    maango_trend_block = """
=== MAANGO BIG15 TREND ANALYSIS FRAMEWORK ===
All analysis MUST follow:

1. Bloom Pattern Extraction
   – Count and percentage across all levels.

2. Difficulty Distribution Mapping
   – Detect imbalance or deviations from DI = 1.8–2.5.

3. Unit Coverage Analysis
   – Identify over-covered / under-covered units.

4. Industry Relevance Analysis
   – Evaluate TCS/Infosys/Wipro/Generic tag distribution.

5. Question-Type Distribution
   – Short-answer vs descriptive vs design-oriented.

6. MAANGO Compliance Scoring
   – Compute 0–100% MAANGO BIG15 alignment score.

7. Improvement Suggestions
   – Provide actionable, unit-wise refinements.

8. GATE/ESE Competency Mapping
   – Rate analytical & application strength.

9. Holistic Insights
   – Detect patterns indicating strong or weak syllabus areas.

=== END OF MAANGO BIG15 TREND ANALYSIS FRAMEWORK ===
"""
    return maango_trend_block + "\n\n" + base_prompt


def build_trend_prompt(subject, q_json):
    """Build the trend-analysis prompt over the generated question JSON."""
    base_prompt = f"""
You are an exam trend analyzer. Output ONLY VALID JSON.

STRICT SCHEMA:
{{
 "bloom_distribution": {{
   "Remember": 0,
   "Understand": 0,
   "Apply": 0,
   "Analyze": 0,
   "Evaluate": 0,
   "Create": 0
 }},
 "difficulty_insights": "string",
 "industry_tag_stats": {{
   "TCS": 0,
   "Infosys": 0,
   "Wipro": 0,
   "Generic": 0
 }},
 "unit_coverage": {{
   "unit1": 0,
   "unit2": 0,
   "unit3": 0,
   "unit4": 0,
   "unit5": 0
 }},
 "maango_score": "0-100",
 "improvement_suggestions": "string"
}}

RULE:
Use the "unit" field inside every question object to compute unit_coverage. DO NOT guess.

SUBJECT: {subject}

QUESTIONS FOR ANALYSIS:
{json.dumps(q_json, indent=2)}
"""
    return apply_MAANGO_BIG15_trendanalysis(base_prompt)


def _question_line(prefix, q):
    """One printable paragraph: question text plus its marks/unit/bloom/tag footer."""
    return (f"{prefix}{q.get('question_text', '')}\n"
            f"Marks: {q.get('marks', 0)} | Unit: {q.get('unit', '')} | "
            f"Bloom: {q.get('bloom_level', '')} | Tag: {q.get('company_tag', '')}")


def create_question_paper(code, name, partA, partB, partC, output_path):
    """Write the question paper (Part A list + B/C either-or pairs) to a .docx.

    Question numbering runs continuously across the three parts.
    """
    doc = Document()
    doc.add_heading("SNS College of Technology", level=1)
    doc.add_paragraph(f"Subject Code: {code} Subject: {name}")
    doc.add_paragraph(f"Date: {datetime.now().strftime('%Y-%m-%d')}")
    doc.add_paragraph("Instructions: Answer as per parts and marks specified.\n")

    doc.add_heading("Part A (Short Answer)", level=2)
    for idx, q in enumerate(partA, 1):
        doc.add_paragraph(_question_line(f"{idx}. ", q))

    doc.add_heading("Part B (Either/Or Questions)", level=2)
    start_idx = len(partA) + 1
    for idx, pair in enumerate(partB, start_idx):
        doc.add_paragraph(_question_line(f"{idx}. Either: ", pair.get("either", {})))
        doc.add_paragraph(_question_line("   Or: ", pair.get("or", {})))

    doc.add_heading("Part C (Case/Design Questions)", level=2)
    start_idx = start_idx + len(partB)
    for idx, pair in enumerate(partC, start_idx):
        doc.add_paragraph(_question_line(f"{idx}. Either: ", pair.get("either", {})))
        doc.add_paragraph(_question_line("   Or: ", pair.get("or", {})))

    doc.save(output_path)


def create_answer_key(code, name, answers, output_path):
    """Write the answer key to a .docx; numbering matches the question paper."""
    doc = Document()
    doc.add_heading(f"{name} - Answer Key", level=1)
    doc.add_paragraph(f"Subject Code: {code}\n")

    doc.add_heading("Part A Answers", level=2)
    for idx, a in enumerate(answers.get("partA", []), 1):
        doc.add_paragraph(f"{idx}. {a.get('question_text','')}\nAnswer: {a.get('answer','')}")

    doc.add_heading("Part B Answers (Either option)", level=2)
    start_idx = len(answers.get("partA", [])) + 1
    for idx, a in enumerate(answers.get("partB", []), start_idx):
        doc.add_paragraph(f"{idx}. {a.get('question_text','')}\nAnswer: {a.get('answer','')}")

    doc.add_heading("Part C Answers (Either option)", level=2)
    start_idx += len(answers.get("partB", []))
    for idx, a in enumerate(answers.get("partC", []), start_idx):
        doc.add_paragraph(f"{idx}. {a.get('question_text','')}\nAnswer: {a.get('answer','')}")

    doc.save(output_path)


def create_trend_report(code, name, trend_json, serp_data, output_path):
    """Write the trend-analysis report (bloom/difficulty/tags/units/score) to a .docx.

    ``serp_data`` may be a dict or a string; it is JSON-dumped when possible
    and truncated to 6000 characters to keep the report readable.
    """
    doc = Document()
    doc.add_heading(f"{name} - Trend Analysis Report", level=1)
    doc.add_paragraph(f"Subject Code: {code}")
    doc.add_paragraph(f"Date: {datetime.now().strftime('%Y-%m-%d')}\n")

    doc.add_heading("Bloom Distribution", level=2)
    bd = trend_json.get("bloom_distribution", {})
    if bd:
        for k, v in bd.items():
            doc.add_paragraph(f"- {k}: {v}", style='List Bullet')
    else:
        doc.add_paragraph("No bloom distribution data found.")

    doc.add_heading("Difficulty Insights", level=2)
    diff = trend_json.get("difficulty_insights", "")
    doc.add_paragraph(diff if diff else "No difficulty insights provided.")

    doc.add_heading("Industry Tag Statistics", level=2)
    its = trend_json.get("industry_tag_stats", {})
    if its:
        for k, v in its.items():
            doc.add_paragraph(f"- {k}: {v}", style='List Bullet')
    else:
        doc.add_paragraph("No industry tag stats found.")

    doc.add_heading("Unit Coverage", level=2)
    uc = trend_json.get("unit_coverage", {})
    if uc:
        for k, v in uc.items():
            doc.add_paragraph(f"- {k}: {v}", style='List Bullet')
    else:
        doc.add_paragraph("No unit coverage data found.")

    doc.add_heading("MAANGO BIG15 Compliance Score", level=2)
    maango = trend_json.get("maango_score", "")
    doc.add_paragraph(f"Score: {maango if maango else 'N/A'}")

    doc.add_heading("Improvement Suggestions", level=2)
    sugg = trend_json.get("improvement_suggestions", "")
    doc.add_paragraph(sugg if sugg else "No suggestions provided.")

    if serp_data:
        doc.add_heading("SERP / Reference Data (summary)", level=2)
        try:
            serp_text = json.dumps(serp_data, indent=2) if not isinstance(serp_data, str) else serp_data
        except Exception:
            serp_text = str(serp_data)
        max_chars = 6000
        if len(serp_text) > max_chars:
            serp_text = serp_text[:max_chars] + "\n\n[Truncated for brevity]"
        for line in serp_text.splitlines():
            doc.add_paragraph(line)

    doc.save(output_path)


def generate_exam(exam_mode, subject, code, units, numA, numB, numC, syllabus_file):
    """Full pipeline: questions -> SERP data -> trend analysis -> answer key -> zip.

    Returns (zip_path, status_message); ``zip_path`` is None on any failure.
    The broad except is deliberate — this is the Gradio callback boundary and
    every error must surface as a status string, not a traceback.
    """
    try:
        model_q, model_a, model_t, serp = init_models()
        if not model_q:
            return None, "❌ API Keys not configured. Please set GROQ_API_KEY and SERPAPI_API_KEY in Hugging Face Spaces secrets."

        # FIX: a missing upload previously surfaced as a raw AttributeError.
        if syllabus_file is None:
            return None, "❌ Please upload a syllabus file."

        # FIX: gr.Number delivers floats; the prompt needs clean integer counts
        # (otherwise "EXACTLY 10.0 questions" reaches the model).
        numA, numB, numC = int(numA), int(numB), int(numC)

        syllabus_text = extract_text(syllabus_file.name)
        selected_syllabus = extract_units(syllabus_text, units)
        if not selected_syllabus.strip():
            return None, "❌ No syllabus found for given units."

        # STEP 1: Generate Questions
        q_prompt = build_question_prompt(subject, selected_syllabus, numA, numB, numC, exam_mode)
        q_raw = model_q.invoke([HumanMessage(content=q_prompt)]).content
        q_json = sanitize_json(q_raw)
        partA = q_json.get("partA", [])
        partB = q_json.get("partB", [])
        partC = q_json.get("partC", [])
        if not (partA or partB or partC):
            return None, "❌ Generated question JSON missing required parts."

        # STEP 2: Fetch SERP data (best-effort: a failure becomes report text)
        try:
            serp_data = serp.run(f"{subject} latest industry syllabus questions trends")
        except Exception as e:
            serp_data = f"SERP fetch failed: {e}"

        # STEP 3: Analyze trends
        t_prompt = build_trend_prompt(subject, q_json)
        t_raw = model_t.invoke([HumanMessage(content=t_prompt)]).content
        t_json = sanitize_json(t_raw)

        # STEP 4: Generate Answer Key
        a_prompt = build_answer_prompt(selected_syllabus, q_json)
        a_raw = model_a.invoke([HumanMessage(content=a_prompt)]).content
        a_json = sanitize_json(a_raw)

        # STEP 5: Create DOCX files and bundle them
        qp_file = f"{code}_QuestionPaper.docx"
        ak_file = f"{code}_AnswerKey.docx"
        ta_file = f"{code}_TrendAnalysis.docx"
        create_question_paper(code, subject, partA, partB, partC, qp_file)
        create_answer_key(code, subject, a_json, ak_file)
        create_trend_report(code, subject, t_json, serp_data, ta_file)

        zip_file = f"{code}_ExamPackage.zip"
        with zipfile.ZipFile(zip_file, 'w') as zipf:
            zipf.write(qp_file)
            zipf.write(ak_file)
            zipf.write(ta_file)

        return zip_file, f"✅ Successfully generated exam package for {subject}!\n📊 MAANGO Score: {t_json.get('maango_score', 'N/A')}"
    except Exception as e:
        return None, f"❌ Error: {str(e)}"
# Create the interface without inline CSS for older Gradio versions.
# NOTE(review): the original HTML markup was lost in extraction; the tags
# below are a minimal reconstruction around the preserved visible text —
# confirm against the deployed Space before restyling.
with gr.Blocks() as demo:
    gr.HTML("""
    <div style="text-align: center;">
        <h1>🎓 SNS Tech - Q&amp;A Agent x Codeboosters Tech</h1>
        <p>AI-Powered Question Paper &amp; Answer Key Generator with Trend Analysis</p>
        <p>Powered by MAANGO BIG15 Framework | Advanced LLM Technology | Developed by Codeboosters Tech Team</p>
    </div>
    """)
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML('<h3>📋 Exam Configuration</h3>')
            exam_mode = gr.Dropdown(
                choices=["Continuous Assessment (CA)", "End Semester Exam (ESE)", "GATE Style Internal Exam"],
                label="Exam Mode",
                value="End Semester Exam (ESE)"
            )
            subject = gr.Textbox(label="Subject Name", placeholder="e.g., Data Structures")
            code = gr.Textbox(label="Subject Code", placeholder="e.g., CS301")
            units = gr.Textbox(label="Units Range", value="1-5", placeholder="e.g., 1-3 or 1,3,5")
            gr.HTML('<h3>📊 Question Distribution</h3>')
            with gr.Row():
                numA = gr.Number(label="Part A (Short)", value=10, precision=0)
                numB = gr.Number(label="Part B (Descriptive)", value=5, precision=0)
                numC = gr.Number(label="Part C (Case Study)", value=1, precision=0)
        with gr.Column(scale=1):
            gr.HTML('<h3>📁 Syllabus Upload</h3>')
            syllabus_file = gr.File(label="Upload Syllabus", file_types=[".pdf", ".docx", ".txt"])
            gr.HTML("""
            <h3>✨ MAANGO BIG15 Features</h3>
            """)
    generate_btn = gr.Button("🚀 Generate Complete Exam Package", variant="primary", size="lg")
    with gr.Row():
        output_file = gr.File(label="📦 Download Package (Question Paper + Answer Key + Trend Analysis)")
        status_msg = gr.Textbox(label="Status", lines=3)
    # Wire the button to the backend pipeline; outputs are the zip + status.
    generate_btn.click(
        fn=generate_exam,
        inputs=[exam_mode, subject, code, units, numA, numB, numC, syllabus_file],
        outputs=[output_file, status_msg]
    )
    gr.HTML(""" """)

if __name__ == "__main__":
    demo.launch()