Spaces:

codeboosterstech
/

SNSQ

Sleeping

File size: 23,382 Bytes

import gradio as gr
import os
from datetime import datetime
import json
import re
import zipfile
import docx
from docx import Document
import pdfplumber
from langchain_groq import ChatGroq
from langchain_community.utilities import SerpAPIWrapper
from langchain_core.messages import HumanMessage
import warnings
warnings.filterwarnings('ignore')

def init_models():
    """Build the LLM clients and the search wrapper from environment secrets.

    Reads ``GROQ_API_KEY`` and ``SERPAPI_API_KEY`` from the environment.

    Returns:
        A 4-tuple ``(question_model, answer_model, trend_model, serp)``.
        All four entries are ``None`` when either key is missing or empty.
    """
    groq_key, serp_key = (
        os.getenv(var) for var in ("GROQ_API_KEY", "SERPAPI_API_KEY")
    )
    if not (groq_key and serp_key):
        return None, None, None, None

    def make_llm(model_name):
        # Every chat client shares the same Groq credential.
        return ChatGroq(model=model_name, api_key=groq_key)

    question_llm = make_llm("llama-3.3-70b-versatile")
    answer_llm = make_llm("llama-3.3-70b-versatile")
    trend_llm = make_llm("groq/compound")
    search = SerpAPIWrapper(serpapi_api_key=serp_key)

    return question_llm, answer_llm, trend_llm, search

def extract_docx(path):
    """Return the text of every non-blank paragraph in a .docx file.

    Paragraphs whose text is empty or whitespace-only are dropped; the
    remaining paragraph texts are joined with newlines.
    """
    document = docx.Document(path)
    kept = []
    for para in document.paragraphs:
        if para.text.strip():
            kept.append(para.text)
    return "\n".join(kept)

def extract_pdf(path):
    """Return the extracted text of all pages in a PDF, joined by newlines.

    Pages for which ``extract_text()`` yields nothing are skipped.
    """
    with pdfplumber.open(path) as pdf:
        page_texts = [page.extract_text() for page in pdf.pages]
    return "\n".join(text for text in page_texts if text)

def extract_text(path):
    """Read a syllabus file and return its text.

    Dispatches on the file extension (case-insensitively, so ``.PDF`` and
    ``.Docx`` are handled like their lowercase forms):

    * ``.pdf``  -> ``extract_pdf``
    * ``.docx`` -> ``extract_docx``
    * anything else is read as UTF-8 text.

    Args:
        path: Filesystem path of the file to read.

    Returns:
        The extracted text as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If a plain-text file is not valid UTF-8.
    """
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        return extract_pdf(path)
    if lowered.endswith(".docx"):
        return extract_docx(path)
    # Context manager so the handle is closed even if decoding fails.
    with open(path, 'r', encoding='utf-8') as handle:
        return handle.read()

def sanitize_json(text):
    """Extract and parse the JSON object embedded in an LLM response.

    Markdown code fences are removed, then the span from the first ``{`` to
    the last ``}`` is taken as the JSON payload. The payload is parsed as-is
    first; only if that fails are trailing commas before ``}`` / ``]``
    stripped and the parse retried. Parsing strictly first avoids damaging
    valid JSON whose string values happen to contain ``, }`` or ``, ]``.

    Args:
        text: Raw model output.

    Returns:
        The decoded JSON value (a ``dict`` for well-formed responses).

    Raises:
        RuntimeError: If no ``{...}`` span is present in ``text``.
        json.JSONDecodeError: If the payload is still invalid after repair.
    """
    text = text.replace("```json", "").replace("```", "")
    match = re.search(r"\{.*\}", text, flags=re.DOTALL)
    if not match:
        raise RuntimeError("ERROR: LLM did NOT return JSON.")
    blob = match.group(0)
    try:
        return json.loads(blob)
    except json.JSONDecodeError:
        # Common LLM slip: trailing commas. Repair only when strict parsing
        # has already failed, so well-formed payloads are never rewritten.
        repaired = re.sub(r",\s*(\}|\])", r"\1", blob)
        return json.loads(repaired)

def extract_units(syllabus_text, unit_range):
    """Return only the syllabus sections belonging to the requested units.

    ``unit_range`` is a comma-separated list whose items are either a single
    unit number or an inclusive ``start-end`` range; spaces are ignored.
    Examples: ``"1-5"``, ``"1,3,5"``, ``"1-3,5"``.

    Unit headers are recognised as the word ``UNIT`` (any case) followed by
    optional whitespace / ``-`` / ``—`` / ``:`` and then either an Arabic
    number or a Roman numeral I–X. A Roman numeral must end at a word
    boundary so that ordinary prose such as "unit vector" or "unit is" is
    not mistaken for a "UNIT V" / "UNIT I" header.

    Args:
        syllabus_text: Full syllabus text.
        unit_range: Unit selection string as described above.

    Returns:
        The selected unit sections, each stripped and separated by a blank
        line, in the order requested. The full ``syllabus_text`` is returned
        unchanged when no unit header is found or no requested unit matches.

    Raises:
        ValueError: If an item of ``unit_range`` is not a valid integer or
            ``start-end`` pair.
    """
    roman_map = {
        "I": 1, "II": 2, "III": 3, "IV": 4, "V": 5,
        "VI": 6, "VII": 7, "VIII": 8, "IX": 9, "X": 10,
    }

    # Parse the selection; each comma-separated token may itself be a range.
    units = []
    for token in unit_range.replace(" ", "").split(","):
        if not token:
            continue
        if "-" in token:
            start, end = map(int, token.split("-"))
            units.extend(range(start, end + 1))
        else:
            units.append(int(token))

    header_pattern = r"UNIT[\s\-—:]*([0-9]+|[IVX]+\b)"
    matches = list(re.finditer(header_pattern, syllabus_text, flags=re.I))
    if not matches:
        return syllabus_text

    # Each block runs from its header to the start of the next header match.
    unit_blocks = []
    for i, m in enumerate(matches):
        raw_num = m.group(1).upper()
        num = int(raw_num) if raw_num.isdigit() else roman_map.get(raw_num)
        if not num:
            # Unrecognised numeral (e.g. "XX") or unit 0: not selectable.
            continue
        end = matches[i + 1].start() if i + 1 < len(matches) else len(syllabus_text)
        unit_blocks.append((num, m.start(), end))

    pieces = [
        syllabus_text[start:end].strip()
        for wanted in units
        for num, start, end in unit_blocks
        if num == wanted
    ]
    extracted = "\n\n".join(pieces).strip()
    return extracted if extracted else syllabus_text

def apply_MAANGO_BIG15_framework(base_prompt):
    """Prefix a question-generation prompt with the MAANGO BIG15 rules.

    Returns the framework text, a blank-line separator, then ``base_prompt``.
    """
    framework_rules = """
=== MAANGO BIG15 ADVANCED QUESTION ENGINE FRAMEWORK ===

You MUST follow ALL 15 pillars while generating the question paper:

1. M — Multi-Cognitive Bloom Alignment
   – Strict distribution of Remember/Understand/Apply/Analyze/Evaluate/Create.
   – 40% minimum higher-order (Apply/Analyze/Evaluate/Create).

2. A — Apply–Analyze Weightage Boost
   – At least 2 questions in Part B and 1 in Part C MUST be scenario/analytical.

3. A — Adaptive Difficulty Index
   – Maintain difficulty index between 1.8 and 2.5 for every question.

4. N — Non-Repetitive Deep Coverage
   – No overlap in concepts, no repeated phrasing.

5. G — Granular Unit Balancing
   – Ensure questions cover all units proportionately.

6. O — Outcome Mapping Discipline
   – Every question must be CO-aligned using strong engineering verbs.

7. B — BIG15 Industry Integration
   – Each question must include TCS/Infosys/Wipro/Accenture/Generic relevance.

8. I — Industry Application Layer
   – At least 20% questions include real-world industry scenarios.

9. G — GATE Layer Injection
   – Higher-order questions must reflect GATE-style design/analysis depth.

10. 1 — First-Half / Second-Half Coverage Integrity
    – Early questions from Unit 1–2, later from Unit 3–5.

11. 5 — Five-Unit Symmetry
    – Maintain balance across all parts (A/B/C).

12. S — Structured Output Discipline
    – Follow exact JSON schema, no deviations.

13. E — Exam-Mode Smart Switching
    – Auto-adjust tone depending on CA / ESE / GATE mode.

14. T — Technical Depth Enforcement
    – Strong technical keywords, no vague verbs.

15. H — Holistic Coherence
    – Ensure clarity, correctness, industry relevance, and zero ambiguity.

=== END OF MAANGO BIG15 FRAMEWORK ===
"""
    return "\n\n".join((framework_rules, base_prompt))

def build_question_prompt(subject, syllabus, numA, numB, numC, exam_mode):
    """Compose the full question-paper generation prompt.

    The prompt embeds the exam mode, the expected JSON schema (with the
    subject and today's date filled into ``metadata``), the exact number of
    Part A questions and Part B / Part C either-or pairs, and the syllabus
    text. The result is wrapped by ``apply_MAANGO_BIG15_framework``.
    """
    today = datetime.now().strftime('%Y-%m-%d')
    prompt_body = f"""
You are an exam generator for {exam_mode} mode. Output ONLY VALID JSON.

STRICT JSON SCHEMA:

{{
  "metadata": {{
    "subject": "{subject}",
    "date": "{today}"
  }},
  "partA": [
    {{
      "question_text": "string",
      "marks": 2 or 3,
      "unit": "number",
      "bloom_level": "Remember/Understand/Apply/Analyze/Evaluate/Create",
      "company_tag": "TCS/Infosys/Wipro/Generic"
    }}
  ],
  "partB": [
    {{
      "either": {{
        "question_text": "string",
        "marks": 10,
        "unit": "number",
        "bloom_level": "Analyze/Evaluate/Create",
        "company_tag": "TCS/Infosys/Wipro/Generic"
      }},
      "or": {{
        "question_text": "string",
        "marks": 10,
        "unit": "number",
        "bloom_level": "Analyze/Evaluate/Create",
        "company_tag": "TCS/Infosys/Wipro/Generic"
      }}
    }}
  ],
  "partC": [
    {{
      "either": {{
        "question_text": "string",
        "marks": 15,
        "unit": "number",
        "bloom_level": "Create/Evaluate",
        "company_tag": "TCS/Infosys/Wipro/Generic"
      }},
      "or": {{
        "question_text": "string",
        "marks": 15,
        "unit": "number",
        "bloom_level": "Create/Evaluate",
        "company_tag": "TCS/Infosys/Wipro/Generic"
      }}
    }}
  ]
}}

REQUIREMENTS:
- Generate EXACTLY {numA} questions in partA.
- Generate EXACTLY {numB} pairs in partB.
- Generate EXACTLY {numC} pairs in partC.
- All questions must map correctly to the syllabus.
- Maintain unit coverage balance.
- Use industry relevance when appropriate.

Syllabus:
{syllabus}

Return ONLY pure JSON. No commentary.
"""
    return apply_MAANGO_BIG15_framework(prompt_body)

def apply_MAANGO_BIG15_answerkey(base_prompt):
    """Prefix an answer-key prompt with the MAANGO BIG15 answer-key rules.

    Returns the framework text, a blank-line separator, then ``base_prompt``.
    """
    answer_key_rules = """
=== MAANGO BIG15 ADVANCED ANSWER-KEY FRAMEWORK ===

All answers MUST follow the 15 MAANGO pillars. Specifically:

1. Bloom Alignment
   – Answer must match Bloom level of the original question.
   – Higher-order – include reasoning, evaluation, derivation, design logic.

2. Industry Integration
   – Wherever possible, include short 1–2 line relevance to TCS/Infosys/Wipro/Accenture/Generic IT.

3. Correctness & Precision
   – No vague words. Use technical definitions only.

4. Depth Control
   – Part A: concise (3–6 lines)
   – Part B: structured multi-step reasoning
   – Part C: full analytical/creative design solution

5. Non-Repetition
   – Do not reuse the same explanation style across answers.

6. Difficulty Index Match
   – Keep answer complexity aligned with the question's difficulty.

7. Structured JSON Schema Enforcement
   – Must follow the answer-key schema EXACTLY.

8. GATE Style Reinforcement
   – For higher-order questions, include formulas, assumptions, models.

9. Holistic Clarity
   – Avoid ambiguities. Provide crisp, exam-ready answers.

=== END OF MAANGO BIG15 ANSWER-KEY FRAMEWORK ===
"""
    return "\n\n".join((answer_key_rules, base_prompt))

def build_answer_prompt(syllabus, questions_json):
    """Compose the answer-key generation prompt for a set of questions.

    ``questions_json`` is serialised with two-space indentation and embedded
    after the expected answer schema; the result is wrapped by
    ``apply_MAANGO_BIG15_answerkey``. ``syllabus`` is accepted but does not
    appear in the prompt text.
    """
    questions_dump = json.dumps(questions_json, indent=2)
    prompt_body = f"""
You are an expert answer key generator.

Generate ANSWERS ONLY IN VALID JSON.

STRICT JSON FORMAT:

{{
  "partA": [
    {{
      "question_text": "same as input",
      "answer": "detailed but concise answer",
      "marks": number
    }}
  ],
  "partB": [
    {{
      "question_text": "either OR question merged or handled individually",
      "answer": "model answer",
      "marks": 10
    }}
  ],
  "partC": [
    {{
      "question_text": "either OR question merged or handled individually",
      "answer": "model answer",
      "marks": 15
    }}
  ]
}}

INPUT QUESTIONS:
{questions_dump}

RULES:
- DO NOT change question text.
- Provide complete, correct answers.
- Return ONLY CLEAN JSON.
"""
    return apply_MAANGO_BIG15_answerkey(prompt_body)

def apply_MAANGO_BIG15_trendanalysis(base_prompt):
    """Prefix a trend-analysis prompt with the MAANGO BIG15 analysis rules.

    Returns the framework text, a blank-line separator, then ``base_prompt``.
    """
    trend_rules = """
=== MAANGO BIG15 TREND ANALYSIS FRAMEWORK ===

All analysis MUST follow:

1. Bloom Pattern Extraction
   – Count and percentage across all levels.

2. Difficulty Distribution Mapping
   – Detect imbalance or deviations from DI = 1.8–2.5.

3. Unit Coverage Analysis
   – Identify over-covered / under-covered units.

4. Industry Relevance Analysis
   – Evaluate TCS/Infosys/Wipro/Generic tag distribution.

5. Question-Type Distribution
   – Short-answer vs descriptive vs design-oriented.

6. MAANGO Compliance Scoring
   – Compute 0–100% MAANGO BIG15 alignment score.

7. Improvement Suggestions
   – Provide actionable, unit-wise refinements.

8. GATE/ESE Competency Mapping
   – Rate analytical & application strength.

9. Holistic Insights
   – Detect patterns indicating strong or weak syllabus areas.

=== END OF MAANGO BIG15 TREND ANALYSIS FRAMEWORK ===
"""
    return "\n\n".join((trend_rules, base_prompt))

def build_trend_prompt(subject, q_json):
    """Compose the trend-analysis prompt for a generated question set.

    The prompt contains the expected output schema, the subject name and
    ``q_json`` serialised with two-space indentation; the result is wrapped
    by ``apply_MAANGO_BIG15_trendanalysis``.
    """
    questions_dump = json.dumps(q_json, indent=2)
    prompt_body = f"""
You are an exam trend analyzer. Output ONLY VALID JSON.

STRICT SCHEMA:
{{
  "bloom_distribution": {{
    "Remember": 0,
    "Understand": 0,
    "Apply": 0,
    "Analyze": 0,
    "Evaluate": 0,
    "Create": 0
  }},
  "difficulty_insights": "string",
  "industry_tag_stats": {{
    "TCS": 0,
    "Infosys": 0,
    "Wipro": 0,
    "Generic": 0
  }},
  "unit_coverage": {{
    "unit1": 0,
    "unit2": 0,
    "unit3": 0,
    "unit4": 0,
    "unit5": 0
  }},
  "maango_score": "0-100",
  "improvement_suggestions": "string"
}}

RULE:
Use the "unit" field inside every question object to compute unit_coverage. DO NOT guess.

SUBJECT:
{subject}

QUESTIONS FOR ANALYSIS:
{questions_dump}
"""
    return apply_MAANGO_BIG15_trendanalysis(prompt_body)

def create_question_paper(code, name, partA, partB, partC, output_path):
    """Write the question paper to ``output_path`` as a .docx document.

    Part A entries are single question dicts; Part B and Part C entries are
    dicts holding an ``"either"`` and an ``"or"`` question. Questions are
    numbered continuously across the three parts, one number per Part A
    question and one per either/or pair. Missing fields fall back to an
    empty string (``0`` for marks).
    """
    paper = Document()

    def meta_line(question):
        # Marks / unit / Bloom / tag footer printed under each question.
        return (
            f"Marks: {question.get('marks',0)} | Unit: {question.get('unit','')} | "
            f"Bloom: {question.get('bloom_level','')} | Tag: {question.get('company_tag','')}"
        )

    def write_pairs(pairs, first_number):
        # Emit one "Either" and one "Or" paragraph per pair.
        for number, pair in enumerate(pairs, first_number):
            either_q = pair.get("either", {})
            or_q = pair.get("or", {})
            paper.add_paragraph(
                f"{number}. Either: {either_q.get('question_text','')}\n{meta_line(either_q)}"
            )
            paper.add_paragraph(
                f"   Or: {or_q.get('question_text','')}\n{meta_line(or_q)}"
            )

    paper.add_heading("SNS College of Technology", level=1)
    paper.add_paragraph(f"Subject Code: {code}    Subject: {name}")
    paper.add_paragraph(f"Date: {datetime.now().strftime('%Y-%m-%d')}")
    paper.add_paragraph("Instructions: Answer as per parts and marks specified.\n")

    paper.add_heading("Part A (Short Answer)", level=2)
    for number, question in enumerate(partA, 1):
        paper.add_paragraph(
            f"{number}. {question.get('question_text','')}\n{meta_line(question)}"
        )

    part_b_first = len(partA) + 1
    paper.add_heading("Part B (Either/Or Questions)", level=2)
    write_pairs(partB, part_b_first)

    part_c_first = part_b_first + len(partB)
    paper.add_heading("Part C (Case/Design Questions)", level=2)
    write_pairs(partC, part_c_first)

    paper.save(output_path)

def create_answer_key(code, name, answers, output_path):
    """Write the answer key to ``output_path`` as a .docx document.

    ``answers`` is read via its ``"partA"``, ``"partB"`` and ``"partC"``
    lists (missing keys are treated as empty); each entry contributes its
    ``"question_text"`` and ``"answer"``. Numbering continues across parts.
    """
    key_doc = Document()
    key_doc.add_heading(f"{name} - Answer Key", level=1)
    key_doc.add_paragraph(f"Subject Code: {code}\n")

    sections = (
        ("partA", "Part A Answers"),
        ("partB", "Part B Answers (Either option)"),
        ("partC", "Part C Answers (Either option)"),
    )
    next_number = 1
    for part_key, heading in sections:
        key_doc.add_heading(heading, level=2)
        entries = answers.get(part_key, [])
        for number, entry in enumerate(entries, next_number):
            key_doc.add_paragraph(
                f"{number}. {entry.get('question_text','')}\nAnswer: {entry.get('answer','')}"
            )
        # Carry the running question number into the next part.
        next_number += len(entries)

    key_doc.save(output_path)

def create_trend_report(code, name, trend_json, serp_data, output_path):
    """Write the trend-analysis report to ``output_path`` as a .docx document.

    Sections are emitted in a fixed order: Bloom distribution, difficulty
    insights, industry tag statistics, unit coverage, MAANGO score and
    improvement suggestions, each with a placeholder line when the matching
    ``trend_json`` field is missing or empty. When ``serp_data`` is truthy it
    is appended (JSON-encoded unless already a string) and cut to 6000
    characters.
    """
    report = Document()

    def bullet_section(title, key, empty_message):
        # One bullet per key/value pair, or a placeholder when there is none.
        report.add_heading(title, level=2)
        values = trend_json.get(key, {})
        if not values:
            report.add_paragraph(empty_message)
            return
        for label, value in values.items():
            report.add_paragraph(f"- {label}: {value}", style='List Bullet')

    def text_section(title, key, empty_message):
        # Free-text field with a placeholder fallback.
        report.add_heading(title, level=2)
        report.add_paragraph(trend_json.get(key, "") or empty_message)

    report.add_heading(f"{name} - Trend Analysis Report", level=1)
    report.add_paragraph(f"Subject Code: {code}")
    report.add_paragraph(f"Date: {datetime.now().strftime('%Y-%m-%d')}\n")

    bullet_section("Bloom Distribution", "bloom_distribution",
                   "No bloom distribution data found.")
    text_section("Difficulty Insights", "difficulty_insights",
                 "No difficulty insights provided.")
    bullet_section("Industry Tag Statistics", "industry_tag_stats",
                   "No industry tag stats found.")
    bullet_section("Unit Coverage", "unit_coverage",
                   "No unit coverage data found.")

    report.add_heading("MAANGO BIG15 Compliance Score", level=2)
    score = trend_json.get("maango_score", "")
    report.add_paragraph(f"Score: {score or 'N/A'}")

    text_section("Improvement Suggestions", "improvement_suggestions",
                 "No suggestions provided.")

    if serp_data:
        report.add_heading("SERP / Reference Data (summary)", level=2)
        if isinstance(serp_data, str):
            serp_text = serp_data
        else:
            try:
                serp_text = json.dumps(serp_data, indent=2)
            except Exception:
                # Not JSON-serialisable: fall back to its string form.
                serp_text = str(serp_data)
        limit = 6000
        if len(serp_text) > limit:
            serp_text = serp_text[:limit] + "\n\n[Truncated for brevity]"
        for line in serp_text.splitlines():
            report.add_paragraph(line)

    report.save(output_path)

def generate_exam(exam_mode, subject, code, units, numA, numB, numC, syllabus_file):
    """Generate the question paper, answer key and trend report as one zip.

    Pipeline: read the syllabus, keep the requested units, ask the question
    model for a paper, fetch search results, ask the trend model for an
    analysis, ask the answer model for an answer key, render three .docx
    files and bundle them into a zip archive.

    Args:
        exam_mode: Exam mode label inserted into the question prompt.
        subject: Subject name.
        code: Subject code; used in document text and (sanitised) in the
            output file names.
        units: Unit selection string understood by ``extract_units``.
        numA, numB, numC: Number of Part A questions and Part B / Part C
            either-or pairs to request.
        syllabus_file: The uploaded syllabus, either a filesystem path
            string or an object exposing the path as ``.name``.

    Returns:
        ``(zip_path, status_message)`` on success, ``(None, error_message)``
        on any failure. Exceptions are never propagated to the caller.
    """
    import tempfile  # local import: tempfile is not among the module imports

    try:
        # Validate the upload first so a missing file gives a clear message
        # instead of an AttributeError on ``None``.
        if syllabus_file is None:
            return None, "❌ Please upload a syllabus file (.pdf, .docx or .txt)."

        model_q, model_a, model_t, serp = init_models()
        if not model_q:
            return None, "❌ API Keys not configured. Please set GROQ_API_KEY and SERPAPI_API_KEY in Hugging Face Spaces secrets."

        # Depending on the Gradio version, the File component yields either a
        # plain path string or a tempfile-like object with a ``.name`` path.
        syllabus_path = syllabus_file if isinstance(syllabus_file, str) else syllabus_file.name
        syllabus_text = extract_text(syllabus_path)
        selected_syllabus = extract_units(syllabus_text, units)

        if not selected_syllabus.strip():
            return None, "❌ No syllabus found for given units."

        # STEP 1: Generate Questions
        q_prompt = build_question_prompt(subject, selected_syllabus, numA, numB, numC, exam_mode)
        q_raw = model_q.invoke([HumanMessage(content=q_prompt)]).content
        q_json = sanitize_json(q_raw)

        partA = q_json.get("partA", [])
        partB = q_json.get("partB", [])
        partC = q_json.get("partC", [])

        if not (partA or partB or partC):
            return None, "❌ Generated question JSON missing required parts."

        # STEP 2: Fetch SERP data (best effort; a failure is recorded in the
        # report rather than aborting the run)
        try:
            serp_data = serp.run(f"{subject} latest industry syllabus questions trends")
        except Exception as e:
            serp_data = f"SERP fetch failed: {e}"

        # STEP 3: Analyze trends
        t_prompt = build_trend_prompt(subject, q_json)
        t_raw = model_t.invoke([HumanMessage(content=t_prompt)]).content
        t_json = sanitize_json(t_raw)

        # STEP 4: Generate Answer Key
        a_prompt = build_answer_prompt(selected_syllabus, q_json)
        a_raw = model_a.invoke([HumanMessage(content=a_prompt)]).content
        a_json = sanitize_json(a_raw)

        # STEP 5: Create DOCX files
        # The subject code is user input: reduce it to filename-safe
        # characters so path separators cannot escape the output directory.
        safe_code = re.sub(r"[^A-Za-z0-9._-]+", "_", str(code or "").strip()).strip("._") or "exam"
        # A fresh directory per request keeps concurrent runs with the same
        # code from overwriting each other. It is left in place because the
        # returned zip must still exist after this function returns.
        out_dir = tempfile.mkdtemp(prefix="exam_package_")

        qp_file = os.path.join(out_dir, f"{safe_code}_QuestionPaper.docx")
        ak_file = os.path.join(out_dir, f"{safe_code}_AnswerKey.docx")
        ta_file = os.path.join(out_dir, f"{safe_code}_TrendAnalysis.docx")

        create_question_paper(code, subject, partA, partB, partC, qp_file)
        create_answer_key(code, subject, a_json, ak_file)
        create_trend_report(code, subject, t_json, serp_data, ta_file)

        zip_file = os.path.join(out_dir, f"{safe_code}_ExamPackage.zip")
        with zipfile.ZipFile(zip_file, 'w') as zipf:
            for path in (qp_file, ak_file, ta_file):
                # Store bare file names, not the temp-directory path.
                zipf.write(path, arcname=os.path.basename(path))

        return zip_file, f"✅ Successfully generated exam package for {subject}!\n📊 MAANGO Score: {t_json.get('maango_score', 'N/A')}"

    except Exception as e:
        # UI boundary: report every failure as a status message.
        return None, f"❌ Error: {str(e)}"

# Build the Gradio UI. gr.Blocks() is created without arguments; the page
# styling is injected through a <style> tag inside the first gr.HTML block
# (original note: done this way for older Gradio versions).
with gr.Blocks() as demo:
    # Stylesheet plus the page header banner.
    gr.HTML("""
    <style>
    * {
        box-sizing: border-box;
    }
    .header-gradient {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 40px 20px;
        text-align: center;
        border-radius: 15px;
        margin-bottom: 30px;
        box-shadow: 0 10px 30px rgba(102, 126, 234, 0.3);
    }
    .header-gradient h1 {
        font-size: 2.5rem;
        margin: 0 0 10px 0;
        font-weight: 700;
    }
    .header-gradient p {
        margin: 5px 0;
        font-size: 1.1rem;
        opacity: 0.95;
    }
    .feature-card {
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        padding: 25px;
        border-radius: 12px;
        border-left: 5px solid #667eea;
        box-shadow: 0 5px 15px rgba(0,0,0,0.1);
        height: 100%;
    }
    .feature-card h4 {
        color: #667eea;
        margin-top: 0;
        font-size: 1.3rem;
    }
    .feature-card ul {
        list-style: none;
        padding: 0;
        margin: 15px 0 0 0;
    }
    .feature-card li {
        padding: 8px 0;
        font-size: 1rem;
        color: #2d3748;
    }
    .section-title {
        color: #667eea;
        font-size: 1.4rem;
        font-weight: 600;
        margin: 20px 0 15px 0;
        padding-bottom: 10px;
        border-bottom: 2px solid #667eea;
    }
    footer {
        text-align: center;
        padding: 20px;
        color: #718096;
        margin-top: 40px;
        border-top: 1px solid #e2e8f0;
    }
    @media (max-width: 768px) {
        .header-gradient h1 {
            font-size: 1.8rem;
        }
        .header-gradient p {
            font-size: 0.95rem;
        }
    }
    </style>
    <div class="header-gradient">
        <h1>🎓 SNS Tech - Q&A Agent x Codeboosters Tech</h1>
        <p><strong>AI-Powered Question Paper & Answer Key Generator with Trend Analysis</strong></p>
        <p>Powered by MAANGO BIG15 Framework | Advanced LLM Technology | Developed by Codeboosters Tech Team</p>
    </div>
    """)
    
    # Two-column form: exam settings on the left, syllabus upload on the right.
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML('<h3 class="section-title">📋 Exam Configuration</h3>')
            exam_mode = gr.Dropdown(
                choices=["Continuous Assessment (CA)", "End Semester Exam (ESE)", "GATE Style Internal Exam"],
                label="Exam Mode",
                value="End Semester Exam (ESE)"
            )
            subject = gr.Textbox(label="Subject Name", placeholder="e.g., Data Structures")
            code = gr.Textbox(label="Subject Code", placeholder="e.g., CS301")
            units = gr.Textbox(label="Units Range", value="1-5", placeholder="e.g., 1-3 or 1,3,5")
            
            # Question counts per part, passed to generate_exam as numA/numB/numC.
            gr.HTML('<h3 class="section-title">📊 Question Distribution</h3>')
            with gr.Row():
                numA = gr.Number(label="Part A (Short)", value=10, precision=0)
                numB = gr.Number(label="Part B (Descriptive)", value=5, precision=0)
                numC = gr.Number(label="Part C (Case Study)", value=1, precision=0)
        
        with gr.Column(scale=1):
            gr.HTML('<h3 class="section-title">📁 Syllabus Upload</h3>')
            syllabus_file = gr.File(label="Upload Syllabus", file_types=[".pdf", ".docx", ".txt"])
            
            # Static feature list; purely informational.
            gr.HTML("""
            <div class="feature-card">
                <h4>✨ MAANGO BIG15 Features</h4>
                <ul>
                    <li>🎯 Multi-Cognitive Bloom Alignment</li>
                    <li>🏢 Industry Tag Integration (TCS/Infosys/Wipro)</li>
                    <li>📈 Granular Unit Balancing</li>
                    <li>🔍 GATE Layer Injection</li>
                    <li>📝 Automatic Answer Key Generation</li>
                    <li>📊 Advanced Trend Analysis with SERP Data</li>
                    <li>🎓 MAANGO Compliance Scoring (0-100)</li>
                    <li>💡 AI-Powered Improvement Suggestions</li>
                </ul>
            </div>
            """)
            
            generate_btn = gr.Button("🚀 Generate Complete Exam Package", variant="primary", size="lg")
    
    # Outputs: the generated zip and a status/error message.
    with gr.Row():
        output_file = gr.File(label="📦 Download Package (Question Paper + Answer Key + Trend Analysis)")
        status_msg = gr.Textbox(label="Status", lines=3)
    
    # Input order must match generate_exam's parameter order; the two outputs
    # receive its (zip path, status message) return value.
    generate_btn.click(
        fn=generate_exam,
        inputs=[exam_mode, subject, code, units, numA, numB, numC, syllabus_file],
        outputs=[output_file, status_msg]
    )
    
    # Page footer.
    gr.HTML("""
    <footer>
        <p><strong>MAANGO BIG15 Framework</strong> - 15 Pillars of Excellence in Question Paper Generation</p>
        <p>Developed with ❤️ by Veerakumar C B | Codeboosters Tech | © 2024</p>
    </footer>
    """)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()