# SNSQ / app.py
# codeboosterstech's picture
# Update app.py
# cf60b98 verified
import gradio as gr
import os
from datetime import datetime
import json
import re
import zipfile
import docx
from docx import Document
import pdfplumber
from langchain_groq import ChatGroq
from langchain_community.utilities import SerpAPIWrapper
from langchain_core.messages import HumanMessage
import warnings
warnings.filterwarnings('ignore')
def init_models():
    """Create the Groq chat models and the SerpAPI wrapper from environment keys.

    Reads ``GROQ_API_KEY`` and ``SERPAPI_API_KEY`` from the environment.

    Returns:
        A 4-tuple ``(question_model, answer_model, trend_model, serp)``.
        If either key is missing or empty, all four elements are ``None``.
    """
    credentials = {
        name: os.getenv(name) for name in ("GROQ_API_KEY", "SERPAPI_API_KEY")
    }
    if not all(credentials.values()):
        return None, None, None, None

    groq_api_key = credentials["GROQ_API_KEY"]
    # Question and answer generation use the same model name; trend analysis
    # uses a different one.
    question_model = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key)
    answer_model = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key)
    trend_model = ChatGroq(model="groq/compound", api_key=groq_api_key)
    search = SerpAPIWrapper(serpapi_api_key=credentials["SERPAPI_API_KEY"])
    return question_model, answer_model, trend_model, search
def extract_docx(path):
    """Return the non-blank paragraphs of a .docx file, joined by newlines."""
    document = docx.Document(path)
    kept = []
    for paragraph in document.paragraphs:
        content = paragraph.text
        # Whitespace-only paragraphs are dropped; kept ones are not trimmed.
        if content.strip():
            kept.append(content)
    return "\n".join(kept)
def extract_pdf(path):
    """Return the text of every page of a PDF, joined by newlines.

    Pages for which ``extract_text()`` returns nothing are skipped.
    """
    with pdfplumber.open(path) as pdf:
        page_texts = [page.extract_text() for page in pdf.pages]
    return "\n".join(text for text in page_texts if text)
def extract_text(path):
    """Read syllabus text from a PDF, DOCX, or plain-text file.

    The reader is chosen from the file extension, compared
    case-insensitively so that ``SYLLABUS.PDF`` is not read as plain text.
    Anything that is not ``.pdf`` or ``.docx`` is read as UTF-8 text.

    Args:
        path: Path to the file, as a string or any object whose ``str()``
            is the path (for example ``pathlib.Path``).

    Returns:
        The extracted text as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If a plain-text file is not valid UTF-8.
    """
    suffix_source = str(path).lower()
    if suffix_source.endswith(".pdf"):
        return extract_pdf(path)
    if suffix_source.endswith(".docx"):
        return extract_docx(path)
    # Use a context manager so the handle is closed deterministically.
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read()
def sanitize_json(text):
    """Extract and parse the JSON object embedded in an LLM reply.

    Markdown code fences are stripped, then the span from the first ``{`` to
    the last ``}`` is parsed. The span is parsed as-is first; only if that
    fails are trailing commas before ``}`` / ``]`` removed and the parse
    retried. Repairing unconditionally would corrupt valid JSON whose string
    values happen to contain ``", }"`` or ``", ]"``.

    Args:
        text: Raw model output.

    Returns:
        The decoded JSON object.

    Raises:
        RuntimeError: If no ``{...}`` span is present.
        json.JSONDecodeError: If the span is not valid JSON even after the
            trailing-comma repair.
    """
    text = text.replace("```json", "").replace("```", "")
    found = re.search(r"(\{.*\})", text, flags=re.DOTALL)
    if not found:
        raise RuntimeError("ERROR: LLM did NOT return JSON.")
    blob = found.group(1)
    try:
        return json.loads(blob)
    except json.JSONDecodeError:
        # Fall through to the lenient path below.
        pass
    repaired = re.sub(r",\s*(\}|\])", r"\1", blob)
    return json.loads(repaired)
def _parse_unit_range(unit_range):
    """Expand a spec such as "1-3", "1,3,5" or "1-3,5" into a list of ints."""
    spec = re.sub(r"\s+", "", str(unit_range or ""))
    # Accept typographic dashes typed in place of a hyphen.
    spec = spec.replace("\u2013", "-").replace("\u2014", "-")
    units = []
    for token in spec.split(","):
        if not token:
            continue
        if "-" in token:
            first, last = map(int, token.split("-"))
            units.extend(range(first, last + 1))
        else:
            units.append(int(token))
    return units


def _unit_number(raw):
    """Convert a header number (arabic digits or roman I..X) to int, else None."""
    roman = {"I": 1, "II": 2, "III": 3, "IV": 4, "V": 5,
             "VI": 6, "VII": 7, "VIII": 8, "IX": 9, "X": 10}
    raw = raw.strip()
    if raw.isdigit():
        return int(raw)
    return roman.get(raw.upper())


def extract_units(syllabus_text, unit_range):
    """Return only the syllabus sections for the requested units.

    A section starts at a header of the form ``UNIT <n>`` (case-insensitive,
    ``<n>`` in arabic digits or roman numerals I..X, optionally separated by
    whitespace, ``-``, an en/em dash or ``:``) and runs to the next header or
    the end of the text.

    The header must not be embedded in a longer word, and a roman numeral
    must not be followed by another letter. This keeps ordinary prose such as
    "unit vector" or "communities" from being mistaken for "UNIT V" /
    "UNIT I" headers.

    Args:
        syllabus_text: Full syllabus text.
        unit_range: Unit selection such as ``"1-3"``, ``"1,3,5"`` or a mix
            like ``"1-3,5"``. Whitespace is ignored.

    Returns:
        The selected sections joined by blank lines, in the order requested.
        The whole ``syllabus_text`` is returned when no headers are found or
        none of the requested units match.

    Raises:
        ValueError: If ``unit_range`` contains a non-numeric entry.
    """
    wanted = _parse_unit_range(unit_range)

    header_re = re.compile(
        r"(?<![A-Za-z])UNIT[\s\-\u2013\u2014:]*([0-9]+|[IVX]+(?![A-Za-z]))",
        flags=re.I,
    )
    headers = list(header_re.finditer(syllabus_text))
    if not headers:
        return syllabus_text

    blocks = []
    for idx, header in enumerate(headers):
        number = _unit_number(header.group(1))
        if not number:
            # Unrecognised numeral: not a selectable unit, but it still
            # terminates the preceding block.
            continue
        if idx + 1 < len(headers):
            stop = headers[idx + 1].start()
        else:
            stop = len(syllabus_text)
        blocks.append((number, header.start(), stop))

    pieces = [
        syllabus_text[start:stop].strip()
        for unit in wanted
        for number, start, stop in blocks
        if number == unit
    ]
    extracted = "\n\n".join(pieces).strip()
    return extracted if extracted else syllabus_text
def apply_MAANGO_BIG15_framework(base_prompt):
    """Prefix a question-generation prompt with the MAANGO BIG15 framework text.

    The pillar headings use a real em dash; the previous text carried the
    mis-decoded byte sequence for that character, which put garbage
    characters into the prompt sent to the model.

    Args:
        base_prompt: The prompt to be preceded by the framework block.

    Returns:
        The framework block, two newlines, then ``base_prompt``.
    """
    maango_block = """
=== MAANGO BIG15 ADVANCED QUESTION ENGINE FRAMEWORK ===
You MUST follow ALL 15 pillars while generating the question paper:
1. M — Multi-Cognitive Bloom Alignment
– Strict distribution of Remember/Understand/Apply/Analyze/Evaluate/Create.
– 40% minimum higher-order (Apply/Analyze/Evaluate/Create).
2. A — Apply–Analyze Weightage Boost
– At least 2 questions in Part B and 1 in Part C MUST be scenario/analytical.
3. A — Adaptive Difficulty Index
– Maintain difficulty index between 1.8 and 2.5 for every question.
4. N — Non-Repetitive Deep Coverage
– No overlap in concepts, no repeated phrasing.
5. G — Granular Unit Balancing
– Ensure questions cover all units proportionately.
6. O — Outcome Mapping Discipline
– Every question must be CO-aligned using strong engineering verbs.
7. B — BIG15 Industry Integration
– Each question must include TCS/Infosys/Wipro/Accenture/Generic relevance.
8. I — Industry Application Layer
– At least 20% questions include real-world industry scenarios.
9. G — GATE Layer Injection
– Higher-order questions must reflect GATE-style design/analysis depth.
10. 1 — First-Half / Second-Half Coverage Integrity
– Early questions from Unit 1–2, later from Unit 3–5.
11. 5 — Five-Unit Symmetry
– Maintain balance across all parts (A/B/C).
12. S — Structured Output Discipline
– Follow exact JSON schema, no deviations.
13. E — Exam-Mode Smart Switching
– Auto-adjust tone depending on CA / ESE / GATE mode.
14. T — Technical Depth Enforcement
– Strong technical keywords, no vague verbs.
15. H — Holistic Coherence
– Ensure clarity, correctness, industry relevance, and zero ambiguity.
=== END OF MAANGO BIG15 FRAMEWORK ===
"""
    return maango_block + "\n\n" + base_prompt
def build_question_prompt(subject, syllabus, numA, numB, numC, exam_mode):
    """Assemble the question-paper generation prompt.

    The prompt states the exam mode, shows the JSON schema the model must
    follow (with the subject and today's date filled into ``metadata``),
    lists the required number of Part A questions and Part B / Part C
    either-or pairs, and appends the syllabus. The result is passed through
    ``apply_MAANGO_BIG15_framework`` before being returned.

    Args:
        subject: Subject name placed in the schema metadata.
        syllabus: Syllabus text the questions must map to.
        numA: Number of Part A questions requested.
        numB: Number of Part B either/or pairs requested.
        numC: Number of Part C either/or pairs requested.
        exam_mode: Exam mode label inserted in the opening line.

    Returns:
        The complete prompt string.
    """
    today = datetime.now().strftime('%Y-%m-%d')
    prompt_body = f"""
You are an exam generator for {exam_mode} mode. Output ONLY VALID JSON.
STRICT JSON SCHEMA:
{{
"metadata": {{
"subject": "{subject}",
"date": "{today}"
}},
"partA": [
{{
"question_text": "string",
"marks": 2 or 3,
"unit": "number",
"bloom_level": "Remember/Understand/Apply/Analyze/Evaluate/Create",
"company_tag": "TCS/Infosys/Wipro/Generic"
}}
],
"partB": [
{{
"either": {{
"question_text": "string",
"marks": 10,
"unit": "number",
"bloom_level": "Analyze/Evaluate/Create",
"company_tag": "TCS/Infosys/Wipro/Generic"
}},
"or": {{
"question_text": "string",
"marks": 10,
"unit": "number",
"bloom_level": "Analyze/Evaluate/Create",
"company_tag": "TCS/Infosys/Wipro/Generic"
}}
}}
],
"partC": [
{{
"either": {{
"question_text": "string",
"marks": 15,
"unit": "number",
"bloom_level": "Create/Evaluate",
"company_tag": "TCS/Infosys/Wipro/Generic"
}},
"or": {{
"question_text": "string",
"marks": 15,
"unit": "number",
"bloom_level": "Create/Evaluate",
"company_tag": "TCS/Infosys/Wipro/Generic"
}}
}}
]
}}
REQUIREMENTS:
- Generate EXACTLY {numA} questions in partA.
- Generate EXACTLY {numB} pairs in partB.
- Generate EXACTLY {numC} pairs in partC.
- All questions must map correctly to the syllabus.
- Maintain unit coverage balance.
- Use industry relevance when appropriate.
Syllabus:
{syllabus}
Return ONLY pure JSON. No commentary.
"""
    framed_prompt = apply_MAANGO_BIG15_framework(prompt_body)
    return framed_prompt
def apply_MAANGO_BIG15_answerkey(base_prompt):
    """Prefix an answer-key prompt with the MAANGO BIG15 answer-key rules.

    Args:
        base_prompt: The prompt to be preceded by the framework block.

    Returns:
        The framework block, two newlines, then ``base_prompt``.
    """
    framework_text = """
=== MAANGO BIG15 ADVANCED ANSWER-KEY FRAMEWORK ===
All answers MUST follow the 15 MAANGO pillars. Specifically:
1. Bloom Alignment
– Answer must match Bloom level of the original question.
– Higher-order – include reasoning, evaluation, derivation, design logic.
2. Industry Integration
– Wherever possible, include short 1–2 line relevance to TCS/Infosys/Wipro/Accenture/Generic IT.
3. Correctness & Precision
– No vague words. Use technical definitions only.
4. Depth Control
– Part A: concise (3–6 lines)
– Part B: structured multi-step reasoning
– Part C: full analytical/creative design solution
5. Non-Repetition
– Do not reuse the same explanation style across answers.
6. Difficulty Index Match
– Keep answer complexity aligned with the question's difficulty.
7. Structured JSON Schema Enforcement
– Must follow the answer-key schema EXACTLY.
8. GATE Style Reinforcement
– For higher-order questions, include formulas, assumptions, models.
9. Holistic Clarity
– Avoid ambiguities. Provide crisp, exam-ready answers.
=== END OF MAANGO BIG15 ANSWER-KEY FRAMEWORK ===
"""
    separator = "\n\n"
    return "".join((framework_text, separator, base_prompt))
def build_answer_prompt(syllabus, questions_json):
    """Assemble the answer-key generation prompt.

    The prompt shows the JSON format expected for the answers, embeds the
    generated questions as pretty-printed JSON, and is then passed through
    ``apply_MAANGO_BIG15_answerkey``.

    Args:
        syllabus: Accepted for interface symmetry; it is not inserted into
            the prompt text.
        questions_json: The question-paper structure to be answered; must be
            JSON-serialisable.

    Returns:
        The complete prompt string.
    """
    questions_dump = json.dumps(questions_json, indent=2)
    prompt_body = f"""
You are an expert answer key generator.
Generate ANSWERS ONLY IN VALID JSON.
STRICT JSON FORMAT:
{{
"partA": [
{{
"question_text": "same as input",
"answer": "detailed but concise answer",
"marks": number
}}
],
"partB": [
{{
"question_text": "either OR question merged or handled individually",
"answer": "model answer",
"marks": 10
}}
],
"partC": [
{{
"question_text": "either OR question merged or handled individually",
"answer": "model answer",
"marks": 15
}}
]
}}
INPUT QUESTIONS:
{questions_dump}
RULES:
- DO NOT change question text.
- Provide complete, correct answers.
- Return ONLY CLEAN JSON.
"""
    framed_prompt = apply_MAANGO_BIG15_answerkey(prompt_body)
    return framed_prompt
def apply_MAANGO_BIG15_trendanalysis(base_prompt):
    """Prefix a trend-analysis prompt with the MAANGO BIG15 analysis rules.

    Args:
        base_prompt: The prompt to be preceded by the framework block.

    Returns:
        The framework block, two newlines, then ``base_prompt``.
    """
    framework_text = """
=== MAANGO BIG15 TREND ANALYSIS FRAMEWORK ===
All analysis MUST follow:
1. Bloom Pattern Extraction
– Count and percentage across all levels.
2. Difficulty Distribution Mapping
– Detect imbalance or deviations from DI = 1.8–2.5.
3. Unit Coverage Analysis
– Identify over-covered / under-covered units.
4. Industry Relevance Analysis
– Evaluate TCS/Infosys/Wipro/Generic tag distribution.
5. Question-Type Distribution
– Short-answer vs descriptive vs design-oriented.
6. MAANGO Compliance Scoring
– Compute 0–100% MAANGO BIG15 alignment score.
7. Improvement Suggestions
– Provide actionable, unit-wise refinements.
8. GATE/ESE Competency Mapping
– Rate analytical & application strength.
9. Holistic Insights
– Detect patterns indicating strong or weak syllabus areas.
=== END OF MAANGO BIG15 TREND ANALYSIS FRAMEWORK ===
"""
    separator = "\n\n"
    return "".join((framework_text, separator, base_prompt))
def build_trend_prompt(subject, q_json):
    """Assemble the trend-analysis prompt for a generated question paper.

    The prompt shows the JSON schema expected for the analysis, names the
    subject, embeds the questions as pretty-printed JSON, and is then passed
    through ``apply_MAANGO_BIG15_trendanalysis``.

    Args:
        subject: Subject name inserted under the SUBJECT heading.
        q_json: The question-paper structure to analyse; must be
            JSON-serialisable.

    Returns:
        The complete prompt string.
    """
    questions_dump = json.dumps(q_json, indent=2)
    prompt_body = f"""
You are an exam trend analyzer. Output ONLY VALID JSON.
STRICT SCHEMA:
{{
"bloom_distribution": {{
"Remember": 0,
"Understand": 0,
"Apply": 0,
"Analyze": 0,
"Evaluate": 0,
"Create": 0
}},
"difficulty_insights": "string",
"industry_tag_stats": {{
"TCS": 0,
"Infosys": 0,
"Wipro": 0,
"Generic": 0
}},
"unit_coverage": {{
"unit1": 0,
"unit2": 0,
"unit3": 0,
"unit4": 0,
"unit5": 0
}},
"maango_score": "0-100",
"improvement_suggestions": "string"
}}
RULE:
Use the "unit" field inside every question object to compute unit_coverage. DO NOT guess.
SUBJECT:
{subject}
QUESTIONS FOR ANALYSIS:
{questions_dump}
"""
    framed_prompt = apply_MAANGO_BIG15_trendanalysis(prompt_body)
    return framed_prompt
def create_question_paper(code, name, partA, partB, partC, output_path):
    """Write the question paper to a .docx file.

    Questions are numbered continuously: Part A starts at 1, Part B continues
    after the last Part A question, and Part C continues after Part B. Each
    Part B / Part C entry is an either/or pair that shares one number.

    Args:
        code: Subject code shown in the header.
        name: Subject name shown in the header.
        partA: List of question dicts.
        partB: List of dicts with ``"either"`` and ``"or"`` question dicts.
        partC: Same structure as ``partB``.
        output_path: Destination path for the document.
    """
    def describe(question):
        # Question text followed by its marks / unit / Bloom / tag line.
        return (
            f"{question.get('question_text','')}\n"
            f"Marks: {question.get('marks',0)} | Unit: {question.get('unit','')} | "
            f"Bloom: {question.get('bloom_level','')} | Tag: {question.get('company_tag','')}"
        )

    def add_pairs(pairs, first_number):
        for number, pair in enumerate(pairs, first_number):
            first_option = pair.get("either", {})
            second_option = pair.get("or", {})
            doc.add_paragraph(f"{number}. Either: {describe(first_option)}")
            doc.add_paragraph(f" Or: {describe(second_option)}")

    doc = Document()
    doc.add_heading("SNS College of Technology", level=1)
    doc.add_paragraph(f"Subject Code: {code} Subject: {name}")
    doc.add_paragraph(f"Date: {datetime.now().strftime('%Y-%m-%d')}")
    doc.add_paragraph("Instructions: Answer as per parts and marks specified.\n")

    doc.add_heading("Part A (Short Answer)", level=2)
    for number, question in enumerate(partA, 1):
        doc.add_paragraph(f"{number}. {describe(question)}")

    part_b_start = len(partA) + 1
    doc.add_heading("Part B (Either/Or Questions)", level=2)
    add_pairs(partB, part_b_start)

    part_c_start = part_b_start + len(partB)
    doc.add_heading("Part C (Case/Design Questions)", level=2)
    add_pairs(partC, part_c_start)

    doc.save(output_path)
def create_answer_key(code, name, answers, output_path):
    """Write the answer key to a .docx file.

    Answers are numbered continuously across Part A, Part B and Part C.

    Args:
        code: Subject code shown under the title.
        name: Subject name used in the title.
        answers: Dict with optional ``"partA"``, ``"partB"`` and ``"partC"``
            lists of dicts carrying ``"question_text"`` and ``"answer"``.
        output_path: Destination path for the document.
    """
    sections = (
        ("Part A Answers", "partA"),
        ("Part B Answers (Either option)", "partB"),
        ("Part C Answers (Either option)", "partC"),
    )

    doc = Document()
    doc.add_heading(f"{name} - Answer Key", level=1)
    doc.add_paragraph(f"Subject Code: {code}\n")

    next_number = 1
    for heading, key in sections:
        doc.add_heading(heading, level=2)
        entries = answers.get(key, [])
        for number, entry in enumerate(entries, next_number):
            question = entry.get('question_text','')
            answer = entry.get('answer','')
            doc.add_paragraph(f"{number}. {question}\nAnswer: {answer}")
        # The next section continues the numbering where this one ended.
        next_number += len(entries)

    doc.save(output_path)
def create_trend_report(code, name, trend_json, serp_data, output_path):
    """Write the trend-analysis report to a .docx file.

    Sections, in order: Bloom distribution, difficulty insights, industry tag
    statistics, unit coverage, compliance score, improvement suggestions and,
    when ``serp_data`` is truthy, the search reference data (limited to 6000
    characters, one paragraph per line).

    Args:
        code: Subject code shown in the header.
        name: Subject name used in the title.
        trend_json: Dict of analysis results; missing keys fall back to a
            placeholder sentence.
        serp_data: Search result data, either a string or a value that is
            dumped as JSON (falling back to ``str()``).
        output_path: Destination path for the document.
    """
    doc = Document()

    def add_mapping_section(title, key, empty_message):
        # One bullet per key/value pair, or a placeholder when empty.
        doc.add_heading(title, level=2)
        values = trend_json.get(key, {})
        if not values:
            doc.add_paragraph(empty_message)
            return
        for label, count in values.items():
            doc.add_paragraph(f"- {label}: {count}", style='List Bullet')

    def add_text_section(title, key, empty_message):
        doc.add_heading(title, level=2)
        doc.add_paragraph(trend_json.get(key, "") or empty_message)

    doc.add_heading(f"{name} - Trend Analysis Report", level=1)
    doc.add_paragraph(f"Subject Code: {code}")
    doc.add_paragraph(f"Date: {datetime.now().strftime('%Y-%m-%d')}\n")

    add_mapping_section("Bloom Distribution", "bloom_distribution",
                        "No bloom distribution data found.")
    add_text_section("Difficulty Insights", "difficulty_insights",
                     "No difficulty insights provided.")
    add_mapping_section("Industry Tag Statistics", "industry_tag_stats",
                        "No industry tag stats found.")
    add_mapping_section("Unit Coverage", "unit_coverage",
                        "No unit coverage data found.")

    doc.add_heading("MAANGO BIG15 Compliance Score", level=2)
    score = trend_json.get("maango_score", "")
    doc.add_paragraph(f"Score: {score or 'N/A'}")

    add_text_section("Improvement Suggestions", "improvement_suggestions",
                     "No suggestions provided.")

    if serp_data:
        doc.add_heading("SERP / Reference Data (summary)", level=2)
        if isinstance(serp_data, str):
            reference_text = serp_data
        else:
            try:
                reference_text = json.dumps(serp_data, indent=2)
            except Exception:
                reference_text = str(serp_data)
        limit = 6000
        if len(reference_text) > limit:
            reference_text = reference_text[:limit] + "\n\n[Truncated for brevity]"
        for line in reference_text.splitlines():
            doc.add_paragraph(line)

    doc.save(output_path)
def generate_exam(exam_mode, subject, code, units, numA, numB, numC, syllabus_file):
    """Generate the question paper, answer key and trend report as one zip.

    Pipeline: read the uploaded syllabus, keep the requested units, ask the
    question model for the paper, fetch search data (best effort), ask the
    trend model for the analysis, ask the answer model for the key, write
    three .docx files and bundle them into a zip.

    Args:
        exam_mode: Exam mode label passed to the question prompt.
        subject: Subject name.
        code: Subject code; used in document headers and, after removing
            characters that are unsafe in file names, in output file names.
        units: Unit selection string (for example ``"1-5"``).
        numA: Number of Part A questions.
        numB: Number of Part B either/or pairs.
        numC: Number of Part C either/or pairs.
        syllabus_file: Uploaded file, either an object with a ``.name`` path
            attribute or a plain path string; ``None`` when nothing was
            uploaded.

    Returns:
        ``(zip_path, status_message)``. ``zip_path`` is ``None`` on any
        failure and ``status_message`` then explains why; exceptions are
        reported through the message instead of propagating.
    """
    # Local import: only this function needs it, and it keeps the block
    # self-contained.
    import tempfile

    try:
        if syllabus_file is None:
            return None, "❌ Please upload a syllabus file (.pdf, .docx or .txt) first."
        # NOTE(review): depending on Gradio version/config the upload arrives
        # as a temp-file wrapper or as a path string; accept both.
        syllabus_path = getattr(syllabus_file, "name", syllabus_file)

        model_q, model_a, model_t, serp = init_models()
        if not model_q:
            return None, "❌ API Keys not configured. Please set GROQ_API_KEY and SERPAPI_API_KEY in Hugging Face Spaces secrets."

        syllabus_text = extract_text(syllabus_path)
        selected_syllabus = extract_units(syllabus_text, units)
        if not selected_syllabus.strip():
            return None, "❌ No syllabus found for given units."

        # STEP 1: Generate Questions
        q_prompt = build_question_prompt(subject, selected_syllabus, numA, numB, numC, exam_mode)
        q_raw = model_q.invoke([HumanMessage(content=q_prompt)]).content
        q_json = sanitize_json(q_raw)
        partA = q_json.get("partA", [])
        partB = q_json.get("partB", [])
        partC = q_json.get("partC", [])
        if not (partA or partB or partC):
            return None, "❌ Generated question JSON missing required parts."

        # STEP 2: Fetch SERP data (best effort; a failure is recorded in the
        # report instead of aborting the run)
        try:
            serp_data = serp.run(f"{subject} latest industry syllabus questions trends")
        except Exception as e:
            serp_data = f"SERP fetch failed: {e}"

        # STEP 3: Analyze trends
        t_prompt = build_trend_prompt(subject, q_json)
        t_raw = model_t.invoke([HumanMessage(content=t_prompt)]).content
        t_json = sanitize_json(t_raw)

        # STEP 4: Generate Answer Key
        a_prompt = build_answer_prompt(selected_syllabus, q_json)
        a_raw = model_a.invoke([HumanMessage(content=a_prompt)]).content
        a_json = sanitize_json(a_raw)

        # STEP 5: Create DOCX files.
        # Write into a fresh temp directory so concurrent requests cannot
        # overwrite each other, and strip path separators / odd characters
        # from the user-supplied code before using it in a file name.
        safe_code = re.sub(r"[^A-Za-z0-9._-]+", "_", str(code or "").strip()).strip("._") or "exam"
        out_dir = tempfile.mkdtemp(prefix="exam_package_")
        qp_file = os.path.join(out_dir, f"{safe_code}_QuestionPaper.docx")
        ak_file = os.path.join(out_dir, f"{safe_code}_AnswerKey.docx")
        ta_file = os.path.join(out_dir, f"{safe_code}_TrendAnalysis.docx")
        create_question_paper(code, subject, partA, partB, partC, qp_file)
        create_answer_key(code, subject, a_json, ak_file)
        create_trend_report(code, subject, t_json, serp_data, ta_file)

        zip_file = os.path.join(out_dir, f"{safe_code}_ExamPackage.zip")
        with zipfile.ZipFile(zip_file, 'w') as zipf:
            for member in (qp_file, ak_file, ta_file):
                # Store bare file names so the archive has no temp-dir paths.
                zipf.write(member, arcname=os.path.basename(member))

        return zip_file, f"✅ Successfully generated exam package for {subject}!\n📊 MAANGO Score: {t_json.get('maango_score', 'N/A')}"
    except Exception as e:
        # UI boundary: report the failure in the status box.
        return None, f"❌ Error: {str(e)}"
# Create the interface. Styling is injected through a <style> tag inside
# gr.HTML rather than passed to gr.Blocks, for older Gradio versions.
# Emoji and the copyright sign below are written as real Unicode characters;
# the previous text carried their mis-decoded byte sequences.
with gr.Blocks() as demo:
    # Page-wide CSS plus the gradient header banner.
    gr.HTML("""
<style>
* {
    box-sizing: border-box;
}
.header-gradient {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 40px 20px;
    text-align: center;
    border-radius: 15px;
    margin-bottom: 30px;
    box-shadow: 0 10px 30px rgba(102, 126, 234, 0.3);
}
.header-gradient h1 {
    font-size: 2.5rem;
    margin: 0 0 10px 0;
    font-weight: 700;
}
.header-gradient p {
    margin: 5px 0;
    font-size: 1.1rem;
    opacity: 0.95;
}
.feature-card {
    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
    padding: 25px;
    border-radius: 12px;
    border-left: 5px solid #667eea;
    box-shadow: 0 5px 15px rgba(0,0,0,0.1);
    height: 100%;
}
.feature-card h4 {
    color: #667eea;
    margin-top: 0;
    font-size: 1.3rem;
}
.feature-card ul {
    list-style: none;
    padding: 0;
    margin: 15px 0 0 0;
}
.feature-card li {
    padding: 8px 0;
    font-size: 1rem;
    color: #2d3748;
}
.section-title {
    color: #667eea;
    font-size: 1.4rem;
    font-weight: 600;
    margin: 20px 0 15px 0;
    padding-bottom: 10px;
    border-bottom: 2px solid #667eea;
}
footer {
    text-align: center;
    padding: 20px;
    color: #718096;
    margin-top: 40px;
    border-top: 1px solid #e2e8f0;
}
@media (max-width: 768px) {
    .header-gradient h1 {
        font-size: 1.8rem;
    }
    .header-gradient p {
        font-size: 0.95rem;
    }
}
</style>
<div class="header-gradient">
<h1>🎓 SNS Tech - Q&A Agent x Codeboosters Tech</h1>
<p><strong>AI-Powered Question Paper & Answer Key Generator with Trend Analysis</strong></p>
<p>Powered by MAANGO BIG15 Framework | Advanced LLM Technology | Developed by Codeboosters Tech Team</p>
</div>
""")

    with gr.Row():
        # Left column: exam settings and question counts.
        with gr.Column(scale=1):
            gr.HTML('<h3 class="section-title">📋 Exam Configuration</h3>')
            exam_mode = gr.Dropdown(
                choices=["Continuous Assessment (CA)", "End Semester Exam (ESE)", "GATE Style Internal Exam"],
                label="Exam Mode",
                value="End Semester Exam (ESE)"
            )
            subject = gr.Textbox(label="Subject Name", placeholder="e.g., Data Structures")
            code = gr.Textbox(label="Subject Code", placeholder="e.g., CS301")
            units = gr.Textbox(label="Units Range", value="1-5", placeholder="e.g., 1-3 or 1,3,5")
            gr.HTML('<h3 class="section-title">📊 Question Distribution</h3>')
            with gr.Row():
                numA = gr.Number(label="Part A (Short)", value=10, precision=0)
                numB = gr.Number(label="Part B (Descriptive)", value=5, precision=0)
                numC = gr.Number(label="Part C (Case Study)", value=1, precision=0)
        # Right column: syllabus upload and the feature summary card.
        with gr.Column(scale=1):
            gr.HTML('<h3 class="section-title">📁 Syllabus Upload</h3>')
            syllabus_file = gr.File(label="Upload Syllabus", file_types=[".pdf", ".docx", ".txt"])
            gr.HTML("""
<div class="feature-card">
<h4>✨ MAANGO BIG15 Features</h4>
<ul>
<li>🎯 Multi-Cognitive Bloom Alignment</li>
<li>🏢 Industry Tag Integration (TCS/Infosys/Wipro)</li>
<li>📈 Granular Unit Balancing</li>
<li>🔍 GATE Layer Injection</li>
<li>📝 Automatic Answer Key Generation</li>
<li>📊 Advanced Trend Analysis with SERP Data</li>
<li>🎓 MAANGO Compliance Scoring (0-100)</li>
<li>💡 AI-Powered Improvement Suggestions</li>
</ul>
</div>
""")

    generate_btn = gr.Button("🚀 Generate Complete Exam Package", variant="primary", size="lg")

    with gr.Row():
        output_file = gr.File(label="📦 Download Package (Question Paper + Answer Key + Trend Analysis)")
        status_msg = gr.Textbox(label="Status", lines=3)

    # The input order must match the parameter order of generate_exam.
    generate_btn.click(
        fn=generate_exam,
        inputs=[exam_mode, subject, code, units, numA, numB, numC, syllabus_file],
        outputs=[output_file, status_msg]
    )

    gr.HTML("""
<footer>
<p><strong>MAANGO BIG15 Framework</strong> - 15 Pillars of Excellence in Question Paper Generation</p>
<p>Developed with ❤️ by Veerakumar C B | Codeboosters Tech | © 2024</p>
</footer>
""")
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch a server.
if __name__ == "__main__":
    demo.launch()