# SNS / app.py
# (Hugging Face Space page residue, commented out: author codeboosterstech,
#  commit 943a060 "Update app.py" — not part of the program.)
# app.py (patched final single-file)
import os
import json
import tempfile
import traceback
from pathlib import Path
from typing import Optional, Dict, Any, List
import requests
import gradio as gr
# ---------------------------
# CONFIG / MODELS (Groq model ids)
# ---------------------------
# One model per pipeline stage; each is overridable via an environment
# variable so a Space operator can swap models without editing code.
GENERATOR_MODEL = os.getenv("GENERATOR_MODEL", "llama-3.1-70b-versatile")  # agent 1: drafts the paper
VERIFIER_MODEL = os.getenv("VERIFIER_MODEL", "gemma2-27b-it")  # agent 2: audits the draft
FORMATTER_MODEL = os.getenv("FORMATTER_MODEL", "mixtral-8x7b-32768")  # agent 3: normalizes final JSON
# Groq exposes an OpenAI-compatible chat-completions endpoint.
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
SERP_URL = "https://serpapi.com/search"  # SerpAPI search endpoint
# ---------------------------
# Helpers: Groq Client & SerpClient
# ---------------------------
class GroqClient:
    """Thin wrapper around Groq's OpenAI-compatible chat-completions API."""

    def __init__(self, api_key: Optional[str] = None):
        key = api_key if api_key else os.getenv("GROQ_API_KEY")
        if not key:
            raise RuntimeError("GROQ_API_KEY environment variable or Space secret is required.")
        self.api_key = key
        self.url = GROQ_URL
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def chat(self, messages: List[Dict[str, str]], model: str, max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """POST a chat request and return the first choice's text content.

        Raises RuntimeError on any non-200 HTTP response.
        """
        body = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": 0.95,
        }
        resp = requests.post(self.url, headers=self.headers, json=body, timeout=120)
        if resp.status_code != 200:
            raise RuntimeError(f"Groq API error {resp.status_code}: {resp.text}")
        data = resp.json()
        try:
            return data["choices"][0]["message"]["content"]
        except Exception:
            # Unexpected response shape: surface the raw payload instead of crashing.
            return json.dumps(data)

    def generate_text(self, system: str, user: str, model: str, max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """Convenience helper: one system message + one user message."""
        convo = [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]
        return self.chat(messages=convo, model=model, max_tokens=max_tokens, temperature=temperature)
class SerpClient:
    """Minimal SerpAPI client used to pull fresh web snippets."""

    def __init__(self, api_key: Optional[str] = None):
        key = api_key if api_key else os.getenv("SERPAPI_KEY")
        if not key:
            raise RuntimeError("SERPAPI_KEY environment variable or Space secret is required.")
        self.api_key = key
        self.url = SERP_URL

    def search(self, query: str, num: int = 5) -> Dict[str, Any]:
        """Run a web search and return the decoded JSON response.

        Raises RuntimeError on any non-200 HTTP response.
        """
        resp = requests.get(
            self.url,
            params={"q": query, "api_key": self.api_key, "num": num},
            timeout=30,
        )
        if resp.status_code != 200:
            raise RuntimeError(f"SerpAPI error {resp.status_code}: {resp.text}")
        return resp.json()
# ---------------------------
# Safe file text extraction (handles dict and NamedString)
# ---------------------------
def extract_text_from_gradio_file(filedata) -> str:
    """Best-effort text extraction from a Gradio upload.

    Accepts either an HF Spaces FileData dict ({"name": ..., "path": ..., "size": ...})
    or a Gradio NamedString / plain string path. Supports .txt, .pdf (via pypdf)
    and .docx (via python-docx); any other extension — or any parser failure —
    falls back to decoding the raw bytes as UTF-8, and completely unreadable
    input yields "".
    """
    if not filedata:
        return ""

    # Resolve the on-disk path from whichever shape Gradio handed us.
    if isinstance(filedata, dict):
        path = filedata.get("path") or filedata.get("name")
    else:
        # NamedString or plain string
        path = str(filedata)
    if not path:
        return ""

    def raw_decode() -> str:
        # Last resort: decode raw bytes, ignoring undecodable sequences.
        with open(path, "rb") as fh:
            return fh.read().decode("utf-8", errors="ignore")

    try:
        suffix = path.lower()
        if suffix.endswith(".txt"):
            with open(path, "r", encoding="utf-8", errors="ignore") as fh:
                return fh.read()
        if suffix.endswith(".pdf"):
            try:
                from pypdf import PdfReader
                pages = PdfReader(path).pages
                return "\n".join(page.extract_text() or "" for page in pages)
            except Exception:
                return raw_decode()
        if suffix.endswith(".docx"):
            try:
                import docx
                paragraphs = docx.Document(path).paragraphs
                return "\n".join(par.text for par in paragraphs)
            except Exception:
                return raw_decode()
        # Unknown extension: treat as text-ish bytes.
        return raw_decode()
    except Exception:
        return ""
# ---------------------------
# Prompt Templates (CSE and Non-CSE)
# ---------------------------
# Prompt template for Non-CSE streams (Mechanical/Electrical/Electronics).
# {partA}/{partB}/{partC} placeholders are filled by build_master_prompt();
# the model is told to append a machine-readable JSON payload after a literal
# <<QP_JSON>> marker, which extract_json_from_text() later looks for.
NONCSE_TEMPLATE = """
Role: You are an expert academic content creator for Mechanical/Electrical/Electronics (Non-CSE).
Task: Generate an internal/continuous-assessment question paper matching GATE style.
Rules:
- Part A: {partA} questions, approx 2 marks each.
- Part B: {partB} questions, choice/either-or pairs.
- Part C: {partC} questions, case/design (higher marks).
- Tag each question at end like: (Bloom's Level: <level> | Unit: <n> | GATE Reference: <year>)
- Provide even unit coverage across the syllabus, ensure ~20% real-world/case-based questions.
- Maintain difficulty index between 1.8 and 2.5.
- Produce two outputs: Human-readable printable QP, and VALID JSON labeled <<QP_JSON>> at the very end containing "questions".
"""
# Prompt template for the CSE stream; same placeholders and <<QP_JSON>>
# contract as above, but with industry (company) tagging instead of GATE tags.
CSE_TEMPLATE = """
Role: You are an expert academic content creator for Computer Science (CSE), aligned with MAANGO BIG15.
Task: Generate an internal/continuous-assessment question paper aligned with industry standards.
Rules:
- Part A: {partA} short-answer questions.
- Part B: {partB} questions (Either/Or pairs).
- Part C: {partC} questions (case/design).
- Tag each question like: (Bloom's Level: <level> | Unit: <n> | Company Tag: <Company, Year>)
- 20% of questions must be industry/case-study oriented.
- Provide printable QP and VALID JSON <<QP_JSON>> as described above.
"""
def build_master_prompt(stream: str, subject: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str, realtime_snippets: str) -> str:
    """Assemble the generator agent's prompt.

    Picks the stream-specific template, fills in the per-part question counts,
    then appends the subject plus truncated syllabus / reference-QP / realtime
    context and the output-format instructions.
    """
    is_cse = stream.lower().startswith("cse")
    base = (CSE_TEMPLATE if is_cse else NONCSE_TEMPLATE).format(partA=partA, partB=partB, partC=partC)
    # Truncate each context section to keep the prompt within context limits.
    syllabus = (syllabus_text or '')[:15000]
    ref_qp = (ref_qp_text or '')[:8000]
    realtime = (realtime_snippets or '')[:5000]
    tail = (
        f"\nSubject: {subject}\n\n"
        f"Syllabus (first 15000 chars):\n{syllabus}\n\n"
        f"Reference QP (first 8000 chars):\n{ref_qp}\n\n"
        f"Realtime evidence (from web):\n{realtime}\n\n"
        "INSTRUCTIONS:\n"
        "1) First provide the printable Question Paper\n"
        "2) At the very end provide the JSON labeled <<QP_JSON>> containing 'questions' array. JSON must be valid.\n"
    )
    return base + tail
# ---------------------------
# Utility: extract JSON suffix from generator text
# ---------------------------
def extract_json_from_text(text: str) -> Optional[dict]:
    """Pull the trailing JSON object out of a generator response.

    The generator is asked to append its machine-readable payload after a
    literal "<<QP_JSON>>" marker. This searches after the last marker first,
    then falls back to scanning the whole text.

    Bug fix: the previous implementation sliced from ``text.rfind("{")``,
    i.e. the LAST opening brace — so any payload containing a nested object
    (e.g. ``{"questions": [{"q": 1}]}``) produced an unparsable fragment and
    returned None. ``json.JSONDecoder.raw_decode`` is used instead: it parses
    a complete value starting at a given position and tolerates both nested
    braces and trailing prose after the object.

    Returns the first JSON *object* found, or None.
    """
    if not text:
        return None

    def _first_object(chunk: str) -> Optional[dict]:
        # Try each '{' as a candidate start; raw_decode stops at the end of a
        # complete JSON value, so trailing text after the object is harmless.
        decoder = json.JSONDecoder()
        pos = chunk.find("{")
        while pos != -1:
            try:
                obj, _ = decoder.raw_decode(chunk, pos)
            except ValueError:
                obj = None
            if isinstance(obj, dict):
                return obj
            pos = chunk.find("{", pos + 1)
        return None

    marker = "<<QP_JSON>>"
    idx = text.rfind(marker)
    if idx != -1:
        found = _first_object(text[idx + len(marker):])
        if found is not None:
            return found
    # No marker, or nothing parsable after it: scan the full text.
    return _first_object(text)
# ---------------------------
# Multi-agent orchestrator (inlined)
# ---------------------------
class MultiAgentOrchestrator:
    """Runs the three-stage Groq pipeline: generator -> verifier -> formatter.

    Each stage degrades gracefully: failures are recorded inside the result
    dict (or swallowed into fallback values) rather than aborting the run.
    """

    def __init__(self, groq_client: GroqClient, serp_client: SerpClient):
        self.groq = groq_client
        self.serp = serp_client

    def fetch_realtime_snippets(self, subject: str, n: int = 4) -> str:
        """Fetch up to *n* web snippets about recent developments in *subject*.

        Best-effort: any SerpAPI failure yields an empty string so the
        pipeline can proceed without realtime evidence.
        """
        try:
            q = f"{subject} recent developments 2024 2025"
            out = self.serp.search(q, num=n)
            snippets = []
            for item in out.get("organic_results", [])[:n]:
                title = item.get("title", "")
                snippet = item.get("snippet", "") or item.get("snippet_highlighted_words", "")
                link = item.get("link", "")
                if title or snippet:
                    snippets.append(f"{title}\n{snippet}\n{link}")
            # No organic hits: fall back to SerpAPI's direct-answer field.
            if not snippets and "answer" in out:
                snippets.append(str(out.get("answer")))
            return "\n\n".join(snippets)
        except Exception:
            return ""

    def run_pipeline(self, subject: str, stream: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str) -> Dict[str, Any]:
        """Run generator, verifier and formatter agents and return all artifacts.

        Returns a dict with keys: "generator_raw" (str), "qp_json" (dict),
        "verifier" (dict), "final" (dict) and "errors" (list of traceback
        strings). Verifier/formatter failures are captured per-stage; only a
        generator failure aborts the pipeline (recorded in "errors").
        """
        result = {"generator_raw": "", "qp_json": None, "verifier": None, "final": None, "errors": []}
        try:
            realtime = self.fetch_realtime_snippets(subject)
            prompt = build_master_prompt(stream, subject, partA, partB, partC, syllabus_text, ref_qp_text, realtime)
            # AGENT 1: GENERATOR — drafts the paper (printable text + JSON tail).
            try:
                gen_out = self.groq.generate_text(system="You are an exam question paper generator.", user=prompt, model=GENERATOR_MODEL, max_tokens=6000, temperature=0.0)
            except Exception as e:
                raise RuntimeError(f"Generator agent failed: {e}")
            result["generator_raw"] = gen_out
            # Try to extract the JSON tail; if unparsable, re-ask for JSON only.
            qp_json = extract_json_from_text(gen_out)
            if qp_json is None:
                json_only_prompt = prompt + "\n\nNow output ONLY the VALID JSON object 'questions' for the paper (no additional text)."
                gen_json_only = self.groq.generate_text(system="Return JSON only.", user=json_only_prompt, model=GENERATOR_MODEL, max_tokens=3000, temperature=0.0)
                try:
                    qp_json = json.loads(gen_json_only)
                except Exception:
                    # Still unparsable: keep the raw text so nothing is lost.
                    qp_json = {"raw_text": gen_out}
            result["qp_json"] = qp_json
            # AGENT 2: VERIFIER — audits the draft for structural/quality issues.
            try:
                verifier_prompt = (
                    "You are an academic verifier. Verify the QP JSON below for:\n"
                    "- Bloom's taxonomy correctness\n"
                    "- Unit coverage and distribution\n"
                    "- Correct number of questions per part\n"
                    "- Tag completeness and Company/GATE tags\n"
                    "- Difficulty index 1.8-2.5\n"
                    "- Duplications or ambiguous statements\n"
                    "Return a JSON object: {'corrections': [...], 'issues': [...]}"
                )
                # Truncate to stay within the model's context window.
                verifier_input = json.dumps(qp_json)[:50000]
                ver_out = self.groq.generate_text(system="Verifier agent.", user=verifier_prompt + "\n\n" + verifier_input, model=VERIFIER_MODEL, max_tokens=2000, temperature=0.0)
                try:
                    ver_json = json.loads(ver_out)
                except Exception:
                    ver_json = {"raw": ver_out}
                result["verifier"] = ver_json
            except Exception as e:
                result["verifier"] = {"error": str(e)}
            # AGENT 3: FORMATTER — applies corrections, normalizes the output
            # into {final_qp, answers, obe}.
            try:
                fmt_prompt = (
                    "You are a formatter. Input QP JSON and corrections. Apply corrections, ensure valid JSON structure, "
                    "and produce a single JSON object with keys: final_qp, answers, obe.\n\nQP_JSON:\n"
                    + json.dumps(qp_json)[:50000]
                    + "\n\nVERIFIER_CORRECTIONS:\n"
                    + json.dumps(result["verifier"])[:50000]
                    + "\n\nReturn ONE valid JSON object."
                )
                fmt_out = self.groq.generate_text(system="Formatter agent.", user=fmt_prompt, model=FORMATTER_MODEL, max_tokens=4000, temperature=0.0)
                try:
                    final_json = json.loads(fmt_out)
                except Exception:
                    # Invalid formatter JSON: keep every intermediate artifact.
                    final_json = {"raw_formatter_output": fmt_out, "qp_json": qp_json, "verifier": result["verifier"]}
                result["final"] = final_json
            except Exception as e:
                result["final"] = {"error": str(e)}
        except Exception:
            # Record the full traceback; callers inspect result["errors"].
            result["errors"].append(traceback.format_exc())
        return result
# ---------------------------
# DOCX builder functions (robust)
# ---------------------------
def _add_paragraph(doc, text, bold=False):
p = doc.add_paragraph()
run = p.add_run(text)
run.bold = bold
def build_question_paper_docx(path: Path, final_json: Optional[dict], generator_raw: str, subject: str):
    """Write the printable question paper to *path* as a .docx file.

    Emits the raw generator text first, then — when the formatter produced a
    structured "questions" list — a five-column table of those questions;
    otherwise a note pointing the reader at the raw output.
    """
    from docx import Document
    doc = Document()
    doc.add_heading(f"SNS College of Technology — {subject}", level=1)
    doc.add_paragraph("Instructions: Answer as per marks. Each question is tagged with Bloom's level and Unit.")
    doc.add_paragraph("\nPrintable Question Paper:\n")
    if generator_raw:
        # Cap at 20k characters to keep the document manageable.
        doc.add_paragraph(generator_raw[:20000])

    # Locate the structured question list, tolerating several JSON shapes
    # ({"final_qp": {...}}, {"final": {...}}, or questions at top level).
    questions = []
    try:
        if isinstance(final_json, dict):
            container = final_json.get("final_qp") or final_json.get("final") or final_json
            if isinstance(container, dict):
                questions = container.get("questions", []) or []
    except Exception:
        questions = []

    if not questions:
        doc.add_paragraph("No structured questions were produced by the formatter. See the raw generator output above.")
    else:
        table = doc.add_table(rows=1, cols=5)
        headers = ("Q.No", "SubQ", "Question", "Course Outcome", "Bloom / Tags")
        for cell, title in zip(table.rows[0].cells, headers):
            cell.text = title
        for entry in questions:
            cells = table.add_row().cells
            cells[0].text = str(entry.get("question_no", ""))
            cells[1].text = str(entry.get("sub_no", ""))
            cells[2].text = str(entry.get("question_text", "")).strip()
            cells[3].text = str(entry.get("course_outcome", ""))
            cells[4].text = f"{entry.get('bloom_level','')} | {entry.get('tags','')}"
    doc.save(path)
def build_answers_docx(path: Path, final_json: Optional[dict], subject: str):
    """Write the answer key to *path* as a .docx file.

    Looks for an "answers" mapping at the top level of *final_json* or nested
    under its "final" key; when neither yields a non-empty dict, dumps the raw
    final JSON (truncated) so the document is never empty.

    Fix: the nested lookup previously did ``final_json.get("final", {}).get(...)``,
    which raised AttributeError whenever the "final" key held a non-dict (the
    formatter's fallback can put arbitrary content there); now guarded with
    an isinstance check.
    """
    from docx import Document
    doc = Document()
    doc.add_heading(f"Answer Key — {subject}", level=1)

    answers = {}
    if isinstance(final_json, dict):
        # Try multiple possible locations for the answers mapping.
        nested = final_json.get("final")
        answers = (
            final_json.get("answers")
            or (nested.get("answers", {}) if isinstance(nested, dict) else {})
            or {}
        )

    if isinstance(answers, dict) and answers:
        for key, val in answers.items():
            p = doc.add_paragraph()
            p.add_run(f"{key}:\n").bold = True
            doc.add_paragraph(str(val))
    else:
        # Fallback: safe dump of whatever final JSON we have (truncated).
        try:
            safe_dump = json.dumps(final_json or {"note": "No final JSON"}, indent=2)[:15000]
        except Exception:
            # Non-serializable content: fall back to repr.
            safe_dump = str(final_json)[:15000]
        doc.add_paragraph("No structured answers provided by AI. Falling back to raw final JSON (truncated):")
        doc.add_paragraph(safe_dump)
    doc.save(path)
def build_obe_docx(path: Path, final_json: Optional[dict], subject: str):
    """Write the OBE (outcome-based education) summary to *path* as .docx.

    Looks for an "obe" mapping at the top level of *final_json* or nested
    under its "final" key, and dumps it as pretty-printed JSON (truncated).

    Fix: the nested lookup previously did ``final_json.get("final", {}).get(...)``,
    which raised AttributeError whenever the "final" key held a non-dict; now
    guarded with an isinstance check.
    """
    from docx import Document
    doc = Document()
    doc.add_heading(f"OBE Summary — {subject}", level=1)

    obe = {}
    if isinstance(final_json, dict):
        nested = final_json.get("final")
        obe = (
            final_json.get("obe")
            or (nested.get("obe", {}) if isinstance(nested, dict) else {})
            or {}
        )
    try:
        doc.add_paragraph(json.dumps(obe or {"note": "No OBE produced"}, indent=2)[:15000])
    except Exception:
        # Non-serializable content: fall back to repr.
        doc.add_paragraph(str(obe)[:15000])
    doc.save(path)
# ---------------------------
# Initialize clients (raise friendly error if secrets missing)
# ---------------------------
# Instantiate the API clients at import time so a missing secret is reported
# in the UI immediately instead of failing on first use.
try:
    groq_client = GroqClient(api_key=os.getenv("GROQ_API_KEY"))
    serp_client = SerpClient(api_key=os.getenv("SERPAPI_KEY"))
    orchestrator = MultiAgentOrchestrator(groq_client, serp_client)
except Exception as e:
    # Keep the module importable; run_system_ui surfaces init_error to users.
    orchestrator = None
    init_error = str(e)
else:
    init_error = None
# ---------------------------
# Gradio UI: single-file app
# ---------------------------
def run_system_ui(subject, stream, partA, partB, partC, syllabus_file, ref_file):
    """Gradio click handler: run the pipeline and build the three .docx files.

    Returns (qp_path, answer_path, obe_path, status_message); the three paths
    are None whenever generation could not proceed.
    """
    if init_error:
        return None, None, None, f"Server init error: {init_error}"
    try:
        # Extract text from the uploaded syllabus and (optional) reference QP.
        syllabus_text = extract_text_from_gradio_file(syllabus_file)
        ref_text = extract_text_from_gradio_file(ref_file) if ref_file else ""
        if not syllabus_text:
            sample_path = "/mnt/data/cloud_computing_syllabus.txt"
            msg = (
                "Syllabus extraction failed or file empty. "
                f"Use the sample syllabus for testing: {sample_path} or upload a .txt/.pdf/.docx."
            )
            return None, None, None, msg

        # Run the multi-agent pipeline.
        out = orchestrator.run_pipeline(
            subject=subject,
            stream=stream,
            partA=int(partA),
            partB=int(partB),
            partC=int(partC),
            syllabus_text=syllabus_text,
            ref_qp_text=ref_text,
        )

        # The formatter can fail; guarantee downstream builders get a dict.
        raw_final = out.get("final")
        if isinstance(raw_final, dict):
            final_json = raw_final
        else:
            final_json = {
                "final_qp": {"questions": []},
                "answers": {},
                "obe": {},
                "error": "Formatter returned invalid JSON or None.",
                "generator_raw_sample": (out.get("generator_raw") or "")[:5000],
            }
        gen_raw = out.get("generator_raw", "")

        # Write the three documents into a fresh temp directory.
        tmpdir = Path(tempfile.mkdtemp())
        stem = subject.replace(' ', '_')
        qp_path = tmpdir / f"{stem}_QuestionPaper.docx"
        ans_path = tmpdir / f"{stem}_AnswerKey.docx"
        obe_path = tmpdir / f"{stem}_OBE_Summary.docx"
        build_question_paper_docx(qp_path, final_json, gen_raw, subject)
        build_answers_docx(ans_path, final_json, subject)
        build_obe_docx(obe_path, final_json, subject)
        return str(qp_path), str(ans_path), str(obe_path), "Generation completed successfully."
    except Exception as e:
        tb = traceback.format_exc()
        return None, None, None, f"Generation failed: {e}\n\n{tb}"
# Build UI
# Single-page Gradio layout: inputs on top, a generate button, then the three
# downloadable .docx outputs and a status line.
with gr.Blocks() as app:
    gr.Markdown("## Multi-Agent Question Paper Generator (Groq + SerpAPI) — Single-file app")
    if init_error:
        # Surface missing-secret / init problems directly on the page.
        gr.Markdown(f"**Initialization error:** {init_error}")
    with gr.Row():
        subject = gr.Textbox(label="Subject Name", value="Cloud Computing")
        stream = gr.Dropdown(label="Stream", choices=["CSE", "Non-CSE"], value="Non-CSE")
    with gr.Row():
        partA = gr.Number(label="Part A (number of short questions)", value=5, precision=0)
        partB = gr.Number(label="Part B (number of long questions / either-or pairs)", value=5, precision=0)
        partC = gr.Number(label="Part C (number of case/design questions)", value=1, precision=0)
    syllabus = gr.File(label="Upload Syllabus (.txt / .pdf / .docx)")
    ref_qp = gr.File(label="Reference QP (optional)")
    generate_btn = gr.Button("Generate Question Paper")
    qp_file = gr.File(label="Question Paper (.docx)")
    ans_file = gr.File(label="Answer Key (.docx)")
    obe_file = gr.File(label="OBE Summary (.docx)")
    status = gr.Markdown("Status: Idle")
    generate_btn.click(fn=run_system_ui, inputs=[subject, stream, partA, partB, partC, syllabus, ref_qp], outputs=[qp_file, ans_file, obe_file, status])
# Launch
if __name__ == "__main__":
    app.launch()