Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,142 +1,405 @@
|
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import tempfile
|
|
|
|
| 3 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
-
from pypdf import PdfReader
|
| 6 |
-
|
| 7 |
-
from agents import GroqClient, SerpClient
|
| 8 |
-
from multi_agent import MultiAgentOrchestrator
|
| 9 |
-
from docx_builder import (
|
| 10 |
-
build_question_paper_docx,
|
| 11 |
-
build_answers_docx,
|
| 12 |
-
build_obe_docx,
|
| 13 |
-
)
|
| 14 |
-
|
| 15 |
-
# =====================================================
|
| 16 |
-
# SAFE FILE TEXT EXTRACTION (TXT, DOCX, PDF Supported)
|
| 17 |
-
# =====================================================
|
| 18 |
-
def extract_text(file_obj):
|
| 19 |
-
"""Reads syllabus from .txt, .docx or .pdf without breaking."""
|
| 20 |
-
name = file_obj.name.lower()
|
| 21 |
-
|
| 22 |
-
# TXT
|
| 23 |
-
if name.endswith(".txt"):
|
| 24 |
-
content = file_obj.read()
|
| 25 |
-
try:
|
| 26 |
-
return content.decode("utf-8", errors="ignore")
|
| 27 |
-
except:
|
| 28 |
-
return content.decode("latin-1", errors="ignore")
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
try:
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
return
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
try:
|
| 42 |
-
|
| 43 |
-
pages = [p.extract_text() or "" for p in reader.pages]
|
| 44 |
-
return "\n".join(pages)
|
| 45 |
except Exception:
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
#
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
serp = SerpClient(api_key=serp_key)
|
| 60 |
-
|
| 61 |
-
return groq, serp
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
groq_client, serp_client = init_clients()
|
| 65 |
-
orchestrator = MultiAgentOrchestrator(groq_client, serp_client)
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
# =====================================================
|
| 69 |
-
# MAIN PIPELINE FUNCTION
|
| 70 |
-
# =====================================================
|
| 71 |
-
def run_system(subject, stream, partA, partB, partC, syl_file, ref_file):
|
| 72 |
-
|
| 73 |
-
if syl_file is None:
|
| 74 |
-
return None, None, None, "Error: Upload a syllabus file."
|
| 75 |
-
|
| 76 |
-
# Extract text safely
|
| 77 |
-
syllabus_text = extract_text(syl_file)
|
| 78 |
-
ref_text = extract_text(ref_file) if ref_file else ""
|
| 79 |
-
|
| 80 |
-
# Run multi-agent orchestration
|
| 81 |
-
output = orchestrator.run_pipeline(
|
| 82 |
-
subject=subject,
|
| 83 |
-
stream=stream,
|
| 84 |
-
partA=int(partA),
|
| 85 |
-
partB=int(partB),
|
| 86 |
-
partC=int(partC),
|
| 87 |
-
syllabus_text=syllabus_text,
|
| 88 |
-
ref_qp_text=ref_text,
|
| 89 |
-
)
|
| 90 |
-
|
| 91 |
-
final_json = output.get("final", {})
|
| 92 |
-
generator_raw = output.get("generator_raw", "")
|
| 93 |
-
|
| 94 |
-
# Temporary directory for docx exports
|
| 95 |
-
tmpdir = Path(tempfile.mkdtemp())
|
| 96 |
-
|
| 97 |
-
qp_path = tmpdir / f"{subject}_QuestionPaper.docx"
|
| 98 |
-
ans_path = tmpdir / f"{subject}_AnswerKey.docx"
|
| 99 |
-
obe_path = tmpdir / f"{subject}_OBE_Summary.docx"
|
| 100 |
-
|
| 101 |
-
# Build all DOCX files
|
| 102 |
-
build_question_paper_docx(qp_path, final_json, generator_raw, subject)
|
| 103 |
-
build_answers_docx(ans_path, final_json, subject)
|
| 104 |
-
build_obe_docx(obe_path, final_json, subject)
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
with gr.Blocks() as app:
|
| 114 |
-
|
| 115 |
-
|
|
|
|
| 116 |
|
| 117 |
with gr.Row():
|
| 118 |
-
subject = gr.Textbox(label="Subject Name",
|
| 119 |
-
stream = gr.Dropdown(["CSE", "Non-CSE"],
|
| 120 |
|
| 121 |
with gr.Row():
|
| 122 |
-
partA = gr.Number(label="Part A
|
| 123 |
-
partB = gr.Number(label="Part B
|
| 124 |
-
partC = gr.Number(label="Part C
|
| 125 |
|
| 126 |
-
syllabus = gr.File(label="Upload Syllabus (.
|
| 127 |
-
ref_qp = gr.File(label="
|
| 128 |
|
| 129 |
generate_btn = gr.Button("Generate Question Paper")
|
| 130 |
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
status = gr.Markdown("Status:
|
| 135 |
|
| 136 |
-
generate_btn.click(
|
| 137 |
-
run_system,
|
| 138 |
-
inputs=[subject, stream, partA, partB, partC, syllabus, ref_qp],
|
| 139 |
-
outputs=[qp_output, ans_output, obe_output, status]
|
| 140 |
-
)
|
| 141 |
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py (single-file, all-in-one)
|
| 2 |
import os
|
| 3 |
+
import json
|
| 4 |
import tempfile
|
| 5 |
+
import traceback
|
| 6 |
from pathlib import Path
|
| 7 |
+
from typing import Optional, Dict, Any, List
|
| 8 |
+
|
| 9 |
+
import requests
|
| 10 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# ---------------------------
# CONFIG / MODELS (Groq model ids)
# ---------------------------
# Each model id is overridable via an environment variable so the Space can be
# repointed to a newer model without a code change.
# NOTE(review): these default ids may be deprecated by Groq over time —
# confirm against Groq's current model list before deploying.
GENERATOR_MODEL = os.getenv("GENERATOR_MODEL", "llama-3.1-70b-versatile")
VERIFIER_MODEL = os.getenv("VERIFIER_MODEL", "gemma2-27b-it")
FORMATTER_MODEL = os.getenv("FORMATTER_MODEL", "mixtral-8x7b-32768")

# OpenAI-compatible chat-completions endpoint hosted by Groq.
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
# SerpAPI search endpoint (used to fetch realtime evidence snippets).
SERP_URL = "https://serpapi.com/search"
|
| 21 |
+
|
| 22 |
+
# ---------------------------
|
| 23 |
+
# Helpers: Groq Client & SerpClient
|
| 24 |
+
# ---------------------------
|
| 25 |
+
class GroqClient:
    """Thin wrapper around Groq's OpenAI-compatible chat-completions API."""

    def __init__(self, api_key: Optional[str] = None):
        """Store credentials and prebuild request headers.

        Raises:
            RuntimeError: if no key is given and GROQ_API_KEY is unset.
        """
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        if not self.api_key:
            raise RuntimeError("GROQ_API_KEY environment variable or Space secret is required.")
        self.url = GROQ_URL
        self.headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}

    def chat(self, messages: List[Dict[str, str]], model: str, max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """POST a chat-completion request and return the assistant's text.

        Raises RuntimeError on any non-200 HTTP status.  If the response body
        does not have the expected choices/message shape, the whole JSON
        payload is returned serialized (best-effort, so callers still get
        something loggable instead of a KeyError).
        """
        payload = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": 0.95
        }
        # Generous timeout: generation of a full question paper can be slow.
        resp = requests.post(self.url, headers=self.headers, json=payload, timeout=120)
        if resp.status_code != 200:
            raise RuntimeError(f"Groq API error {resp.status_code}: {resp.text}")
        data = resp.json()
        try:
            return data["choices"][0]["message"]["content"]
        except Exception:
            # Unexpected response shape — surface the raw payload instead.
            return json.dumps(data)

    def generate_text(self, system: str, user: str, model: str, max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """Convenience wrapper: one system + one user message -> completion text."""
        messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]
        return self.chat(messages=messages, model=model, max_tokens=max_tokens, temperature=temperature)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class SerpClient:
    """Minimal SerpAPI client used to pull realtime web snippets."""

    def __init__(self, api_key: Optional[str] = None):
        """Store the SerpAPI key.

        Raises:
            RuntimeError: if no key is given and SERPAPI_KEY is unset.
        """
        self.api_key = api_key or os.getenv("SERPAPI_KEY")
        if not self.api_key:
            raise RuntimeError("SERPAPI_KEY environment variable or Space secret is required.")
        self.url = SERP_URL

    def search(self, query: str, num: int = 5) -> Dict[str, Any]:
        """Run a search and return SerpAPI's raw JSON response.

        Raises RuntimeError on any non-200 HTTP status.
        """
        params = {"q": query, "api_key": self.api_key, "num": num}
        resp = requests.get(self.url, params=params, timeout=30)
        if resp.status_code != 200:
            raise RuntimeError(f"SerpAPI error {resp.status_code}: {resp.text}")
        return resp.json()
|
| 68 |
+
|
| 69 |
+
# ---------------------------
|
| 70 |
+
# Safe file text extraction (Gradio returns FileData dict: {"name", "size", "path"})
|
| 71 |
+
# ---------------------------
|
| 72 |
+
def extract_text_from_gradio_file(filedata) -> str:
    """Best-effort text extraction from a Gradio file upload.

    Accepts the several shapes Gradio has used over versions: a FileData
    dict ({"name", "size", "path"}), a plain filepath string / Path, or a
    tempfile-like object exposing ``.name``.  (The original version assumed
    a dict and crashed with AttributeError on a plain path string, which is
    what newer ``gr.File`` components return by default.)

    Supports .txt, .pdf (via pypdf) and .docx (via python-docx); anything
    else is read as bytes and decoded leniently.  Returns "" on any failure
    — callers treat an empty string as "extraction failed".
    """
    if not filedata:
        return ""
    # Normalize the upload to a filesystem path, whatever shape Gradio sent.
    if isinstance(filedata, (str, Path)):
        file_path = filedata
    elif isinstance(filedata, dict):
        file_path = filedata.get("path") or filedata.get("tmp_path") or filedata.get("name")
    else:
        # tempfile._TemporaryFileWrapper or similar object
        file_path = getattr(filedata, "path", None) or getattr(filedata, "name", None)
    if not file_path:
        return ""
    file_path = str(file_path)
    try:
        lower = file_path.lower()
        if lower.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()
        if lower.endswith(".pdf"):
            try:
                from pypdf import PdfReader
                reader = PdfReader(file_path)
                return "\n".join([p.extract_text() or "" for p in reader.pages])
            except Exception:
                # fallback: pypdf missing or PDF unparseable — decode raw bytes
                with open(file_path, "rb") as f:
                    return f.read().decode("utf-8", errors="ignore")
        if lower.endswith(".docx"):
            try:
                import docx
                doc = docx.Document(file_path)
                return "\n".join([p.text for p in doc.paragraphs])
            except Exception:
                with open(file_path, "rb") as f:
                    return f.read().decode("utf-8", errors="ignore")
        # Unknown extension: read bytes and decode leniently.
        with open(file_path, "rb") as f:
            return f.read().decode("utf-8", errors="ignore")
    except Exception:
        # Deliberately swallow all I/O errors — empty string signals failure.
        return ""
|
| 107 |
+
|
| 108 |
+
# ---------------------------
|
| 109 |
+
# Prompt Templates (CSE and Non-CSE) - use your exact doc content here if available
|
| 110 |
+
# ---------------------------
|
| 111 |
+
# Prompt templates.  {partA}/{partB}/{partC} placeholders are filled via
# str.format in build_master_prompt; every other literal brace-free line is
# sent to the model verbatim.  The <<QP_JSON>> marker mentioned here is what
# extract_json_from_text later searches for in the model output.
NONCSE_TEMPLATE = """
Role: You are an expert academic content creator for Mechanical/Electrical/Electronics (Non-CSE).
Task: Generate an internal/continuous-assessment question paper matching GATE style.
Rules:
- Part A: {partA} questions, approx 2 marks each (adjust if marks per question different).
- Part B: {partB} questions, choice/either-or pairs (marks per question ~13; adapt per template).
- Part C: {partC} questions, case/design (higher marks).
- Tag each question at end like: (Bloom's Level: <level> | Unit: <n> | GATE Reference: <year>)
- Provide even unit coverage across the syllabus, ensure ~20% real-world/case-based questions.
- Maintain difficulty index between 1.8 and 2.5.
- Produce two outputs: Human-readable printable QP, and VALID JSON labeled <<QP_JSON>> at the very end, containing "questions" list with fields:
question_no, part, sub_no, marks, unit, course_outcome, bloom_level, tags, question_text
"""

CSE_TEMPLATE = """
Role: You are an expert academic content creator for Computer Science (CSE), aligned with MAANGO BIG15.
Task: Generate an internal/continuous-assessment question paper aligned with industry standards.
Rules:
- Part A: {partA} questions (short-answer)
- Part B: {partB} questions (Either/Or; marks per question ~16)
- Part C: {partC} questions (case/design)
- Tag each question like: (Bloom's Level: <level> | Unit: <n> | Company Tag: <Company, Year>)
- 20% of questions must be industry/case-study oriented.
- Provide printable QP and VALID JSON <<QP_JSON>> as described above.
"""
|
| 136 |
+
|
| 137 |
+
def build_master_prompt(stream: str, subject: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str, realtime_snippets: str) -> str:
    """Assemble the full generator prompt for one question paper.

    Selects the CSE or Non-CSE template by stream name, fills in the part
    counts, then appends subject, truncated syllabus/reference/realtime
    context (hard character caps keep the prompt within model limits), and
    the two-part output instructions (printable QP + trailing <<QP_JSON>>).
    """
    # Any stream name not starting with "cse" (case-insensitive) is Non-CSE.
    template = CSE_TEMPLATE if stream.lower().startswith("cse") else NONCSE_TEMPLATE
    prompt = template.format(partA=partA, partB=partB, partC=partC)
    prompt += f"\nSubject: {subject}\n\nSyllabus (first 15000 chars):\n{(syllabus_text or '')[:15000]}\n\nReference QP (first 8000 chars):\n{(ref_qp_text or '')[:8000]}\n\nRealtime evidence (from web):\n{(realtime_snippets or '')[:5000]}\n\nINSTRUCTIONS:\n1) First provide the printable Question Paper\n2) At the very end provide the JSON labeled <<QP_JSON>> containing 'questions' array with the schema described above. JSON must be valid.\n"
    return prompt
|
| 142 |
+
|
| 143 |
+
# ---------------------------
|
| 144 |
+
# Utility: extract JSON suffix from generator text
|
| 145 |
+
# ---------------------------
|
| 146 |
+
def extract_json_from_text(text: str) -> Optional[dict]:
    """Extract the trailing JSON object from LLM generator output.

    Strategy: look for the <<QP_JSON>> marker and parse everything after it
    (stripping markdown code fences the model sometimes wraps JSON in); if
    that fails or the marker is absent, fall back to parsing from the last
    '{' in the text.  Returns None when nothing parses.

    Fixes over the original: the duplicated fallback branches are merged,
    fenced ```json blocks are unwrapped, and rfind() == -1 no longer causes
    a pointless parse attempt on the last character of the text.
    """
    if not text:
        return None
    marker = "<<QP_JSON>>"
    idx = text.rfind(marker)
    if idx != -1:
        candidate = text[idx + len(marker):].strip()
        # Unwrap ```json ... ``` fences so json.loads sees bare JSON.
        if candidate.startswith("```"):
            candidate = candidate.strip("`")
            if candidate.lower().startswith("json"):
                candidate = candidate[4:]
        try:
            return json.loads(candidate)
        except Exception:
            pass  # fall through to the last-brace heuristic
    # Fallback: try to parse from the last opening brace, if there is one.
    start = text.rfind("{")
    if start == -1:
        return None
    try:
        return json.loads(text[start:])
    except Exception:
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
+
# ---------------------------
|
| 171 |
+
# Multi-agent orchestrator (inlined)
|
| 172 |
+
# ---------------------------
|
| 173 |
+
class MultiAgentOrchestrator:
    """Runs the three-agent pipeline: generator -> verifier -> formatter.

    Each agent is a separate Groq chat call; SerpAPI supplies optional
    realtime web snippets for the generator prompt.  Every stage degrades
    gracefully — partial results and errors are collected into the result
    dict rather than raised to the caller.
    """

    def __init__(self, groq_client: GroqClient, serp_client: SerpClient):
        self.groq = groq_client
        self.serp = serp_client

    def fetch_realtime_snippets(self, subject: str, n: int = 4) -> str:
        """Return up to *n* "title\\nsnippet\\nlink" blocks for the subject.

        Best-effort: any SerpAPI failure yields an empty string so the
        pipeline can proceed without realtime evidence.
        """
        try:
            q = f"{subject} recent developments 2024 2025"
            out = self.serp.search(q, num=n)
            snippets = []
            # serpapi returns organic_results usually
            for item in out.get("organic_results", [])[:n]:
                title = item.get("title", "")
                snippet = item.get("snippet", "") or item.get("snippet_highlighted_words", "")
                link = item.get("link", "")
                if title or snippet:
                    snippets.append(f"{title}\n{snippet}\n{link}")
            # Some SerpAPI responses carry a direct "answer" field instead.
            if not snippets and "answer" in out:
                snippets.append(str(out.get("answer")))
            return "\n\n".join(snippets)
        except Exception as e:
            return ""

    def run_pipeline(self, subject: str, stream: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str) -> Dict[str, Any]:
        """Execute the full pipeline and return a result dict.

        Keys: generator_raw (str), qp_json (dict|None), verifier (dict|None),
        final (dict|None), errors (list of tracebacks).  Only a generator
        failure aborts the run; verifier/formatter failures are recorded
        under their own keys and the pipeline continues.
        """
        result = {"generator_raw": "", "qp_json": None, "verifier": None, "final": None, "errors": []}
        try:
            realtime = self.fetch_realtime_snippets(subject)
            prompt = build_master_prompt(stream, subject, partA, partB, partC, syllabus_text, ref_qp_text, realtime)

            # AGENT 1: GENERATOR — produces printable QP + <<QP_JSON>> blob.
            try:
                gen_out = self.groq.generate_text(system="You are an exam question paper generator.", user=prompt, model=GENERATOR_MODEL, max_tokens=6000, temperature=0.0)
            except Exception as e:
                raise RuntimeError(f"Generator agent failed: {e}")
            result["generator_raw"] = gen_out

            # Try extract JSON from the generator output.
            qp_json = extract_json_from_text(gen_out)
            if qp_json is None:
                # Second chance: ask the generator for JSON only.
                json_only_prompt = prompt + "\n\nNow output ONLY the VALID JSON object 'questions' for the paper (no additional text)."
                gen_json_only = self.groq.generate_text(system="Return JSON only.", user=json_only_prompt, model=GENERATOR_MODEL, max_tokens=3000, temperature=0.0)
                try:
                    qp_json = json.loads(gen_json_only)
                except Exception:
                    # Last resort: pass the raw generator text downstream.
                    qp_json = {"raw_text": gen_out}

            result["qp_json"] = qp_json

            # AGENT 2: VERIFIER — audits the QP JSON, returns corrections.
            try:
                # f-string used only so {{ }} render as literal braces in the
                # example JSON shape shown to the model.
                verifier_prompt = f"You are an academic verifier. Verify the QP JSON below for:\n- Bloom's taxonomy correctness\n- Unit coverage and distribution\n- Correct number of questions per part\n- Tag completeness and Company/GATE tags\n- Difficulty index 1.8-2.5\n- Duplications or ambiguous statements\nReturn a JSON object: {{'corrections': [...], 'issues': [...]}}"
                # Cap the serialized QP at 50k chars to stay within limits.
                verifier_input = json.dumps(qp_json)[:50000]
                ver_out = self.groq.generate_text(system="Verifier agent.", user=verifier_prompt + "\n\n" + verifier_input, model=VERIFIER_MODEL, max_tokens=2000, temperature=0.0)
                try:
                    ver_json = json.loads(ver_out)
                except Exception:
                    # If not valid JSON, return raw text under 'raw'
                    ver_json = {"raw": ver_out}
                result["verifier"] = ver_json
            except Exception as e:
                result["verifier"] = {"error": str(e)}

            # AGENT 3: FORMATTER (apply corrections & produce final JSON)
            try:
                fmt_prompt = (
                    "You are a formatter. Input QP JSON and corrections. Apply corrections, ensure valid JSON structure, "
                    "and produce a single JSON object with keys: final_qp, answers, obe.\n\nQP_JSON:\n"
                    + json.dumps(qp_json)[:50000]
                    + "\n\nVERIFIER_CORRECTIONS:\n"
                    + json.dumps(result["verifier"])[:50000]
                    + "\n\nReturn ONE valid JSON object."
                )
                fmt_out = self.groq.generate_text(system="Formatter agent.", user=fmt_prompt, model=FORMATTER_MODEL, max_tokens=4000, temperature=0.0)
                try:
                    final_json = json.loads(fmt_out)
                except Exception:
                    # Keep everything: raw formatter text + upstream artifacts.
                    final_json = {"raw_formatter_output": fmt_out, "qp_json": qp_json, "verifier": result["verifier"]}
                result["final"] = final_json
            except Exception as e:
                result["final"] = {"error": str(e)}
        except Exception as e:
            # Record the full traceback; caller inspects result["errors"].
            result["errors"].append(traceback.format_exc())
        return result
|
| 257 |
+
|
| 258 |
+
# ---------------------------
|
| 259 |
+
# DOCX builder functions (inline)
|
| 260 |
+
# ---------------------------
|
| 261 |
+
def _add_paragraph(doc, text, bold=False):
|
| 262 |
+
run = doc.add_paragraph().add_run(text)
|
| 263 |
+
run.bold = bold
|
| 264 |
+
|
| 265 |
+
def build_question_paper_docx(path: Path, final_json: dict, generator_raw: str, subject: str):
    """Write the printable question paper to *path* as a .docx file.

    Always dumps the raw generator text (capped); additionally renders a
    5-column table when the structured JSON carries a questions list.
    """
    # Local import keeps python-docx optional until a paper is actually built.
    from docx import Document
    doc = Document()
    doc.add_heading(f"SNS College of Technology — {subject}", level=1)
    doc.add_paragraph("Instructions: Answer as per marks. Each question is tagged with Bloom's level and Unit.")
    doc.add_paragraph("\nPrintable Question Paper:\n")
    if generator_raw:
        # limit to a large but safe size
        doc.add_paragraph(generator_raw[:20000])
    # If structured final_json contains final_qp.questions, create a table.
    # The formatter may nest questions under "final_qp", "final", or top-level.
    questions = []
    if isinstance(final_json, dict):
        fq = final_json.get("final_qp") or final_json.get("final") or final_json
        if isinstance(fq, dict):
            questions = fq.get("questions", []) or []
    if questions:
        table = doc.add_table(rows=1, cols=5)
        hdr = table.rows[0].cells
        hdr[0].text = "Q.No"
        hdr[1].text = "SubQ"
        hdr[2].text = "Question"
        hdr[3].text = "Course Outcome"
        hdr[4].text = "Bloom / Tags"
        for q in questions:
            row = table.add_row().cells
            row[0].text = str(q.get("question_no", ""))
            row[1].text = str(q.get("sub_no", ""))
            row[2].text = str(q.get("question_text", "")).strip()
            row[3].text = str(q.get("course_outcome", ""))
            row[4].text = f"{q.get('bloom_level','')} | {q.get('tags','')}"
    doc.save(path)
|
| 296 |
+
|
| 297 |
+
def build_answers_docx(path: Path, final_json: dict, subject: str):
    """Write the answer key to *path* as a .docx file.

    Looks for answers at final_json["answers"], then final_json["final"]["answers"];
    falls back to dumping whatever is available as pretty-printed JSON.

    Fix over the original: the pipeline can set final_json["final"] to a
    non-dict (e.g. {"error": ...} replaced by a string, or raw formatter
    text), and the original chained .get() call then raised AttributeError;
    both lookups are now isinstance-guarded.
    """
    # Local import keeps python-docx optional until a paper is actually built.
    from docx import Document
    doc = Document()
    doc.add_heading(f"Answer Key — {subject}", level=1)
    answers = {}
    if isinstance(final_json, dict):
        answers = final_json.get("answers") or {}
        if not answers:
            nested = final_json.get("final")
            if isinstance(nested, dict):
                answers = nested.get("answers", {}) or {}
    if isinstance(answers, dict) and answers:
        for k, v in answers.items():
            p = doc.add_paragraph()
            p.add_run(f"{k}:\n").bold = True
            doc.add_paragraph(str(v))
    else:
        # Fallback: dump whatever structure we have (guard non-dict input too).
        fallback = final_json.get("answers", final_json) if isinstance(final_json, dict) else final_json
        doc.add_paragraph(json.dumps(fallback, indent=2)[:15000])
    doc.save(path)
|
| 312 |
+
|
| 313 |
+
def build_obe_docx(path: Path, final_json: dict, subject: str):
    """Write the OBE (Outcome-Based Education) summary to *path* as .docx.

    Looks for the summary at final_json["obe"], then final_json["final"]["obe"],
    and dumps it as pretty-printed JSON (capped at 15k chars).
    NOTE(review): if final_json["final"] is ever a non-dict (the pipeline can
    produce strings there), the chained .get() raises AttributeError — the
    caller's broad except currently absorbs this; confirm intended.
    """
    # Local import keeps python-docx optional until a paper is actually built.
    from docx import Document
    doc = Document()
    doc.add_heading(f"OBE Summary — {subject}", level=1)
    obe = {}
    if isinstance(final_json, dict):
        obe = final_json.get("obe", {}) or final_json.get("final", {}).get("obe", {}) or {}
    doc.add_paragraph(json.dumps(obe, indent=2)[:15000])
    doc.save(path)
|
| 322 |
+
|
| 323 |
+
# ---------------------------
|
| 324 |
+
# Initialize clients (raise friendly error if secrets missing)
|
| 325 |
+
# ---------------------------
|
| 326 |
+
# Build the API clients at import time.  Missing secrets must not crash the
# Space: the failure is captured in init_error and surfaced in the UI, with
# orchestrator left as None so run_system_ui can refuse gracefully.
try:
    groq_client = GroqClient(api_key=os.getenv("GROQ_API_KEY"))
    serp_client = SerpClient(api_key=os.getenv("SERPAPI_KEY"))
    orchestrator = MultiAgentOrchestrator(groq_client, serp_client)
except Exception as e:
    orchestrator = None
    init_error = str(e)
else:
    init_error = None
|
| 335 |
+
|
| 336 |
+
# ---------------------------
|
| 337 |
+
# Gradio UI: single-file app
|
| 338 |
+
# ---------------------------
|
| 339 |
+
def run_system_ui(subject, stream, partA, partB, partC, syllabus_file, ref_file):
    """Gradio click handler: run the pipeline and build the three .docx files.

    Returns a 4-tuple (qp_path, ans_path, obe_path, status_message); the
    first three are None whenever generation fails, with the explanation in
    the status string (this function never raises into Gradio).
    """
    if init_error:
        return None, None, None, f"Server init error: {init_error}"
    try:
        # extract text from uploaded syllabus and reference QP
        syllabus_text = extract_text_from_gradio_file(syllabus_file)
        ref_text = extract_text_from_gradio_file(ref_file) if ref_file else ""
        if not syllabus_text:
            # If the user uploaded nothing or extraction failed, show helpful message referencing the sample file
            # NOTE(review): this /mnt/data path is a development-environment
            # artifact and almost certainly does not exist on the deployed
            # Space — confirm or drop it from the user-facing message.
            sample_path = "/mnt/data/cloud_computing_syllabus.txt"
            msg = ("Syllabus extraction failed or file empty. "
                   f"If you want to test immediately, you can use the sample syllabus located at: {sample_path} "
                   "Upload a .txt/.pdf/.docx file instead.")
            return None, None, None, msg

        # call orchestrator (int() will raise here if counts are empty/None;
        # caught by the broad except below and reported in the status string)
        out = orchestrator.run_pipeline(subject=subject, stream=stream, partA=int(partA), partB=int(partB), partC=int(partC), syllabus_text=syllabus_text, ref_qp_text=ref_text)
        final_json = out.get("final", {})
        gen_raw = out.get("generator_raw", "")

        # write docx files to a fresh temp dir; spaces in the subject are
        # replaced so the filenames stay shell/URL friendly
        tmpdir = Path(tempfile.mkdtemp())
        qp_path = tmpdir / f"{subject.replace(' ','_')}_QuestionPaper.docx"
        ans_path = tmpdir / f"{subject.replace(' ','_')}_AnswerKey.docx"
        obe_path = tmpdir / f"{subject.replace(' ','_')}_OBE_Summary.docx"

        build_question_paper_docx(qp_path, final_json, gen_raw, subject)
        build_answers_docx(ans_path, final_json, subject)
        build_obe_docx(obe_path, final_json, subject)

        return str(qp_path), str(ans_path), str(obe_path), "Generation completed successfully."

    except Exception as e:
        # Surface the full traceback in the status pane for debuggability.
        tb = traceback.format_exc()
        return None, None, None, f"Generation failed: {e}\n\n{tb}"
|
| 374 |
+
|
| 375 |
+
# Build UI
|
| 376 |
# Build the Gradio UI.  Component creation order defines the page layout,
# so statements here must stay in this sequence.
with gr.Blocks() as app:
    gr.Markdown("## Multi-Agent Question Paper Generator (Groq + SerpAPI) — Single-file app")
    if init_error:
        # Surface missing-secret problems directly on the page.
        gr.Markdown(f"**Initialization error:** {init_error}")

    with gr.Row():
        subject = gr.Textbox(label="Subject Name", value="Cloud Computing")
        stream = gr.Dropdown(label="Stream", choices=["CSE", "Non-CSE"], value="Non-CSE")

    with gr.Row():
        partA = gr.Number(label="Part A (number of short questions)", value=5, precision=0)
        partB = gr.Number(label="Part B (number of long questions / either-or pairs)", value=5, precision=0)
        partC = gr.Number(label="Part C (number of case/design questions)", value=1, precision=0)

    syllabus = gr.File(label="Upload Syllabus (.txt / .pdf / .docx)")
    ref_qp = gr.File(label="Reference QP (optional)")

    generate_btn = gr.Button("Generate Question Paper")

    # Output components: three downloadable .docx files plus a status pane.
    qp_file = gr.File(label="Question Paper (.docx)")
    ans_file = gr.File(label="Answer Key (.docx)")
    obe_file = gr.File(label="OBE Summary (.docx)")
    status = gr.Markdown("Status: Idle")

    generate_btn.click(fn=run_system_ui, inputs=[subject, stream, partA, partB, partC, syllabus, ref_qp], outputs=[qp_file, ans_file, obe_file, status])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
+
# Launch
|
| 403 |
+
# Launch the app when run as a script.
if __name__ == "__main__":
    # On Spaces, gradio will handle host/port; for local testing you can set share=True
    app.launch()
|