# app.py (patched final single-file)
import os
import json
import tempfile
import traceback
from pathlib import Path
from typing import Optional, Dict, Any, List

import requests
import gradio as gr

# ---------------------------
# CONFIG / MODELS (Groq model ids)
# ---------------------------
GENERATOR_MODEL = os.getenv("GENERATOR_MODEL", "llama-3.1-70b-versatile")
VERIFIER_MODEL = os.getenv("VERIFIER_MODEL", "gemma2-27b-it")
FORMATTER_MODEL = os.getenv("FORMATTER_MODEL", "mixtral-8x7b-32768")

GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
SERP_URL = "https://serpapi.com/search"


# ---------------------------
# Helpers: Groq Client & SerpClient
# ---------------------------
class GroqClient:
    """Thin wrapper around Groq's OpenAI-compatible chat-completions endpoint."""

    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        if not self.api_key:
            raise RuntimeError("GROQ_API_KEY environment variable or Space secret is required.")
        self.url = GROQ_URL
        self.headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}

    def chat(self, messages: List[Dict[str, str]], model: str,
             max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """POST a chat request and return the first choice's message content.

        Raises:
            RuntimeError: on any non-200 HTTP response.

        If the response JSON does not have the expected shape, the whole
        payload is returned as a JSON string so the caller can inspect it.
        """
        payload = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": 0.95,
        }
        resp = requests.post(self.url, headers=self.headers, json=payload, timeout=120)
        if resp.status_code != 200:
            raise RuntimeError(f"Groq API error {resp.status_code}: {resp.text}")
        data = resp.json()
        try:
            return data["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # Unexpected response shape — surface the raw payload for debugging.
            return json.dumps(data)

    def generate_text(self, system: str, user: str, model: str,
                      max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """Convenience wrapper: one system + one user message -> completion text."""
        messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]
        return self.chat(messages=messages, model=model, max_tokens=max_tokens, temperature=temperature)


class SerpClient:
    """Minimal SerpAPI search client (used for real-time evidence snippets)."""

    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key or os.getenv("SERPAPI_KEY")
        if not self.api_key:
            raise RuntimeError("SERPAPI_KEY environment variable or Space secret is required.")
        self.url = SERP_URL

    def search(self, query: str, num: int = 5) -> Dict[str, Any]:
        """Run a search and return the parsed JSON response.

        Raises:
            RuntimeError: on any non-200 HTTP response.
        """
        params = {"q": query, "api_key": self.api_key, "num": num}
        resp = requests.get(self.url, params=params, timeout=30)
        if resp.status_code != 200:
            raise RuntimeError(f"SerpAPI error {resp.status_code}: {resp.text}")
        return resp.json()


# ---------------------------
# Safe file text extraction (handles dict and NamedString)
# ---------------------------
def extract_text_from_gradio_file(filedata) -> str:
    """
    Accepts either:
      - HF Spaces FileData dict: {"name": "...", "path": "/tmp/..", "size": n}
      - Gradio NamedString or plain string (e.g., "/tmp/..")
    Returns extracted text for .txt, .pdf, .docx, or a text fallback.

    Never raises: any extraction failure yields an empty string so the UI can
    show a friendly message instead of a traceback.
    """
    if not filedata:
        return ""
    # Determine file path from either upload representation.
    if isinstance(filedata, dict):
        file_path = filedata.get("path") or filedata.get("name")
    else:
        # NamedString or plain string
        file_path = str(filedata)
    if not file_path:
        return ""
    try:
        lower = file_path.lower()
        if lower.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()
        if lower.endswith(".pdf"):
            try:
                from pypdf import PdfReader
                reader = PdfReader(file_path)
                return "\n".join([p.extract_text() or "" for p in reader.pages])
            except Exception:
                # pypdf missing or PDF unreadable — best-effort raw decode.
                with open(file_path, "rb") as f:
                    return f.read().decode("utf-8", errors="ignore")
        if lower.endswith(".docx"):
            try:
                import docx
                doc = docx.Document(file_path)
                return "\n".join([p.text for p in doc.paragraphs])
            except Exception:
                with open(file_path, "rb") as f:
                    return f.read().decode("utf-8", errors="ignore")
        # fallback: read bytes
        with open(file_path, "rb") as f:
            return f.read().decode("utf-8", errors="ignore")
    except Exception:
        return ""


# ---------------------------
# Prompt Templates (CSE and Non-CSE)
# ---------------------------
NONCSE_TEMPLATE = """
Role: You are an expert academic content creator for Mechanical/Electrical/Electronics (Non-CSE).
Task: Generate an internal/continuous-assessment question paper matching GATE style.
Rules:
- Part A: {partA} questions, approx 2 marks each.
- Part B: {partB} questions, choice/either-or pairs.
- Part C: {partC} questions, case/design (higher marks).
- Tag each question at end like: (Bloom's Level: | Unit: | GATE Reference: )
- Provide even unit coverage across the syllabus, ensure ~20% real-world/case-based questions.
- Maintain difficulty index between 1.8 and 2.5.
- Produce two outputs: Human-readable printable QP, and VALID JSON labeled <> at the very end containing "questions".
"""

CSE_TEMPLATE = """
Role: You are an expert academic content creator for Computer Science (CSE), aligned with MAANGO BIG15.
Task: Generate an internal/continuous-assessment question paper aligned with industry standards.
Rules:
- Part A: {partA} short-answer questions.
- Part B: {partB} questions (Either/Or pairs).
- Part C: {partC} questions (case/design).
- Tag each question like: (Bloom's Level: | Unit: | Company Tag: )
- 20% of questions must be industry/case-study oriented.
- Provide printable QP and VALID JSON <> as described above.
"""


def build_master_prompt(stream: str, subject: str, partA: int, partB: int, partC: int,
                        syllabus_text: str, ref_qp_text: str, realtime_snippets: str) -> str:
    """Assemble the generator prompt: template + subject + truncated context blobs.

    Inputs are clipped (15k/8k/5k chars) to stay within model context limits.
    """
    template = CSE_TEMPLATE if stream.lower().startswith("cse") else NONCSE_TEMPLATE
    prompt = template.format(partA=partA, partB=partB, partC=partC)
    prompt += (
        f"\nSubject: {subject}\n\n"
        f"Syllabus (first 15000 chars):\n{(syllabus_text or '')[:15000]}\n\n"
        f"Reference QP (first 8000 chars):\n{(ref_qp_text or '')[:8000]}\n\n"
        f"Realtime evidence (from web):\n{(realtime_snippets or '')[:5000]}\n\n"
        "INSTRUCTIONS:\n"
        "1) First provide the printable Question Paper\n"
        "2) At the very end provide the JSON labeled <> containing 'questions' array. JSON must be valid.\n"
    )
    return prompt


# ---------------------------
# Utility: extract JSON suffix from generator text
# ---------------------------
def _first_decodable_dict(text: str) -> Optional[dict]:
    """Scan *text* for the first '{' that starts a decodable JSON object.

    Uses JSONDecoder.raw_decode so trailing prose after the object is
    tolerated (json.loads would reject it).
    """
    decoder = json.JSONDecoder()
    idx = text.find("{")
    while idx != -1:
        try:
            obj, _end = decoder.raw_decode(text, idx)
            if isinstance(obj, dict):
                return obj
        except ValueError:
            pass
        idx = text.find("{", idx + 1)
    return None


def extract_json_from_text(text: str) -> Optional[dict]:
    """Pull the trailing JSON object out of a generator response.

    Strategy: prefer whatever follows the last '<>' marker; otherwise fall
    back to scanning the whole text for the first decodable JSON object.
    (The previous implementation used text.rfind('{') + json.loads, which
    fails for any nested object because the last '{' is an inner brace.)
    """
    if not text:
        return None
    # try to locate the <> marker the prompt asks the model to emit
    idx = text.rfind("<>")
    candidate = text[idx + len("<>"):].strip() if idx != -1 else None
    if candidate:
        try:
            return json.loads(candidate)
        except Exception:
            parsed = _first_decodable_dict(candidate)
            if parsed is not None:
                return parsed
    return _first_decodable_dict(text)


# ---------------------------
# Multi-agent orchestrator (inlined)
# ---------------------------
class MultiAgentOrchestrator:
    """Runs the generator -> verifier -> formatter agent pipeline."""

    def __init__(self, groq_client: GroqClient, serp_client: SerpClient):
        self.groq = groq_client
        self.serp = serp_client

    def fetch_realtime_snippets(self, subject: str, n: int = 4) -> str:
        """Fetch up to *n* web snippets about the subject; empty string on failure.

        Best-effort by design: search being down must not block paper generation.
        """
        try:
            q = f"{subject} recent developments 2024 2025"
            out = self.serp.search(q, num=n)
            snippets = []
            for item in out.get("organic_results", [])[:n]:
                title = item.get("title", "")
                snippet = item.get("snippet", "") or item.get("snippet_highlighted_words", "")
                link = item.get("link", "")
                if title or snippet:
                    snippets.append(f"{title}\n{snippet}\n{link}")
            if not snippets and "answer" in out:
                snippets.append(str(out.get("answer")))
            return "\n\n".join(snippets)
        except Exception:
            return ""

    def run_pipeline(self, subject: str, stream: str, partA: int, partB: int, partC: int,
                     syllabus_text: str, ref_qp_text: str) -> Dict[str, Any]:
        """Run the full three-agent pipeline and return a result dict.

        Result keys: generator_raw, qp_json, verifier, final, errors.
        Verifier/formatter failures are recorded in their own keys so a
        partial result is still returned; only generator failure aborts.
        """
        result = {"generator_raw": "", "qp_json": None, "verifier": None, "final": None, "errors": []}
        try:
            realtime = self.fetch_realtime_snippets(subject)
            prompt = build_master_prompt(stream, subject, partA, partB, partC,
                                         syllabus_text, ref_qp_text, realtime)

            # AGENT 1: GENERATOR
            try:
                gen_out = self.groq.generate_text(
                    system="You are an exam question paper generator.",
                    user=prompt, model=GENERATOR_MODEL, max_tokens=6000, temperature=0.0)
            except Exception as e:
                raise RuntimeError(f"Generator agent failed: {e}") from e
            result["generator_raw"] = gen_out

            # Try extract JSON; if absent, ask the model again for JSON only.
            qp_json = extract_json_from_text(gen_out)
            if qp_json is None:
                json_only_prompt = prompt + "\n\nNow output ONLY the VALID JSON object 'questions' for the paper (no additional text)."
                gen_json_only = self.groq.generate_text(
                    system="Return JSON only.", user=json_only_prompt,
                    model=GENERATOR_MODEL, max_tokens=3000, temperature=0.0)
                try:
                    qp_json = json.loads(gen_json_only)
                except Exception:
                    qp_json = {"raw_text": gen_out}
            result["qp_json"] = qp_json

            # AGENT 2: VERIFIER
            try:
                verifier_prompt = (
                    "You are an academic verifier. Verify the QP JSON below for:\n"
                    "- Bloom's taxonomy correctness\n"
                    "- Unit coverage and distribution\n"
                    "- Correct number of questions per part\n"
                    "- Tag completeness and Company/GATE tags\n"
                    "- Difficulty index 1.8-2.5\n"
                    "- Duplications or ambiguous statements\n"
                    "Return a JSON object: {'corrections': [...], 'issues': [...]}"
                )
                verifier_input = json.dumps(qp_json)[:50000]
                ver_out = self.groq.generate_text(
                    system="Verifier agent.", user=verifier_prompt + "\n\n" + verifier_input,
                    model=VERIFIER_MODEL, max_tokens=2000, temperature=0.0)
                try:
                    ver_json = json.loads(ver_out)
                except Exception:
                    ver_json = {"raw": ver_out}
                result["verifier"] = ver_json
            except Exception as e:
                result["verifier"] = {"error": str(e)}

            # AGENT 3: FORMATTER
            try:
                fmt_prompt = (
                    "You are a formatter. Input QP JSON and corrections. Apply corrections, ensure valid JSON structure, "
                    "and produce a single JSON object with keys: final_qp, answers, obe.\n\nQP_JSON:\n"
                    + json.dumps(qp_json)[:50000]
                    + "\n\nVERIFIER_CORRECTIONS:\n" + json.dumps(result["verifier"])[:50000]
                    + "\n\nReturn ONE valid JSON object."
                )
                fmt_out = self.groq.generate_text(
                    system="Formatter agent.", user=fmt_prompt,
                    model=FORMATTER_MODEL, max_tokens=4000, temperature=0.0)
                try:
                    final_json = json.loads(fmt_out)
                except Exception:
                    final_json = {"raw_formatter_output": fmt_out, "qp_json": qp_json,
                                  "verifier": result["verifier"]}
                result["final"] = final_json
            except Exception as e:
                result["final"] = {"error": str(e)}
        except Exception:
            result["errors"].append(traceback.format_exc())
        return result


# ---------------------------
# DOCX builder functions (robust)
# ---------------------------
def _add_paragraph(doc, text, bold=False):
    """Append a paragraph with a single (optionally bold) run."""
    p = doc.add_paragraph()
    run = p.add_run(text)
    run.bold = bold


def _nested_final(final_json: Optional[dict], key: str) -> dict:
    """Fetch final_json[key] or final_json['final'][key], tolerating non-dicts.

    Guards against the formatter returning a string/list under 'final',
    which previously raised AttributeError on .get().
    """
    if not isinstance(final_json, dict):
        return {}
    direct = final_json.get(key)
    if isinstance(direct, dict) and direct:
        return direct
    nested = final_json.get("final")
    if isinstance(nested, dict):
        inner = nested.get(key)
        if isinstance(inner, dict):
            return inner
    return {}


def build_question_paper_docx(path: Path, final_json: Optional[dict], generator_raw: str, subject: str):
    """Write the printable question paper (raw text + structured table) to *path*."""
    from docx import Document
    doc = Document()
    doc.add_heading(f"SNS College of Technology — {subject}", level=1)
    doc.add_paragraph("Instructions: Answer as per marks. Each question is tagged with Bloom's level and Unit.")
    doc.add_paragraph("\nPrintable Question Paper:\n")
    if generator_raw:
        doc.add_paragraph(generator_raw[:20000])

    questions = []
    try:
        if isinstance(final_json, dict):
            fq = final_json.get("final_qp") or final_json.get("final") or final_json
            if isinstance(fq, dict):
                questions = fq.get("questions", []) or []
    except Exception:
        questions = []

    if questions:
        table = doc.add_table(rows=1, cols=5)
        hdr = table.rows[0].cells
        hdr[0].text = "Q.No"
        hdr[1].text = "SubQ"
        hdr[2].text = "Question"
        hdr[3].text = "Course Outcome"
        hdr[4].text = "Bloom / Tags"
        for q in questions:
            row = table.add_row().cells
            row[0].text = str(q.get("question_no", ""))
            row[1].text = str(q.get("sub_no", ""))
            row[2].text = str(q.get("question_text", "")).strip()
            row[3].text = str(q.get("course_outcome", ""))
            row[4].text = f"{q.get('bloom_level','')} | {q.get('tags','')}"
    else:
        doc.add_paragraph("No structured questions were produced by the formatter. See the raw generator output above.")
    doc.save(path)


def build_answers_docx(path: Path, final_json: Optional[dict], subject: str):
    """Write the answer key to *path*; falls back to dumping the raw final JSON."""
    from docx import Document
    doc = Document()
    doc.add_heading(f"Answer Key — {subject}", level=1)
    # try multiple possible locations (top level, or nested under 'final')
    answers = _nested_final(final_json, "answers")
    if answers:
        for k, v in answers.items():
            p = doc.add_paragraph()
            p.add_run(f"{k}:\n").bold = True
            doc.add_paragraph(str(v))
    else:
        # fallback: safe dump
        try:
            safe_dump = json.dumps(final_json or {"note": "No final JSON"}, indent=2)[:15000]
        except Exception:
            safe_dump = str(final_json)[:15000]
        doc.add_paragraph("No structured answers provided by AI. Falling back to raw final JSON (truncated):")
        doc.add_paragraph(safe_dump)
    doc.save(path)


def build_obe_docx(path: Path, final_json: Optional[dict], subject: str):
    """Write the OBE (outcome-based education) summary to *path*."""
    from docx import Document
    doc = Document()
    doc.add_heading(f"OBE Summary — {subject}", level=1)
    obe = _nested_final(final_json, "obe")
    try:
        doc.add_paragraph(json.dumps(obe or {"note": "No OBE produced"}, indent=2)[:15000])
    except Exception:
        doc.add_paragraph(str(obe)[:15000])
    doc.save(path)


# ---------------------------
# Initialize clients (raise friendly error if secrets missing)
# ---------------------------
try:
    groq_client = GroqClient(api_key=os.getenv("GROQ_API_KEY"))
    serp_client = SerpClient(api_key=os.getenv("SERPAPI_KEY"))
    orchestrator = MultiAgentOrchestrator(groq_client, serp_client)
except Exception as e:
    orchestrator = None
    init_error = str(e)
else:
    init_error = None


# ---------------------------
# Gradio UI: single-file app
# ---------------------------
def run_system_ui(subject, stream, partA, partB, partC, syllabus_file, ref_file):
    """Gradio callback: run the pipeline and return three docx paths + status text."""
    if init_error:
        return None, None, None, f"Server init error: {init_error}"
    try:
        # extract text from uploaded syllabus and reference QP
        syllabus_text = extract_text_from_gradio_file(syllabus_file)
        ref_text = extract_text_from_gradio_file(ref_file) if ref_file else ""
        if not syllabus_text:
            sample_path = "/mnt/data/cloud_computing_syllabus.txt"
            msg = ("Syllabus extraction failed or file empty. "
                   f"Use the sample syllabus for testing: {sample_path} or upload a .txt/.pdf/.docx.")
            return None, None, None, msg

        # call orchestrator
        out = orchestrator.run_pipeline(subject=subject, stream=stream,
                                        partA=int(partA), partB=int(partB), partC=int(partC),
                                        syllabus_text=syllabus_text, ref_qp_text=ref_text)

        # Ensure final_json is always a dict (fallback if None or invalid)
        raw_final = out.get("final")
        if isinstance(raw_final, dict):
            final_json = raw_final
        else:
            final_json = {
                "final_qp": {"questions": []},
                "answers": {},
                "obe": {},
                "error": "Formatter returned invalid JSON or None.",
                "generator_raw_sample": (out.get("generator_raw") or "")[:5000],
            }
        gen_raw = out.get("generator_raw", "")

        # write docx files to temp dir
        tmpdir = Path(tempfile.mkdtemp())
        qp_path = tmpdir / f"{subject.replace(' ','_')}_QuestionPaper.docx"
        ans_path = tmpdir / f"{subject.replace(' ','_')}_AnswerKey.docx"
        obe_path = tmpdir / f"{subject.replace(' ','_')}_OBE_Summary.docx"
        build_question_paper_docx(qp_path, final_json, gen_raw, subject)
        build_answers_docx(ans_path, final_json, subject)
        build_obe_docx(obe_path, final_json, subject)

        return str(qp_path), str(ans_path), str(obe_path), "Generation completed successfully."
    except Exception as e:
        tb = traceback.format_exc()
        return None, None, None, f"Generation failed: {e}\n\n{tb}"


# Build UI
with gr.Blocks() as app:
    gr.Markdown("## Multi-Agent Question Paper Generator (Groq + SerpAPI) — Single-file app")
    if init_error:
        gr.Markdown(f"**Initialization error:** {init_error}")
    with gr.Row():
        subject = gr.Textbox(label="Subject Name", value="Cloud Computing")
        stream = gr.Dropdown(label="Stream", choices=["CSE", "Non-CSE"], value="Non-CSE")
    with gr.Row():
        partA = gr.Number(label="Part A (number of short questions)", value=5, precision=0)
        partB = gr.Number(label="Part B (number of long questions / either-or pairs)", value=5, precision=0)
        partC = gr.Number(label="Part C (number of case/design questions)", value=1, precision=0)
    syllabus = gr.File(label="Upload Syllabus (.txt / .pdf / .docx)")
    ref_qp = gr.File(label="Reference QP (optional)")
    generate_btn = gr.Button("Generate Question Paper")
    qp_file = gr.File(label="Question Paper (.docx)")
    ans_file = gr.File(label="Answer Key (.docx)")
    obe_file = gr.File(label="OBE Summary (.docx)")
    status = gr.Markdown("Status: Idle")
    generate_btn.click(fn=run_system_ui,
                       inputs=[subject, stream, partA, partB, partC, syllabus, ref_qp],
                       outputs=[qp_file, ans_file, obe_file, status])

# Launch
if __name__ == "__main__":
    app.launch()