# Spaces: Sleeping (hosting-page status text captured with the file; not part of the program)
# app.py (patched final single-file)
import json
import os
import re
import tempfile
import traceback
from pathlib import Path
from typing import Optional, Dict, Any, List

import gradio as gr
import requests
# ---------------------------
# CONFIG / MODELS (Groq model ids)
# ---------------------------
# One Groq model id per agent. Each default can be overridden by setting an
# environment variable with the same name as the constant.
GENERATOR_MODEL = os.environ.get("GENERATOR_MODEL", "llama-3.1-70b-versatile")
VERIFIER_MODEL = os.environ.get("VERIFIER_MODEL", "gemma2-27b-it")
FORMATTER_MODEL = os.environ.get("FORMATTER_MODEL", "mixtral-8x7b-32768")

# Fixed service endpoints used by GroqClient and SerpClient.
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
SERP_URL = "https://serpapi.com/search"
# ---------------------------
# Helpers: GroqClient & SerpClient
# ---------------------------
class GroqClient:
    """Small client for the Groq chat-completions endpoint (GROQ_URL)."""

    def __init__(self, api_key: Optional[str] = None):
        """Store the API key and the request headers.

        Args:
            api_key: Groq API key. When omitted or empty, the GROQ_API_KEY
                environment variable is used instead.

        Raises:
            RuntimeError: if no key is available from either source.
        """
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        if not self.api_key:
            raise RuntimeError("GROQ_API_KEY environment variable or Space secret is required.")
        self.url = GROQ_URL
        self.headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}

    @staticmethod
    def _extract_content(data: Any) -> str:
        """Return the first choice's message text from a decoded response.

        Falls back to the JSON dump of the whole payload when the expected
        ``choices[0].message.content`` path is missing or does not hold a
        string, so the method always returns a ``str``.
        """
        try:
            content = data["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            return json.dumps(data)
        if isinstance(content, str):
            return content
        # e.g. a null content field: keep the declared str return type.
        return json.dumps(data)

    def chat(self, messages: List[Dict[str, str]], model: str, max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """Send one chat-completion request and return the reply text.

        Args:
            messages: chat messages as ``{"role": ..., "content": ...}`` dicts.
            model: model id to request.
            max_tokens: completion token limit sent in the payload.
            temperature: sampling temperature sent in the payload.

        Returns:
            The first choice's message content, or the JSON dump of the whole
            response when that field is absent or not a string.

        Raises:
            RuntimeError: on a non-200 status or a body that is not JSON.
        """
        payload = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": 0.95,
        }
        resp = requests.post(self.url, headers=self.headers, json=payload, timeout=120)
        if resp.status_code != 200:
            raise RuntimeError(f"Groq API error {resp.status_code}: {resp.text}")
        try:
            data = resp.json()
        except ValueError as exc:
            # A 200 with an undecodable body should surface as the same error
            # type as other API failures rather than a bare decode error.
            raise RuntimeError(f"Groq API returned a non-JSON body: {resp.text[:500]}") from exc
        return self._extract_content(data)

    def generate_text(self, system: str, user: str, model: str, max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """Run a single system+user exchange through :meth:`chat`.

        Args:
            system: content of the system message.
            user: content of the user message.
            model: model id to request.
            max_tokens: completion token limit.
            temperature: sampling temperature.

        Returns:
            Whatever :meth:`chat` returns for the two-message conversation.
        """
        messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]
        return self.chat(messages=messages, model=model, max_tokens=max_tokens, temperature=temperature)
class SerpClient:
    """Thin wrapper around the SerpAPI search endpoint (SERP_URL)."""

    def __init__(self, api_key: Optional[str] = None):
        """Resolve the API key from the argument or the SERPAPI_KEY env var.

        Raises:
            RuntimeError: if neither source provides a key.
        """
        resolved_key = api_key or os.getenv("SERPAPI_KEY")
        self.api_key = resolved_key
        if not resolved_key:
            raise RuntimeError("SERPAPI_KEY environment variable or Space secret is required.")
        self.url = SERP_URL

    def search(self, query: str, num: int = 5) -> Dict[str, Any]:
        """Run one search and return the decoded JSON response.

        Args:
            query: search query, sent as ``q``.
            num: value sent as the ``num`` request parameter.

        Raises:
            RuntimeError: if the response status is not 200.
        """
        response = requests.get(
            self.url,
            params={"q": query, "api_key": self.api_key, "num": num},
            timeout=30,
        )
        if response.status_code == 200:
            return response.json()
        raise RuntimeError(f"SerpAPI error {response.status_code}: {response.text}")
# ---------------------------
# Safe file text extraction (handles dict and NamedString)
# ---------------------------
def extract_text_from_gradio_file(filedata) -> str:
    """Return the text content of an uploaded file, or "" when unavailable.

    Accepted inputs:
      - a FileData-style dict: {"name": "...", "path": "/tmp/..", "size": n}
      - a string path (including str subclasses such as NamedString) or a
        path-like object
      - any other object exposing the path through a ``name`` attribute

    ``.txt`` files are read as UTF-8 text, ``.pdf`` through pypdf and
    ``.docx`` through python-docx. Any other extension is read as bytes and
    decoded as UTF-8 with undecodable bytes dropped.

    This is best-effort: every failure (missing file, missing parser
    library, corrupt document) yields "" so the caller can show a single
    "extraction failed" message. A PDF or DOCX that cannot be parsed is NOT
    decoded as raw bytes, because that would hand binary noise downstream as
    if it were the document's text.
    """
    if not filedata:
        return ""

    # Determine the file path from whichever shape was passed in.
    if isinstance(filedata, dict):
        file_path = filedata.get("path") or filedata.get("name")
    elif isinstance(filedata, (str, os.PathLike)):
        file_path = os.fspath(filedata)
    else:
        file_path = getattr(filedata, "name", None)
    if not file_path:
        return ""
    file_path = str(file_path)

    lower = file_path.lower()
    try:
        if lower.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()
        if lower.endswith(".pdf"):
            from pypdf import PdfReader

            reader = PdfReader(file_path)
            return "\n".join(page.extract_text() or "" for page in reader.pages)
        if lower.endswith(".docx"):
            import docx

            document = docx.Document(file_path)
            return "\n".join(paragraph.text for paragraph in document.paragraphs)
        # Unknown extension: treat the content as text.
        with open(file_path, "rb") as f:
            return f.read().decode("utf-8", errors="ignore")
    except Exception:
        # Deliberately broad: the third-party parsers raise many unrelated
        # exception types, and the contract is "" on any failure.
        return ""
# ---------------------------
# Prompt Templates (CSE and Non-CSE)
# ---------------------------
# Prompt preambles, one per stream. Both are str.format templates whose only
# placeholders are {partA}, {partB} and {partC}.

# Used for every stream that is not CSE.
NONCSE_TEMPLATE = """
Role: You are an expert academic content creator for Mechanical/Electrical/Electronics (Non-CSE).
Task: Generate an internal/continuous-assessment question paper matching GATE style.
Rules:
- Part A: {partA} questions, approx 2 marks each.
- Part B: {partB} questions, choice/either-or pairs.
- Part C: {partC} questions, case/design (higher marks).
- Tag each question at end like: (Bloom's Level: <level> | Unit: <n> | GATE Reference: <year>)
- Provide even unit coverage across the syllabus, ensure ~20% real-world/case-based questions.
- Maintain difficulty index between 1.8 and 2.5.
- Produce two outputs: Human-readable printable QP, and VALID JSON labeled <<QP_JSON>> at the very end containing "questions".
"""

# Used when the selected stream starts with "cse".
CSE_TEMPLATE = """
Role: You are an expert academic content creator for Computer Science (CSE), aligned with MAANGO BIG15.
Task: Generate an internal/continuous-assessment question paper aligned with industry standards.
Rules:
- Part A: {partA} short-answer questions.
- Part B: {partB} questions (Either/Or pairs).
- Part C: {partC} questions (case/design).
- Tag each question like: (Bloom's Level: <level> | Unit: <n> | Company Tag: <Company, Year>)
- 20% of questions must be industry/case-study oriented.
- Provide printable QP and VALID JSON <<QP_JSON>> as described above.
"""
def build_master_prompt(stream: str, subject: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str, realtime_snippets: str) -> str:
    """Assemble the generator prompt for one question paper.

    The CSE template is used when *stream* starts with "cse" (case-insensitive),
    otherwise the Non-CSE template. The part counts are substituted into the
    template, then the subject, truncated syllabus (15000 chars), reference
    paper (8000 chars), web evidence (5000 chars) and the output instructions
    are appended. ``None`` or empty text inputs are treated as "".
    """
    is_cse = stream.lower().startswith("cse")
    header = (CSE_TEMPLATE if is_cse else NONCSE_TEMPLATE).format(partA=partA, partB=partB, partC=partC)

    syllabus_excerpt = (syllabus_text or "")[:15000]
    reference_excerpt = (ref_qp_text or "")[:8000]
    evidence_excerpt = (realtime_snippets or "")[:5000]

    tail = (
        f"\nSubject: {subject}\n\n"
        f"Syllabus (first 15000 chars):\n{syllabus_excerpt}\n\n"
        f"Reference QP (first 8000 chars):\n{reference_excerpt}\n\n"
        f"Realtime evidence (from web):\n{evidence_excerpt}\n\n"
        "INSTRUCTIONS:\n"
        "1) First provide the printable Question Paper\n"
        "2) At the very end provide the JSON labeled <<QP_JSON>> containing 'questions' array. JSON must be valid.\n"
    )
    return header + tail
# ---------------------------
# Utility: extract JSON suffix from generator text
# ---------------------------
def _first_json_object(text: str) -> Optional[dict]:
    """Return the first JSON object that decodes starting at some "{" in *text*.

    Candidate start positions are tried left to right, so the outermost
    object wins over objects nested inside it. Text surrounding the object
    (prose, code fences, trailing remarks) is ignored.
    """
    decoder = json.JSONDecoder()
    pos = text.find("{")
    while pos != -1:
        try:
            obj, _end = decoder.raw_decode(text, pos)
        except ValueError:
            obj = None
        if isinstance(obj, dict):
            return obj
        pos = text.find("{", pos + 1)
    return None


def extract_json_from_text(text: str) -> Optional[dict]:
    """Extract the question-paper JSON object embedded in model output.

    The text after the last ``<<QP_JSON>>`` marker is searched first; if the
    marker is absent or nothing after it decodes, the whole text is searched.

    Args:
        text: raw model output; may be empty or None.

    Returns:
        The decoded JSON object as a dict, or None when no JSON object can be
        found. Arrays and scalars are never returned.
    """
    if not text:
        return None
    marker = "<<QP_JSON>>"
    idx = text.rfind(marker)
    if idx != -1:
        found = _first_json_object(text[idx + len(marker):])
        if found is not None:
            return found
    # No marker, or nothing usable after it: scan the full output instead.
    return _first_json_object(text)
# ---------------------------
# Multi-agent orchestrator (inlined)
# ---------------------------
class MultiAgentOrchestrator:
    """Runs the generator -> verifier -> formatter agent chain for one paper."""

    def __init__(self, groq_client: "GroqClient", serp_client: "SerpClient"):
        """Keep references to the chat client and the search client."""
        self.groq = groq_client
        self.serp = serp_client

    @staticmethod
    def _snippet_text(item: Dict[str, Any]) -> str:
        """Return a search result's snippet as plain text.

        ``snippet_highlighted_words`` is used when ``snippet`` is empty; it is
        a sequence of words, so it is joined with spaces rather than rendered
        as a Python list repr.
        """
        snippet = item.get("snippet") or item.get("snippet_highlighted_words") or ""
        if isinstance(snippet, (list, tuple)):
            return " ".join(str(part) for part in snippet)
        return str(snippet)

    @staticmethod
    def _parse_json_reply(reply: str, fallback: Any) -> Any:
        """Decode an agent reply that is supposed to be JSON.

        Strict ``json.loads`` is tried first. If the reply is wrapped in
        prose or code fences, the embedded object is recovered with
        ``extract_json_from_text``. *fallback* is returned when neither works.
        """
        try:
            return json.loads(reply)
        except (TypeError, ValueError):
            recovered = extract_json_from_text(reply)
            return recovered if recovered is not None else fallback

    def fetch_realtime_snippets(self, subject: str, n: int = 4) -> str:
        """Search the web for recent material on *subject*.

        Returns up to *n* "title / snippet / link" entries separated by blank
        lines, or the response's ``answer`` field when there are no organic
        results. Any failure yields "" because the evidence is optional.
        """
        try:
            query = f"{subject} recent developments 2024 2025"
            out = self.serp.search(query, num=n)
            snippets = []
            for item in out.get("organic_results", [])[:n]:
                title = item.get("title", "")
                snippet = self._snippet_text(item)
                link = item.get("link", "")
                if title or snippet:
                    snippets.append(f"{title}\n{snippet}\n{link}")
            if not snippets and "answer" in out:
                snippets.append(str(out.get("answer")))
            return "\n\n".join(snippets)
        except Exception:
            # Best-effort: the paper can still be generated without evidence.
            return ""

    def _retry_json_only(self, prompt: str, gen_out: str, errors: List[str]) -> Any:
        """Ask the generator again for the JSON alone.

        A failed request or an unparsable reply degrades to
        ``{"raw_text": gen_out}`` so the verifier and formatter still run; a
        failed request is also noted in *errors*.
        """
        fallback = {"raw_text": gen_out}
        json_only_prompt = prompt + "\n\nNow output ONLY the VALID JSON object 'questions' for the paper (no additional text)."
        try:
            reply = self.groq.generate_text(system="Return JSON only.", user=json_only_prompt, model=GENERATOR_MODEL, max_tokens=3000, temperature=0.0)
        except Exception as exc:
            errors.append(f"JSON-only retry failed: {exc}")
            return fallback
        return self._parse_json_reply(reply, fallback)

    def run_pipeline(self, subject: str, stream: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str) -> Dict[str, Any]:
        """Generate, verify and format one question paper.

        Returns:
            A dict with keys:
              generator_raw: raw text from the generator agent.
              qp_json: question JSON extracted from the generator output (or
                  a ``{"raw_text": ...}`` placeholder).
              verifier: verifier reply as JSON, ``{"raw": ...}`` if it was
                  not JSON, or ``{"error": ...}`` if the call failed.
              final: formatter reply as JSON, a dict carrying the raw reply
                  if it was not JSON, or ``{"error": ...}`` if the call failed.
              errors: messages/tracebacks for failures that stopped or
                  degraded the pipeline.

        The method does not raise; failures are reported through the result.
        """
        result = {"generator_raw": "", "qp_json": None, "verifier": None, "final": None, "errors": []}
        try:
            realtime = self.fetch_realtime_snippets(subject)
            prompt = build_master_prompt(stream, subject, partA, partB, partC, syllabus_text, ref_qp_text, realtime)

            # AGENT 1: GENERATOR
            try:
                gen_out = self.groq.generate_text(system="You are an exam question paper generator.", user=prompt, model=GENERATOR_MODEL, max_tokens=6000, temperature=0.0)
            except Exception as e:
                raise RuntimeError(f"Generator agent failed: {e}") from e
            result["generator_raw"] = gen_out

            # Try to extract the JSON; ask again for JSON only if that fails.
            qp_json = extract_json_from_text(gen_out)
            if qp_json is None:
                qp_json = self._retry_json_only(prompt, gen_out, result["errors"])
            result["qp_json"] = qp_json

            # AGENT 2: VERIFIER
            try:
                verifier_prompt = (
                    "You are an academic verifier. Verify the QP JSON below for:\n"
                    "- Bloom's taxonomy correctness\n"
                    "- Unit coverage and distribution\n"
                    "- Correct number of questions per part\n"
                    "- Tag completeness and Company/GATE tags\n"
                    "- Difficulty index 1.8-2.5\n"
                    "- Duplications or ambiguous statements\n"
                    "Return a JSON object: {'corrections': [...], 'issues': [...]}"
                )
                verifier_input = json.dumps(qp_json)[:50000]
                ver_out = self.groq.generate_text(system="Verifier agent.", user=verifier_prompt + "\n\n" + verifier_input, model=VERIFIER_MODEL, max_tokens=2000, temperature=0.0)
                result["verifier"] = self._parse_json_reply(ver_out, {"raw": ver_out})
            except Exception as e:
                result["verifier"] = {"error": str(e)}

            # AGENT 3: FORMATTER
            try:
                fmt_prompt = (
                    "You are a formatter. Input QP JSON and corrections. Apply corrections, ensure valid JSON structure, "
                    "and produce a single JSON object with keys: final_qp, answers, obe.\n\nQP_JSON:\n"
                    + json.dumps(qp_json)[:50000]
                    + "\n\nVERIFIER_CORRECTIONS:\n"
                    + json.dumps(result["verifier"])[:50000]
                    + "\n\nReturn ONE valid JSON object."
                )
                fmt_out = self.groq.generate_text(system="Formatter agent.", user=fmt_prompt, model=FORMATTER_MODEL, max_tokens=4000, temperature=0.0)
                result["final"] = self._parse_json_reply(
                    fmt_out,
                    {"raw_formatter_output": fmt_out, "qp_json": qp_json, "verifier": result["verifier"]},
                )
            except Exception as e:
                result["final"] = {"error": str(e)}
        except Exception:
            result["errors"].append(traceback.format_exc())
        return result
# ---------------------------
# DOCX builder functions (robust)
# ---------------------------
def _add_paragraph(doc, text, bold=False):
    """Append a new paragraph to *doc* holding one run of *text*.

    The run's ``bold`` attribute is set to *bold*. Returns None.
    """
    doc.add_paragraph().add_run(text).bold = bold
def build_question_paper_docx(path: Path, final_json: Optional[dict], generator_raw: str, subject: str):
    """Write the question paper document to *path*.

    The document holds a heading, a fixed instruction line, the raw generator
    text (first 20000 chars) and, when structured questions are available, a
    five-column table of them.

    Questions are looked up under ``final_qp`` or ``final`` (falling back to
    *final_json* itself). That value may be a dict with a ``questions`` list
    or a bare list of questions. Entries that are not dicts are written as
    plain text in the Question column instead of aborting the document.

    Args:
        path: destination .docx path.
        final_json: formatter output, or None.
        generator_raw: raw generator text; may be empty.
        subject: subject name used in the heading.
    """
    from docx import Document

    doc = Document()
    doc.add_heading(f"SNS College of Technology — {subject}", level=1)
    doc.add_paragraph("Instructions: Answer as per marks. Each question is tagged with Bloom's level and Unit.")
    doc.add_paragraph("\nPrintable Question Paper:\n")
    if generator_raw:
        doc.add_paragraph(generator_raw[:20000])

    questions = []
    if isinstance(final_json, dict):
        fq = final_json.get("final_qp") or final_json.get("final") or final_json
        if isinstance(fq, dict):
            candidate = fq.get("questions")
            # Only a real list is iterated; anything else means "no questions".
            if isinstance(candidate, list):
                questions = candidate
        elif isinstance(fq, list):
            questions = fq

    if questions:
        table = doc.add_table(rows=1, cols=5)
        hdr = table.rows[0].cells
        hdr[0].text = "Q.No"
        hdr[1].text = "SubQ"
        hdr[2].text = "Question"
        hdr[3].text = "Course Outcome"
        hdr[4].text = "Bloom / Tags"
        for q in questions:
            row = table.add_row().cells
            if not isinstance(q, dict):
                # e.g. a question given as a bare string.
                row[2].text = str(q).strip()
                continue
            row[0].text = str(q.get("question_no", ""))
            row[1].text = str(q.get("sub_no", ""))
            row[2].text = str(q.get("question_text", "")).strip()
            row[3].text = str(q.get("course_outcome", ""))
            row[4].text = f"{q.get('bloom_level','')} | {q.get('tags','')}"
    else:
        doc.add_paragraph("No structured questions were produced by the formatter. See the raw generator output above.")
    doc.save(path)
def build_answers_docx(path: Path, final_json: Optional[dict], subject: str):
    """Write the answer-key document to *path*.

    Answers are read from ``final_json["answers"]`` or, failing that, from
    ``final_json["final"]["answers"]``. A non-empty dict is written as one
    bold key line plus one value paragraph per entry. Otherwise the document
    contains a truncated dump of *final_json* so nothing is silently lost.

    Args:
        path: destination .docx path.
        final_json: formatter output, or None.
        subject: subject name used in the heading.
    """
    from docx import Document

    doc = Document()
    doc.add_heading(f"Answer Key — {subject}", level=1)

    answers = {}
    if isinstance(final_json, dict):
        # Try the known locations. "final" may be present but not a dict
        # (None, a string, ...), so it is checked before being indexed.
        answers = final_json.get("answers")
        if not answers:
            nested = final_json.get("final")
            answers = nested.get("answers") if isinstance(nested, dict) else None
        answers = answers or {}

    if isinstance(answers, dict) and answers:
        for k, v in answers.items():
            p = doc.add_paragraph()
            p.add_run(f"{k}:\n").bold = True
            doc.add_paragraph(str(v))
    else:
        # Fallback: dump whatever the formatter produced.
        try:
            safe_dump = json.dumps(final_json or {"note": "No final JSON"}, indent=2)[:15000]
        except (TypeError, ValueError):
            # Not JSON-serialisable; fall back to its string form.
            safe_dump = str(final_json)[:15000]
        doc.add_paragraph("No structured answers provided by AI. Falling back to raw final JSON (truncated):")
        doc.add_paragraph(safe_dump)
    doc.save(path)
def build_obe_docx(path: Path, final_json: Optional[dict], subject: str):
    """Write the OBE summary document to *path*.

    The summary is read from ``final_json["obe"]`` or, failing that, from
    ``final_json["final"]["obe"]``, and written as indented JSON truncated to
    15000 characters. A placeholder note is written when nothing is found.

    Args:
        path: destination .docx path.
        final_json: formatter output, or None.
        subject: subject name used in the heading.
    """
    from docx import Document

    doc = Document()
    doc.add_heading(f"OBE Summary — {subject}", level=1)

    obe = {}
    if isinstance(final_json, dict):
        obe = final_json.get("obe")
        if not obe:
            # "final" may be present but not a dict, so check before indexing.
            nested = final_json.get("final")
            obe = nested.get("obe") if isinstance(nested, dict) else None
        obe = obe or {}

    try:
        doc.add_paragraph(json.dumps(obe or {"note": "No OBE produced"}, indent=2)[:15000])
    except (TypeError, ValueError):
        # Not JSON-serialisable; fall back to its string form.
        doc.add_paragraph(str(obe)[:15000])
    doc.save(path)
# ---------------------------
# Initialize clients (record a friendly error if secrets are missing)
# ---------------------------
# Build the shared clients once at import time. A failure (for example a
# missing secret) is captured in init_error instead of aborting the import,
# so the UI can still start and display the message.
orchestrator = None
init_error = None
try:
    groq_client = GroqClient(api_key=os.getenv("GROQ_API_KEY"))
    serp_client = SerpClient(api_key=os.getenv("SERPAPI_KEY"))
    orchestrator = MultiAgentOrchestrator(groq_client, serp_client)
except Exception as exc:
    init_error = str(exc)
# ---------------------------
# Gradio UI: single-file app
# ---------------------------
def run_system_ui(subject, stream, partA, partB, partC, syllabus_file, ref_file):
    """Gradio click handler: generate the three .docx files for one paper.

    Args:
        subject: subject name from the textbox.
        stream: selected stream ("CSE" / "Non-CSE").
        partA, partB, partC: question counts from the number inputs.
        syllabus_file: uploaded syllabus (required).
        ref_file: uploaded reference question paper (optional).

    Returns:
        A 4-tuple ``(question_paper_path, answer_key_path, obe_path, status)``.
        The three paths are None whenever generation did not run or failed;
        *status* always carries a human-readable message. When the pipeline
        ran but recorded problems, the files are still returned and the
        status lists the problems instead of claiming success.
    """
    if init_error or orchestrator is None:
        return None, None, None, f"Server init error: {init_error}"
    try:
        # Validate the cheap inputs before doing any file or network work.
        subject = (subject or "").strip()
        if not subject:
            return None, None, None, "Please enter a subject name."
        try:
            counts = [int(partA), int(partB), int(partC)]
        except (TypeError, ValueError):
            return None, None, None, "Part A, Part B and Part C must be whole numbers."
        if min(counts) < 0:
            return None, None, None, "Part A, Part B and Part C must not be negative."

        # Extract text from the uploaded syllabus and reference QP.
        syllabus_text = extract_text_from_gradio_file(syllabus_file)
        ref_text = extract_text_from_gradio_file(ref_file) if ref_file else ""
        if not syllabus_text:
            sample_path = "/mnt/data/cloud_computing_syllabus.txt"
            msg = ("Syllabus extraction failed or file empty. "
                   f"Use the sample syllabus for testing: {sample_path} or upload a .txt/.pdf/.docx.")
            return None, None, None, msg

        out = orchestrator.run_pipeline(subject=subject, stream=stream, partA=counts[0], partB=counts[1], partC=counts[2], syllabus_text=syllabus_text, ref_qp_text=ref_text)

        # Collect everything that went wrong so the status does not claim
        # success for a degraded run.
        warnings = [str(err) for err in (out.get("errors") or [])]

        # Ensure final_json is always a dict (fallback if None or invalid).
        raw_final = out.get("final")
        if isinstance(raw_final, dict):
            final_json = raw_final
            if set(raw_final) == {"error"}:
                warnings.append(f"Formatter agent failed: {raw_final['error']}")
        else:
            final_json = {
                "final_qp": {"questions": []},
                "answers": {},
                "obe": {},
                "error": "Formatter returned invalid JSON or None.",
                "generator_raw_sample": (out.get("generator_raw") or "")[:5000]
            }
            warnings.append(final_json["error"])
        gen_raw = out.get("generator_raw", "")

        # The subject becomes part of the file names, so anything that is not
        # a word character, dot or dash (path separators included) is replaced.
        file_stem = re.sub(r"[^\w.-]+", "_", subject).strip("._") or "subject"

        # Write the docx files to a fresh temp dir.
        tmpdir = Path(tempfile.mkdtemp())
        qp_path = tmpdir / f"{file_stem}_QuestionPaper.docx"
        ans_path = tmpdir / f"{file_stem}_AnswerKey.docx"
        obe_path = tmpdir / f"{file_stem}_OBE_Summary.docx"
        build_question_paper_docx(qp_path, final_json, gen_raw, subject)
        build_answers_docx(ans_path, final_json, subject)
        build_obe_docx(obe_path, final_json, subject)

        if warnings:
            status_msg = ("Generation completed with warnings (documents may be incomplete):\n\n"
                          + "\n\n".join(warnings))
        else:
            status_msg = "Generation completed successfully."
        return str(qp_path), str(ans_path), str(obe_path), status_msg
    except Exception as e:
        tb = traceback.format_exc()
        return None, None, None, f"Generation failed: {e}\n\n{tb}"
# Build UI
# Declarative layout: components are created in display order and wired to
# run_system_ui at the end.
# NOTE(review): the source lost its indentation, so the nesting below is a
# reconstruction (two rows of inputs, everything else at Blocks level).
# Confirm against the deployed layout.
with gr.Blocks() as app:
    gr.Markdown("## Multi-Agent Question Paper Generator (Groq + SerpAPI) — Single-file app")
    if init_error:
        # Surface a startup failure on the page itself.
        gr.Markdown(f"**Initialization error:** {init_error}")

    # Row 1: what the paper is about.
    with gr.Row():
        subject = gr.Textbox(label="Subject Name", value="Cloud Computing")
        stream = gr.Dropdown(label="Stream", choices=["CSE", "Non-CSE"], value="Non-CSE")

    # Row 2: number of questions per part.
    with gr.Row():
        partA = gr.Number(label="Part A (number of short questions)", value=5, precision=0)
        partB = gr.Number(label="Part B (number of long questions / either-or pairs)", value=5, precision=0)
        partC = gr.Number(label="Part C (number of case/design questions)", value=1, precision=0)

    # Uploads and the trigger button.
    syllabus = gr.File(label="Upload Syllabus (.txt / .pdf / .docx)")
    ref_qp = gr.File(label="Reference QP (optional)")
    generate_btn = gr.Button("Generate Question Paper")

    # Outputs: three downloadable documents plus a status line.
    qp_file = gr.File(label="Question Paper (.docx)")
    ans_file = gr.File(label="Answer Key (.docx)")
    obe_file = gr.File(label="OBE Summary (.docx)")
    status = gr.Markdown("Status: Idle")

    generate_btn.click(
        fn=run_system_ui,
        inputs=[subject, stream, partA, partB, partC, syllabus, ref_qp],
        outputs=[qp_file, ans_file, obe_file, status],
    )

# Launch only when executed as a script.
if __name__ == "__main__":
    app.launch()