Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,142 +1,405 @@
|
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import tempfile
|
|
|
|
| 3 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
-
from pypdf import PdfReader
|
| 6 |
-
|
| 7 |
-
from agents import GroqClient, SerpClient
|
| 8 |
-
from multi_agent import MultiAgentOrchestrator
|
| 9 |
-
from docx_builder import (
|
| 10 |
-
build_question_paper_docx,
|
| 11 |
-
build_answers_docx,
|
| 12 |
-
build_obe_docx,
|
| 13 |
-
)
|
| 14 |
-
|
| 15 |
-
# =====================================================
|
| 16 |
-
# SAFE FILE TEXT EXTRACTION (TXT, DOCX, PDF Supported)
|
| 17 |
-
# =====================================================
|
| 18 |
-
def extract_text(file_obj):
|
| 19 |
-
"""Reads syllabus from .txt, .docx or .pdf without breaking."""
|
| 20 |
-
name = file_obj.name.lower()
|
| 21 |
-
|
| 22 |
-
# TXT
|
| 23 |
-
if name.endswith(".txt"):
|
| 24 |
-
content = file_obj.read()
|
| 25 |
-
try:
|
| 26 |
-
return content.decode("utf-8", errors="ignore")
|
| 27 |
-
except:
|
| 28 |
-
return content.decode("latin-1", errors="ignore")
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
try:
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
return
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
try:
|
| 42 |
-
|
| 43 |
-
pages = [p.extract_text() or "" for p in reader.pages]
|
| 44 |
-
return "\n".join(pages)
|
| 45 |
except Exception:
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
#
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
serp = SerpClient(api_key=serp_key)
|
| 60 |
-
|
| 61 |
-
return groq, serp
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
groq_client, serp_client = init_clients()
|
| 65 |
-
orchestrator = MultiAgentOrchestrator(groq_client, serp_client)
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
# =====================================================
|
| 69 |
-
# MAIN PIPELINE FUNCTION
|
| 70 |
-
# =====================================================
|
| 71 |
-
def run_system(subject, stream, partA, partB, partC, syl_file, ref_file):
|
| 72 |
-
|
| 73 |
-
if syl_file is None:
|
| 74 |
-
return None, None, None, "Error: Upload a syllabus file."
|
| 75 |
-
|
| 76 |
-
# Extract text safely
|
| 77 |
-
syllabus_text = extract_text(syl_file)
|
| 78 |
-
ref_text = extract_text(ref_file) if ref_file else ""
|
| 79 |
-
|
| 80 |
-
# Run multi-agent orchestration
|
| 81 |
-
output = orchestrator.run_pipeline(
|
| 82 |
-
subject=subject,
|
| 83 |
-
stream=stream,
|
| 84 |
-
partA=int(partA),
|
| 85 |
-
partB=int(partB),
|
| 86 |
-
partC=int(partC),
|
| 87 |
-
syllabus_text=syllabus_text,
|
| 88 |
-
ref_qp_text=ref_text,
|
| 89 |
-
)
|
| 90 |
-
|
| 91 |
-
final_json = output.get("final", {})
|
| 92 |
-
generator_raw = output.get("generator_raw", "")
|
| 93 |
-
|
| 94 |
-
# Temporary directory for docx exports
|
| 95 |
-
tmpdir = Path(tempfile.mkdtemp())
|
| 96 |
-
|
| 97 |
-
qp_path = tmpdir / f"{subject}_QuestionPaper.docx"
|
| 98 |
-
ans_path = tmpdir / f"{subject}_AnswerKey.docx"
|
| 99 |
-
obe_path = tmpdir / f"{subject}_OBE_Summary.docx"
|
| 100 |
-
|
| 101 |
-
# Build all DOCX files
|
| 102 |
-
build_question_paper_docx(qp_path, final_json, generator_raw, subject)
|
| 103 |
-
build_answers_docx(ans_path, final_json, subject)
|
| 104 |
-
build_obe_docx(obe_path, final_json, subject)
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
with gr.Blocks() as app:
|
| 114 |
-
|
| 115 |
-
|
|
|
|
| 116 |
|
| 117 |
with gr.Row():
|
| 118 |
-
subject = gr.Textbox(label="Subject Name",
|
| 119 |
-
stream = gr.Dropdown(["CSE", "Non-CSE"],
|
| 120 |
|
| 121 |
with gr.Row():
|
| 122 |
-
partA = gr.Number(label="Part A
|
| 123 |
-
partB = gr.Number(label="Part B
|
| 124 |
-
partC = gr.Number(label="Part C
|
| 125 |
|
| 126 |
-
syllabus = gr.File(label="Upload Syllabus (.
|
| 127 |
-
ref_qp = gr.File(label="
|
| 128 |
|
| 129 |
generate_btn = gr.Button("Generate Question Paper")
|
| 130 |
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
status = gr.Markdown("Status:
|
| 135 |
|
| 136 |
-
generate_btn.click(
|
| 137 |
-
run_system,
|
| 138 |
-
inputs=[subject, stream, partA, partB, partC, syllabus, ref_qp],
|
| 139 |
-
outputs=[qp_output, ans_output, obe_output, status]
|
| 140 |
-
)
|
| 141 |
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py (single-file, all-in-one)
|
| 2 |
import os
|
| 3 |
+
import json
|
| 4 |
import tempfile
|
| 5 |
+
import traceback
|
| 6 |
from pathlib import Path
|
| 7 |
+
from typing import Optional, Dict, Any, List
|
| 8 |
+
|
| 9 |
+
import requests
|
| 10 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# ---------------------------
# CONFIG / MODELS (Groq model ids)
# ---------------------------
# Each model id is overridable via an environment variable so the Space can be
# repointed to a newer model without a code change.
# NOTE(review): these default ids may be deprecated by Groq over time —
# confirm against Groq's current model list before deploying.
GENERATOR_MODEL = os.getenv("GENERATOR_MODEL", "llama-3.1-70b-versatile")
VERIFIER_MODEL = os.getenv("VERIFIER_MODEL", "gemma2-27b-it")
FORMATTER_MODEL = os.getenv("FORMATTER_MODEL", "mixtral-8x7b-32768")

# OpenAI-compatible chat-completions endpoint hosted by Groq.
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
# SerpAPI search endpoint (used to fetch realtime evidence snippets).
SERP_URL = "https://serpapi.com/search"
|
| 21 |
+
|
| 22 |
+
# ---------------------------
|
| 23 |
+
# Helpers: Groq Client & SerpClient
|
| 24 |
+
# ---------------------------
|
| 25 |
+
class GroqClient:
    """Thin wrapper around Groq's OpenAI-compatible chat-completions API."""

    def __init__(self, api_key: Optional[str] = None):
        """Store credentials and prebuild request headers.

        Raises:
            RuntimeError: if no key is given and GROQ_API_KEY is unset.
        """
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        if not self.api_key:
            raise RuntimeError("GROQ_API_KEY environment variable or Space secret is required.")
        self.url = GROQ_URL
        self.headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}

    def chat(self, messages: List[Dict[str, str]], model: str, max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """POST a chat-completion request and return the assistant's text.

        Raises RuntimeError on any non-200 HTTP status.  If the response body
        does not have the expected choices/message shape, the whole JSON
        payload is returned serialized (best-effort, so callers still get
        something loggable instead of a KeyError).
        """
        payload = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": 0.95
        }
        # Generous timeout: generation of a full question paper can be slow.
        resp = requests.post(self.url, headers=self.headers, json=payload, timeout=120)
        if resp.status_code != 200:
            raise RuntimeError(f"Groq API error {resp.status_code}: {resp.text}")
        data = resp.json()
        try:
            return data["choices"][0]["message"]["content"]
        except Exception:
            # Unexpected response shape — surface the raw payload instead.
            return json.dumps(data)

    def generate_text(self, system: str, user: str, model: str, max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """Convenience wrapper: one system + one user message -> completion text."""
        messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]
        return self.chat(messages=messages, model=model, max_tokens=max_tokens, temperature=temperature)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class SerpClient:
    """Minimal SerpAPI client used to pull realtime web snippets."""

    def __init__(self, api_key: Optional[str] = None):
        """Store the SerpAPI key.

        Raises:
            RuntimeError: if no key is given and SERPAPI_KEY is unset.
        """
        self.api_key = api_key or os.getenv("SERPAPI_KEY")
        if not self.api_key:
            raise RuntimeError("SERPAPI_KEY environment variable or Space secret is required.")
        self.url = SERP_URL

    def search(self, query: str, num: int = 5) -> Dict[str, Any]:
        """Run a search and return SerpAPI's raw JSON response.

        Raises RuntimeError on any non-200 HTTP status.
        """
        params = {"q": query, "api_key": self.api_key, "num": num}
        resp = requests.get(self.url, params=params, timeout=30)
        if resp.status_code != 200:
            raise RuntimeError(f"SerpAPI error {resp.status_code}: {resp.text}")
        return resp.json()
|
| 68 |
+
|
| 69 |
+
# ---------------------------
|
| 70 |
+
# Safe file text extraction (Gradio returns FileData dict: {"name", "size", "path"})
|
| 71 |
+
# ---------------------------
|
| 72 |
+
def extract_text_from_gradio_file(filedata) -> str:
    """Best-effort text extraction from a Gradio file upload.

    Accepts the several shapes Gradio has used over versions: a FileData
    dict ({"name", "size", "path"}), a plain filepath string / Path, or a
    tempfile-like object exposing ``.name``.  (The original version assumed
    a dict and crashed with AttributeError on a plain path string, which is
    what newer ``gr.File`` components return by default.)

    Supports .txt, .pdf (via pypdf) and .docx (via python-docx); anything
    else is read as bytes and decoded leniently.  Returns "" on any failure
    — callers treat an empty string as "extraction failed".
    """
    if not filedata:
        return ""
    # Normalize the upload to a filesystem path, whatever shape Gradio sent.
    if isinstance(filedata, (str, Path)):
        file_path = filedata
    elif isinstance(filedata, dict):
        file_path = filedata.get("path") or filedata.get("tmp_path") or filedata.get("name")
    else:
        # tempfile._TemporaryFileWrapper or similar object
        file_path = getattr(filedata, "path", None) or getattr(filedata, "name", None)
    if not file_path:
        return ""
    file_path = str(file_path)
    try:
        lower = file_path.lower()
        if lower.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()
        if lower.endswith(".pdf"):
            try:
                from pypdf import PdfReader
                reader = PdfReader(file_path)
                return "\n".join([p.extract_text() or "" for p in reader.pages])
            except Exception:
                # fallback: pypdf missing or PDF unparseable — decode raw bytes
                with open(file_path, "rb") as f:
                    return f.read().decode("utf-8", errors="ignore")
        if lower.endswith(".docx"):
            try:
                import docx
                doc = docx.Document(file_path)
                return "\n".join([p.text for p in doc.paragraphs])
            except Exception:
                with open(file_path, "rb") as f:
                    return f.read().decode("utf-8", errors="ignore")
        # Unknown extension: read bytes and decode leniently.
        with open(file_path, "rb") as f:
            return f.read().decode("utf-8", errors="ignore")
    except Exception:
        # Deliberately swallow all I/O errors — empty string signals failure.
        return ""
|
| 107 |
+
|
| 108 |
+
# ---------------------------
|
| 109 |
+
# Prompt Templates (CSE and Non-CSE) - use your exact doc content here if available
|
| 110 |
+
# ---------------------------
|
| 111 |
+
# Prompt templates.  {partA}/{partB}/{partC} placeholders are filled via
# str.format in build_master_prompt; every other literal brace-free line is
# sent to the model verbatim.  The <<QP_JSON>> marker mentioned here is what
# extract_json_from_text later searches for in the model output.
NONCSE_TEMPLATE = """
Role: You are an expert academic content creator for Mechanical/Electrical/Electronics (Non-CSE).
Task: Generate an internal/continuous-assessment question paper matching GATE style.
Rules:
- Part A: {partA} questions, approx 2 marks each (adjust if marks per question different).
- Part B: {partB} questions, choice/either-or pairs (marks per question ~13; adapt per template).
- Part C: {partC} questions, case/design (higher marks).
- Tag each question at end like: (Bloom's Level: <level> | Unit: <n> | GATE Reference: <year>)
- Provide even unit coverage across the syllabus, ensure ~20% real-world/case-based questions.
- Maintain difficulty index between 1.8 and 2.5.
- Produce two outputs: Human-readable printable QP, and VALID JSON labeled <<QP_JSON>> at the very end, containing "questions" list with fields:
question_no, part, sub_no, marks, unit, course_outcome, bloom_level, tags, question_text
"""

CSE_TEMPLATE = """
Role: You are an expert academic content creator for Computer Science (CSE), aligned with MAANGO BIG15.
Task: Generate an internal/continuous-assessment question paper aligned with industry standards.
Rules:
- Part A: {partA} questions (short-answer)
- Part B: {partB} questions (Either/Or; marks per question ~16)
- Part C: {partC} questions (case/design)
- Tag each question like: (Bloom's Level: <level> | Unit: <n> | Company Tag: <Company, Year>)
- 20% of questions must be industry/case-study oriented.
- Provide printable QP and VALID JSON <<QP_JSON>> as described above.
"""
|
| 136 |
+
|
| 137 |
+
def build_master_prompt(stream: str, subject: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str, realtime_snippets: str) -> str:
    """Assemble the full generator prompt for one question paper.

    Selects the CSE or Non-CSE template by stream name, fills in the part
    counts, then appends subject, truncated syllabus/reference/realtime
    context (hard character caps keep the prompt within model limits), and
    the two-part output instructions (printable QP + trailing <<QP_JSON>>).
    """
    # Any stream name not starting with "cse" (case-insensitive) is Non-CSE.
    template = CSE_TEMPLATE if stream.lower().startswith("cse") else NONCSE_TEMPLATE
    prompt = template.format(partA=partA, partB=partB, partC=partC)
    prompt += f"\nSubject: {subject}\n\nSyllabus (first 15000 chars):\n{(syllabus_text or '')[:15000]}\n\nReference QP (first 8000 chars):\n{(ref_qp_text or '')[:8000]}\n\nRealtime evidence (from web):\n{(realtime_snippets or '')[:5000]}\n\nINSTRUCTIONS:\n1) First provide the printable Question Paper\n2) At the very end provide the JSON labeled <<QP_JSON>> containing 'questions' array with the schema described above. JSON must be valid.\n"
    return prompt
|
| 142 |
+
|
| 143 |
+
# ---------------------------
|
| 144 |
+
# Utility: extract JSON suffix from generator text
|
| 145 |
+
# ---------------------------
|
| 146 |
+
def extract_json_from_text(text: str) -> Optional[dict]:
    """Extract the trailing JSON object from LLM generator output.

    Strategy: look for the <<QP_JSON>> marker and parse everything after it
    (stripping markdown code fences the model sometimes wraps JSON in); if
    that fails or the marker is absent, fall back to parsing from the last
    '{' in the text.  Returns None when nothing parses.

    Fixes over the original: the duplicated fallback branches are merged,
    fenced ```json blocks are unwrapped, and rfind() == -1 no longer causes
    a pointless parse attempt on the last character of the text.
    """
    if not text:
        return None
    marker = "<<QP_JSON>>"
    idx = text.rfind(marker)
    if idx != -1:
        candidate = text[idx + len(marker):].strip()
        # Unwrap ```json ... ``` fences so json.loads sees bare JSON.
        if candidate.startswith("```"):
            candidate = candidate.strip("`")
            if candidate.lower().startswith("json"):
                candidate = candidate[4:]
        try:
            return json.loads(candidate)
        except Exception:
            pass  # fall through to the last-brace heuristic
    # Fallback: try to parse from the last opening brace, if there is one.
    start = text.rfind("{")
    if start == -1:
        return None
    try:
        return json.loads(text[start:])
    except Exception:
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
+
# ---------------------------
|
| 171 |
+
# Multi-agent orchestrator (inlined)
|
| 172 |
+
# ---------------------------
|
| 173 |
+
class MultiAgentOrchestrator:
    """Runs the three-agent pipeline: generator -> verifier -> formatter.

    Each agent is a separate Groq chat call; SerpAPI supplies optional
    realtime web snippets for the generator prompt.  Every stage degrades
    gracefully — partial results and errors are collected into the result
    dict rather than raised to the caller.
    """

    def __init__(self, groq_client: GroqClient, serp_client: SerpClient):
        self.groq = groq_client
        self.serp = serp_client

    def fetch_realtime_snippets(self, subject: str, n: int = 4) -> str:
        """Return up to *n* "title\\nsnippet\\nlink" blocks for the subject.

        Best-effort: any SerpAPI failure yields an empty string so the
        pipeline can proceed without realtime evidence.
        """
        try:
            q = f"{subject} recent developments 2024 2025"
            out = self.serp.search(q, num=n)
            snippets = []
            # serpapi returns organic_results usually
            for item in out.get("organic_results", [])[:n]:
                title = item.get("title", "")
                snippet = item.get("snippet", "") or item.get("snippet_highlighted_words", "")
                link = item.get("link", "")
                if title or snippet:
                    snippets.append(f"{title}\n{snippet}\n{link}")
            # Some SerpAPI responses carry a direct "answer" field instead.
            if not snippets and "answer" in out:
                snippets.append(str(out.get("answer")))
            return "\n\n".join(snippets)
        except Exception as e:
            return ""

    def run_pipeline(self, subject: str, stream: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str) -> Dict[str, Any]:
        """Execute the full pipeline and return a result dict.

        Keys: generator_raw (str), qp_json (dict|None), verifier (dict|None),
        final (dict|None), errors (list of tracebacks).  Only a generator
        failure aborts the run; verifier/formatter failures are recorded
        under their own keys and the pipeline continues.
        """
        result = {"generator_raw": "", "qp_json": None, "verifier": None, "final": None, "errors": []}
        try:
            realtime = self.fetch_realtime_snippets(subject)
            prompt = build_master_prompt(stream, subject, partA, partB, partC, syllabus_text, ref_qp_text, realtime)

            # AGENT 1: GENERATOR — produces printable QP + <<QP_JSON>> blob.
            try:
                gen_out = self.groq.generate_text(system="You are an exam question paper generator.", user=prompt, model=GENERATOR_MODEL, max_tokens=6000, temperature=0.0)
            except Exception as e:
                raise RuntimeError(f"Generator agent failed: {e}")
            result["generator_raw"] = gen_out

            # Try extract JSON from the generator output.
            qp_json = extract_json_from_text(gen_out)
            if qp_json is None:
                # Second chance: ask the generator for JSON only.
                json_only_prompt = prompt + "\n\nNow output ONLY the VALID JSON object 'questions' for the paper (no additional text)."
                gen_json_only = self.groq.generate_text(system="Return JSON only.", user=json_only_prompt, model=GENERATOR_MODEL, max_tokens=3000, temperature=0.0)
                try:
                    qp_json = json.loads(gen_json_only)
                except Exception:
                    # Last resort: pass the raw generator text downstream.
                    qp_json = {"raw_text": gen_out}

            result["qp_json"] = qp_json

            # AGENT 2: VERIFIER — audits the QP JSON, returns corrections.
            try:
                # f-string used only so {{ }} render as literal braces in the
                # example JSON shape shown to the model.
                verifier_prompt = f"You are an academic verifier. Verify the QP JSON below for:\n- Bloom's taxonomy correctness\n- Unit coverage and distribution\n- Correct number of questions per part\n- Tag completeness and Company/GATE tags\n- Difficulty index 1.8-2.5\n- Duplications or ambiguous statements\nReturn a JSON object: {{'corrections': [...], 'issues': [...]}}"
                # Cap the serialized QP at 50k chars to stay within limits.
                verifier_input = json.dumps(qp_json)[:50000]
                ver_out = self.groq.generate_text(system="Verifier agent.", user=verifier_prompt + "\n\n" + verifier_input, model=VERIFIER_MODEL, max_tokens=2000, temperature=0.0)
                try:
                    ver_json = json.loads(ver_out)
                except Exception:
                    # If not valid JSON, return raw text under 'raw'
                    ver_json = {"raw": ver_out}
                result["verifier"] = ver_json
            except Exception as e:
                result["verifier"] = {"error": str(e)}

            # AGENT 3: FORMATTER (apply corrections & produce final JSON)
            try:
                fmt_prompt = (
                    "You are a formatter. Input QP JSON and corrections. Apply corrections, ensure valid JSON structure, "
                    "and produce a single JSON object with keys: final_qp, answers, obe.\n\nQP_JSON:\n"
                    + json.dumps(qp_json)[:50000]
                    + "\n\nVERIFIER_CORRECTIONS:\n"
                    + json.dumps(result["verifier"])[:50000]
                    + "\n\nReturn ONE valid JSON object."
                )
                fmt_out = self.groq.generate_text(system="Formatter agent.", user=fmt_prompt, model=FORMATTER_MODEL, max_tokens=4000, temperature=0.0)
                try:
                    final_json = json.loads(fmt_out)
                except Exception:
                    # Keep everything: raw formatter text + upstream artifacts.
                    final_json = {"raw_formatter_output": fmt_out, "qp_json": qp_json, "verifier": result["verifier"]}
                result["final"] = final_json
            except Exception as e:
                result["final"] = {"error": str(e)}
        except Exception as e:
            # Record the full traceback; caller inspects result["errors"].
            result["errors"].append(traceback.format_exc())
        return result
|
| 257 |
+
|
| 258 |
+
# ---------------------------
|
| 259 |
+
# DOCX builder functions (inline)
|
| 260 |
+
# ---------------------------
|
| 261 |
+
def _add_paragraph(doc, text, bold=False):
|
| 262 |
+
run = doc.add_paragraph().add_run(text)
|
| 263 |
+
run.bold = bold
|
| 264 |
+
|
| 265 |
+
def build_question_paper_docx(path: Path, final_json: dict, generator_raw: str, subject: str):
    """Write the printable question paper to *path* as a .docx file.

    Always dumps the raw generator text (capped); additionally renders a
    5-column table when the structured JSON carries a questions list.
    """
    # Local import keeps python-docx optional until a paper is actually built.
    from docx import Document
    doc = Document()
    doc.add_heading(f"SNS College of Technology — {subject}", level=1)
    doc.add_paragraph("Instructions: Answer as per marks. Each question is tagged with Bloom's level and Unit.")
    doc.add_paragraph("\nPrintable Question Paper:\n")
    if generator_raw:
        # limit to a large but safe size
        doc.add_paragraph(generator_raw[:20000])
    # If structured final_json contains final_qp.questions, create a table.
    # The formatter may nest questions under "final_qp", "final", or top-level.
    questions = []
    if isinstance(final_json, dict):
        fq = final_json.get("final_qp") or final_json.get("final") or final_json
        if isinstance(fq, dict):
            questions = fq.get("questions", []) or []
    if questions:
        table = doc.add_table(rows=1, cols=5)
        hdr = table.rows[0].cells
        hdr[0].text = "Q.No"
        hdr[1].text = "SubQ"
        hdr[2].text = "Question"
        hdr[3].text = "Course Outcome"
        hdr[4].text = "Bloom / Tags"
        for q in questions:
            row = table.add_row().cells
            row[0].text = str(q.get("question_no", ""))
            row[1].text = str(q.get("sub_no", ""))
            row[2].text = str(q.get("question_text", "")).strip()
            row[3].text = str(q.get("course_outcome", ""))
            row[4].text = f"{q.get('bloom_level','')} | {q.get('tags','')}"
    doc.save(path)
|
| 296 |
+
|
| 297 |
+
def build_answers_docx(path: Path, final_json: dict, subject: str):
    """Write the answer key to *path* as a .docx file.

    Looks for answers at final_json["answers"], then final_json["final"]["answers"];
    falls back to dumping whatever is available as pretty-printed JSON.

    Fix over the original: the pipeline can set final_json["final"] to a
    non-dict (e.g. {"error": ...} replaced by a string, or raw formatter
    text), and the original chained .get() call then raised AttributeError;
    both lookups are now isinstance-guarded.
    """
    # Local import keeps python-docx optional until a paper is actually built.
    from docx import Document
    doc = Document()
    doc.add_heading(f"Answer Key — {subject}", level=1)
    answers = {}
    if isinstance(final_json, dict):
        answers = final_json.get("answers") or {}
        if not answers:
            nested = final_json.get("final")
            if isinstance(nested, dict):
                answers = nested.get("answers", {}) or {}
    if isinstance(answers, dict) and answers:
        for k, v in answers.items():
            p = doc.add_paragraph()
            p.add_run(f"{k}:\n").bold = True
            doc.add_paragraph(str(v))
    else:
        # Fallback: dump whatever structure we have (guard non-dict input too).
        fallback = final_json.get("answers", final_json) if isinstance(final_json, dict) else final_json
        doc.add_paragraph(json.dumps(fallback, indent=2)[:15000])
    doc.save(path)
|
| 312 |
+
|
| 313 |
+
def build_obe_docx(path: Path, final_json: dict, subject: str):
    """Write the OBE (Outcome-Based Education) summary to *path* as .docx.

    Looks for the summary at final_json["obe"], then final_json["final"]["obe"],
    and dumps it as pretty-printed JSON (capped at 15k chars).
    NOTE(review): if final_json["final"] is ever a non-dict (the pipeline can
    produce strings there), the chained .get() raises AttributeError — the
    caller's broad except currently absorbs this; confirm intended.
    """
    # Local import keeps python-docx optional until a paper is actually built.
    from docx import Document
    doc = Document()
    doc.add_heading(f"OBE Summary — {subject}", level=1)
    obe = {}
    if isinstance(final_json, dict):
        obe = final_json.get("obe", {}) or final_json.get("final", {}).get("obe", {}) or {}
    doc.add_paragraph(json.dumps(obe, indent=2)[:15000])
    doc.save(path)
|
| 322 |
+
|
| 323 |
+
# ---------------------------
|
| 324 |
+
# Initialize clients (raise friendly error if secrets missing)
|
| 325 |
+
# ---------------------------
|
| 326 |
+
# Build the API clients at import time.  Missing secrets must not crash the
# Space: the failure is captured in init_error and surfaced in the UI, with
# orchestrator left as None so run_system_ui can refuse gracefully.
try:
    groq_client = GroqClient(api_key=os.getenv("GROQ_API_KEY"))
    serp_client = SerpClient(api_key=os.getenv("SERPAPI_KEY"))
    orchestrator = MultiAgentOrchestrator(groq_client, serp_client)
except Exception as e:
    orchestrator = None
    init_error = str(e)
else:
    init_error = None
|
| 335 |
+
|
| 336 |
+
# ---------------------------
|
| 337 |
+
# Gradio UI: single-file app
|
| 338 |
+
# ---------------------------
|
| 339 |
+
def run_system_ui(subject, stream, partA, partB, partC, syllabus_file, ref_file):
    """Gradio click handler: run the pipeline and build the three .docx files.

    Returns a 4-tuple (qp_path, ans_path, obe_path, status_message); the
    first three are None whenever generation fails, with the explanation in
    the status string (this function never raises into Gradio).
    """
    if init_error:
        return None, None, None, f"Server init error: {init_error}"
    try:
        # extract text from uploaded syllabus and reference QP
        syllabus_text = extract_text_from_gradio_file(syllabus_file)
        ref_text = extract_text_from_gradio_file(ref_file) if ref_file else ""
        if not syllabus_text:
            # If the user uploaded nothing or extraction failed, show helpful message referencing the sample file
            # NOTE(review): this /mnt/data path is a development-environment
            # artifact and almost certainly does not exist on the deployed
            # Space — confirm or drop it from the user-facing message.
            sample_path = "/mnt/data/cloud_computing_syllabus.txt"
            msg = ("Syllabus extraction failed or file empty. "
                   f"If you want to test immediately, you can use the sample syllabus located at: {sample_path} "
                   "Upload a .txt/.pdf/.docx file instead.")
            return None, None, None, msg

        # call orchestrator (int() will raise here if counts are empty/None;
        # caught by the broad except below and reported in the status string)
        out = orchestrator.run_pipeline(subject=subject, stream=stream, partA=int(partA), partB=int(partB), partC=int(partC), syllabus_text=syllabus_text, ref_qp_text=ref_text)
        final_json = out.get("final", {})
        gen_raw = out.get("generator_raw", "")

        # write docx files to a fresh temp dir; spaces in the subject are
        # replaced so the filenames stay shell/URL friendly
        tmpdir = Path(tempfile.mkdtemp())
        qp_path = tmpdir / f"{subject.replace(' ','_')}_QuestionPaper.docx"
        ans_path = tmpdir / f"{subject.replace(' ','_')}_AnswerKey.docx"
        obe_path = tmpdir / f"{subject.replace(' ','_')}_OBE_Summary.docx"

        build_question_paper_docx(qp_path, final_json, gen_raw, subject)
        build_answers_docx(ans_path, final_json, subject)
        build_obe_docx(obe_path, final_json, subject)

        return str(qp_path), str(ans_path), str(obe_path), "Generation completed successfully."

    except Exception as e:
        # Surface the full traceback in the status pane for debuggability.
        tb = traceback.format_exc()
        return None, None, None, f"Generation failed: {e}\n\n{tb}"
|
| 374 |
+
|
| 375 |
+
# Build UI
|
| 376 |
# Build the Gradio UI.  Component creation order defines the page layout,
# so statements here must stay in this sequence.
with gr.Blocks() as app:
    gr.Markdown("## Multi-Agent Question Paper Generator (Groq + SerpAPI) — Single-file app")
    if init_error:
        # Surface missing-secret problems directly on the page.
        gr.Markdown(f"**Initialization error:** {init_error}")

    with gr.Row():
        subject = gr.Textbox(label="Subject Name", value="Cloud Computing")
        stream = gr.Dropdown(label="Stream", choices=["CSE", "Non-CSE"], value="Non-CSE")

    with gr.Row():
        partA = gr.Number(label="Part A (number of short questions)", value=5, precision=0)
        partB = gr.Number(label="Part B (number of long questions / either-or pairs)", value=5, precision=0)
        partC = gr.Number(label="Part C (number of case/design questions)", value=1, precision=0)

    syllabus = gr.File(label="Upload Syllabus (.txt / .pdf / .docx)")
    ref_qp = gr.File(label="Reference QP (optional)")

    generate_btn = gr.Button("Generate Question Paper")

    # Output components: three downloadable .docx files plus a status pane.
    qp_file = gr.File(label="Question Paper (.docx)")
    ans_file = gr.File(label="Answer Key (.docx)")
    obe_file = gr.File(label="OBE Summary (.docx)")
    status = gr.Markdown("Status: Idle")

    generate_btn.click(fn=run_system_ui, inputs=[subject, stream, partA, partB, partC, syllabus, ref_qp], outputs=[qp_file, ans_file, obe_file, status])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
+
# Launch
|
| 403 |
+
# Launch the app when run as a script.
if __name__ == "__main__":
    # On Spaces, gradio will handle host/port; for local testing you can set share=True
    app.launch()
|