# Source: Hugging Face Space "codeboosterstech/SNS" (status when captured: Sleeping).
# File size: 19,492 bytes

# app.py (patched final single-file)
import os
import json
import tempfile
import traceback
from pathlib import Path
from typing import Optional, Dict, Any, List

import requests
import gradio as gr

# ---------------------------
# CONFIG / MODELS (Groq model ids)
# ---------------------------
# One model id per pipeline stage; each default can be overridden by setting
# the environment variable of the same name.
# NOTE(review): the defaults are hard-coded model ids — confirm the provider
# still serves them before deploying.
GENERATOR_MODEL = os.getenv("GENERATOR_MODEL", "llama-3.1-70b-versatile")
VERIFIER_MODEL = os.getenv("VERIFIER_MODEL", "gemma2-27b-it")
FORMATTER_MODEL = os.getenv("FORMATTER_MODEL", "mixtral-8x7b-32768")

# HTTP endpoints: chat completions (called with POST) and web search (called with GET).
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
SERP_URL = "https://serpapi.com/search"

# ---------------------------
# Helpers: Groq Client & SerpClient
# ---------------------------
class GroqClient:
    """Small HTTP wrapper around the chat-completions endpoint stored in GROQ_URL."""

    def __init__(self, api_key: Optional[str] = None):
        """Resolve the API key and prepare the request headers.

        Args:
            api_key: Key to use; when falsy, the ``GROQ_API_KEY`` environment
                variable is consulted instead.

        Raises:
            RuntimeError: If neither source provides a non-empty key.
        """
        key = api_key if api_key else os.getenv("GROQ_API_KEY")
        if not key:
            raise RuntimeError("GROQ_API_KEY environment variable or Space secret is required.")
        self.api_key = key
        self.url = GROQ_URL
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def chat(self, messages: List[Dict[str, str]], model: str, max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """POST one chat request and return the reply text.

        Args:
            messages: Chat messages, each a dict with ``role`` and ``content``.
            model: Model id sent in the payload.
            max_tokens: Completion token limit sent in the payload.
            temperature: Sampling temperature sent in the payload.

        Returns:
            The content of the first choice's message. If the decoded response
            does not have that structure, the whole response re-serialized as
            a JSON string.

        Raises:
            RuntimeError: If the HTTP status code is not 200.
        """
        body = dict(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.95,
        )
        response = requests.post(self.url, headers=self.headers, json=body, timeout=120)
        if response.status_code != 200:
            raise RuntimeError(f"Groq API error {response.status_code}: {response.text}")
        decoded = response.json()
        try:
            reply = decoded["choices"][0]["message"]["content"]
        except Exception:
            # Unexpected response shape: hand back the raw payload as text.
            reply = json.dumps(decoded)
        return reply

    def generate_text(self, system: str, user: str, model: str, max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """Send a system + user message pair through :meth:`chat` and return its result."""
        conversation = [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]
        return self.chat(
            messages=conversation,
            model=model,
            max_tokens=max_tokens,
            temperature=temperature,
        )


class SerpClient:
    """Small HTTP wrapper around the search endpoint stored in SERP_URL."""

    def __init__(self, api_key: Optional[str] = None):
        """Resolve the API key.

        Args:
            api_key: Key to use; when falsy, the ``SERPAPI_KEY`` environment
                variable is consulted instead.

        Raises:
            RuntimeError: If neither source provides a non-empty key.
        """
        key = api_key if api_key else os.getenv("SERPAPI_KEY")
        if not key:
            raise RuntimeError("SERPAPI_KEY environment variable or Space secret is required.")
        self.api_key = key
        self.url = SERP_URL

    def search(self, query: str, num: int = 5) -> Dict[str, Any]:
        """Run one search and return the decoded JSON response.

        Args:
            query: Search query, sent as the ``q`` parameter.
            num: Value sent as the ``num`` parameter.

        Raises:
            RuntimeError: If the HTTP status code is not 200.
        """
        query_params = dict(q=query, api_key=self.api_key, num=num)
        response = requests.get(self.url, params=query_params, timeout=30)
        if response.status_code == 200:
            return response.json()
        raise RuntimeError(f"SerpAPI error {response.status_code}: {response.text}")

# ---------------------------
# Safe file text extraction (handles dict and NamedString)
# ---------------------------
def _resolve_upload_path(filedata) -> str:
    """Return the filesystem path carried by an upload value, or "" if none."""
    if not filedata:
        return ""
    if isinstance(filedata, dict):
        location = filedata.get("path") or filedata.get("name")
    elif isinstance(filedata, (str, os.PathLike)):
        # Plain strings, str subclasses and path objects already are the path.
        location = filedata
    else:
        # File-like / wrapper objects expose the path as an attribute;
        # str() of such an object would be its repr, not a usable path.
        location = (
            getattr(filedata, "path", None)
            or getattr(filedata, "name", None)
            or filedata
        )
    return str(location) if location else ""


def _read_bytes_as_text(file_path: str) -> str:
    """Read a file as bytes and decode it as UTF-8, dropping undecodable bytes."""
    with open(file_path, "rb") as handle:
        return handle.read().decode("utf-8", errors="ignore")


def extract_text_from_gradio_file(filedata) -> str:
    """Extract text from an uploaded file without raising on bad input.

    Accepts either:
      - a FileData-style dict: {"name": "...", "path": "/tmp/..", "size": n}
      - a string / path object holding the file path
      - an object exposing the path as ``.path`` or ``.name`` (e.g. an open
        file handle)

    Returns:
        The text of a .txt file, the concatenated page text of a .pdf (via
        ``pypdf``), the paragraph text of a .docx (via ``docx``), or a lossy
        UTF-8 decode of the raw bytes for any other extension. An empty
        string is returned when no path can be determined or the file cannot
        be read.
    """
    file_path = _resolve_upload_path(filedata)
    if not file_path:
        return ""

    lower = file_path.lower()
    try:
        if lower.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
                return handle.read()
        if lower.endswith(".pdf"):
            try:
                from pypdf import PdfReader
                reader = PdfReader(file_path)
                return "\n".join(page.extract_text() or "" for page in reader.pages)
            except Exception:
                # Parser missing or failed: keep the original best-effort
                # behaviour of decoding the raw bytes.
                # NOTE(review): for binary formats this yields mostly noise.
                return _read_bytes_as_text(file_path)
        if lower.endswith(".docx"):
            try:
                import docx
                document = docx.Document(file_path)
                return "\n".join(paragraph.text for paragraph in document.paragraphs)
            except Exception:
                return _read_bytes_as_text(file_path)
        # Unknown extension: treat the content as (possibly lossy) UTF-8 text.
        return _read_bytes_as_text(file_path)
    except (OSError, ValueError):
        # Missing/unreadable file or an invalid path string.
        return ""

# ---------------------------
# Prompt Templates (CSE and Non-CSE)
# ---------------------------
# Both templates are filled with str.format() in build_master_prompt, which
# supplies the {partA}, {partB} and {partC} question counts. Any literal
# brace added to these strings must therefore be doubled ("{{" / "}}").

# Template used when the selected stream does not start with "cse".
NONCSE_TEMPLATE = """
Role: You are an expert academic content creator for Mechanical/Electrical/Electronics (Non-CSE).
Task: Generate an internal/continuous-assessment question paper matching GATE style.
Rules:
- Part A: {partA} questions, approx 2 marks each.
- Part B: {partB} questions, choice/either-or pairs.
- Part C: {partC} questions, case/design (higher marks).
- Tag each question at end like: (Bloom's Level: <level> | Unit: <n> | GATE Reference: <year>)
- Provide even unit coverage across the syllabus, ensure ~20% real-world/case-based questions.
- Maintain difficulty index between 1.8 and 2.5.
- Produce two outputs: Human-readable printable QP, and VALID JSON labeled <<QP_JSON>> at the very end containing "questions".
"""

# Template used when the selected stream starts with "cse" (case-insensitive).
# NOTE(review): the last rule says "as described above" although this template
# does not describe the JSON itself — confirm the wording is intended.
CSE_TEMPLATE = """
Role: You are an expert academic content creator for Computer Science (CSE), aligned with MAANGO BIG15.
Task: Generate an internal/continuous-assessment question paper aligned with industry standards.
Rules:
- Part A: {partA} short-answer questions.
- Part B: {partB} questions (Either/Or pairs).
- Part C: {partC} questions (case/design).
- Tag each question like: (Bloom's Level: <level> | Unit: <n> | Company Tag: <Company, Year>)
- 20% of questions must be industry/case-study oriented.
- Provide printable QP and VALID JSON <<QP_JSON>> as described above.
"""

def build_master_prompt(stream: str, subject: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str, realtime_snippets: str) -> str:
    """Assemble the generator prompt for one question paper.

    The CSE template is chosen when ``stream`` starts with "cse"
    (case-insensitive), otherwise the Non-CSE template. The template is
    filled with the three question counts and followed by the subject, the
    truncated syllabus / reference paper / web evidence, and the output
    instructions.

    Args:
        stream: Stream name selected in the UI.
        subject: Subject name inserted verbatim.
        partA: Number of Part A questions.
        partB: Number of Part B questions.
        partC: Number of Part C questions.
        syllabus_text: Syllabus text; only the first 15000 characters are used.
        ref_qp_text: Reference paper text; only the first 8000 characters are used.
        realtime_snippets: Web evidence; only the first 5000 characters are used.

    Returns:
        The complete prompt string.
    """
    if stream.lower().startswith("cse"):
        chosen_template = CSE_TEMPLATE
    else:
        chosen_template = NONCSE_TEMPLATE

    # Falsy inputs (None, "") are treated as empty text before truncation.
    syllabus_excerpt = (syllabus_text or '')[:15000]
    reference_excerpt = (ref_qp_text or '')[:8000]
    evidence_excerpt = (realtime_snippets or '')[:5000]

    sections = [
        chosen_template.format(partA=partA, partB=partB, partC=partC),
        f"\nSubject: {subject}\n\n",
        f"Syllabus (first 15000 chars):\n{syllabus_excerpt}\n\n",
        f"Reference QP (first 8000 chars):\n{reference_excerpt}\n\n",
        f"Realtime evidence (from web):\n{evidence_excerpt}\n\n",
        "INSTRUCTIONS:\n",
        "1) First provide the printable Question Paper\n",
        "2) At the very end provide the JSON labeled <<QP_JSON>> containing 'questions' array. JSON must be valid.\n",
    ]
    return "".join(sections)

# ---------------------------
# Utility: extract JSON suffix from generator text
# ---------------------------
def _decode_trailing_json(region: str):
    """Return the JSON object (or array of objects) that closes *region*, else None.

    Every "{" / "[" is tried as a start position, outermost first. A decoded
    value is accepted only when no further bracket characters follow it, so
    an inner fragment of a truncated or broken document is never mistaken
    for the whole payload. Trailing non-JSON text such as a closing Markdown
    fence is tolerated.
    """
    decoder = json.JSONDecoder()
    for pos, char in enumerate(region):
        if char not in "{[":
            continue
        try:
            value, end = decoder.raw_decode(region, pos)
        except (ValueError, RecursionError):
            continue
        if any(c in "{}[]" for c in region[end:]):
            # More structure follows: this was only a fragment.
            continue
        if isinstance(value, dict):
            return value
        if isinstance(value, list) and value and all(isinstance(v, dict) for v in value):
            return value
    return None


def extract_json_from_text(text: str) -> Optional[dict]:
    """Extract the JSON payload that ends a generator reply.

    The text after the last ``<<QP_JSON>>`` marker is searched first; if that
    yields nothing (or the marker is absent) the whole text is searched.
    Nested objects, Markdown code fences around the JSON and prose before it
    are all handled.

    Args:
        text: Raw model output; may be empty or None.

    Returns:
        The decoded JSON object. A top-level JSON array of objects is
        returned as a list. ``None`` is returned when no complete JSON
        payload ends the text; bare scalars are never returned.
    """
    if not text:
        return None

    marker = "<<QP_JSON>>"
    regions = []
    marker_at = text.rfind(marker)
    if marker_at != -1:
        regions.append(text[marker_at + len(marker):])
    regions.append(text)

    for region in regions:
        payload = _decode_trailing_json(region)
        if payload is not None:
            return payload
    return None

# ---------------------------
# Multi-agent orchestrator (inlined)
# ---------------------------
class MultiAgentOrchestrator:
    """Chain three LLM calls (generator, verifier, formatter) into one run.

    The generator drafts the paper, the verifier reviews the extracted JSON
    and the formatter merges both into the final JSON. Web search snippets
    are added to the generator prompt when they can be fetched.
    """

    def __init__(self, groq_client: GroqClient, serp_client: SerpClient):
        """Keep the two API clients used by the pipeline."""
        self.groq = groq_client
        self.serp = serp_client

    @staticmethod
    def _parse_json_reply(reply, fallback):
        """Decode a model reply as JSON, recovering embedded JSON when possible.

        A reply that is valid JSON as a whole is returned decoded. Otherwise
        ``extract_json_from_text`` is asked to find a JSON payload inside the
        reply (models often wrap it in prose or code fences). ``fallback`` is
        returned when nothing container-shaped can be recovered.
        """
        try:
            return json.loads(reply)
        except (TypeError, ValueError):
            recovered = extract_json_from_text(reply)
        return recovered if isinstance(recovered, (dict, list)) else fallback

    def fetch_realtime_snippets(self, subject: str, n: int = 4) -> str:
        """Return up to ``n`` web results about ``subject`` as plain text.

        Each result is rendered as "title / snippet / link" on three lines and
        results are separated by a blank line. This is best-effort: any
        failure (network, unexpected response shape) yields "".
        """
        try:
            q = f"{subject} recent developments 2024 2025"
            out = self.serp.search(q, num=n)
            snippets = []
            for item in out.get("organic_results", [])[:n]:
                title = item.get("title", "")
                snippet = item.get("snippet") or ""
                if not snippet:
                    # The highlighted words arrive as a list; join them so the
                    # prompt gets readable text rather than a list repr.
                    highlights = item.get("snippet_highlighted_words") or ""
                    if isinstance(highlights, (list, tuple)):
                        snippet = " ".join(str(word) for word in highlights)
                    else:
                        snippet = str(highlights)
                link = item.get("link", "")
                if title or snippet:
                    snippets.append(f"{title}\n{snippet}\n{link}")
            if not snippets and "answer" in out:
                snippets.append(str(out.get("answer")))
            return "\n\n".join(snippets)
        except Exception:
            return ""

    def run_pipeline(self, subject: str, stream: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str) -> Dict[str, Any]:
        """Run generator, verifier and formatter and collect their outputs.

        Args:
            subject: Subject name.
            stream: Stream name used to pick the prompt template.
            partA: Number of Part A questions.
            partB: Number of Part B questions.
            partC: Number of Part C questions.
            syllabus_text: Extracted syllabus text.
            ref_qp_text: Extracted reference paper text (may be empty).

        Returns:
            A dict with keys:
              - ``generator_raw``: raw generator reply.
              - ``qp_json``: JSON extracted from the generator, or
                ``{"raw_text": ...}`` when none could be obtained.
              - ``verifier``: verifier JSON, ``{"raw": ...}`` when it is not
                JSON, or ``{"error": ...}`` when the call failed.
              - ``final``: formatter JSON, a dict holding the raw formatter
                output when it is not JSON, or ``{"error": ...}``.
              - ``errors``: list of error strings; non-empty when the
                generator stage or its JSON-only retry failed.

            This method does not raise; failures are reported in the dict.
        """
        result = {"generator_raw": "", "qp_json": None, "verifier": None, "final": None, "errors": []}
        try:
            realtime = self.fetch_realtime_snippets(subject)
            prompt = build_master_prompt(stream, subject, partA, partB, partC, syllabus_text, ref_qp_text, realtime)

            # AGENT 1: GENERATOR
            try:
                gen_out = self.groq.generate_text(system="You are an exam question paper generator.", user=prompt, model=GENERATOR_MODEL, max_tokens=6000, temperature=0.0)
            except Exception as e:
                # Chain the cause so the recorded traceback shows the API error.
                raise RuntimeError(f"Generator agent failed: {e}") from e
            result["generator_raw"] = gen_out

            # Try extract JSON; if absent, ask once more for JSON only.
            qp_json = extract_json_from_text(gen_out)
            if qp_json is None:
                raw_fallback = {"raw_text": gen_out}
                json_only_prompt = prompt + "\n\nNow output ONLY the VALID JSON object 'questions' for the paper (no additional text)."
                try:
                    gen_json_only = self.groq.generate_text(system="Return JSON only.", user=json_only_prompt, model=GENERATOR_MODEL, max_tokens=3000, temperature=0.0)
                except Exception as e:
                    # The paper text already exists, so keep going with it
                    # instead of aborting the remaining stages.
                    result["errors"].append(f"JSON-only retry failed: {e}")
                    qp_json = raw_fallback
                else:
                    qp_json = self._parse_json_reply(gen_json_only, raw_fallback)
            result["qp_json"] = qp_json

            # AGENT 2: VERIFIER
            try:
                verifier_prompt = (
                    "You are an academic verifier. Verify the QP JSON below for:\n"
                    "- Bloom's taxonomy correctness\n"
                    "- Unit coverage and distribution\n"
                    "- Correct number of questions per part\n"
                    "- Tag completeness and Company/GATE tags\n"
                    "- Difficulty index 1.8-2.5\n"
                    "- Duplications or ambiguous statements\n"
                    "Return a JSON object: {'corrections': [...], 'issues': [...]}"
                )
                # Character-level truncation keeps the request bounded; a cut
                # payload is no longer valid JSON but is still sent as text.
                verifier_input = json.dumps(qp_json)[:50000]
                ver_out = self.groq.generate_text(system="Verifier agent.", user=verifier_prompt + "\n\n" + verifier_input, model=VERIFIER_MODEL, max_tokens=2000, temperature=0.0)
                result["verifier"] = self._parse_json_reply(ver_out, {"raw": ver_out})
            except Exception as e:
                result["verifier"] = {"error": str(e)}

            # AGENT 3: FORMATTER
            try:
                fmt_prompt = (
                    "You are a formatter. Input QP JSON and corrections. Apply corrections, ensure valid JSON structure, "
                    "and produce a single JSON object with keys: final_qp, answers, obe.\n\nQP_JSON:\n"
                    + json.dumps(qp_json)[:50000]
                    + "\n\nVERIFIER_CORRECTIONS:\n"
                    + json.dumps(result["verifier"])[:50000]
                    + "\n\nReturn ONE valid JSON object."
                )
                fmt_out = self.groq.generate_text(system="Formatter agent.", user=fmt_prompt, model=FORMATTER_MODEL, max_tokens=4000, temperature=0.0)
                result["final"] = self._parse_json_reply(
                    fmt_out,
                    {"raw_formatter_output": fmt_out, "qp_json": qp_json, "verifier": result["verifier"]},
                )
            except Exception as e:
                result["final"] = {"error": str(e)}
        except Exception:
            result["errors"].append(traceback.format_exc())
        return result

# ---------------------------
# DOCX builder functions (robust)
# ---------------------------
def _add_paragraph(doc, text, bold=False):
    """Append a new paragraph to ``doc`` holding ``text`` in one run with the given bold flag."""
    doc.add_paragraph().add_run(text).bold = bold

def build_question_paper_docx(path: Path, final_json: Optional[dict], generator_raw: str, subject: str):
    """Write the question-paper .docx.

    The document contains a heading, fixed instructions, the raw generator
    text (first 20000 characters) and, when structured questions are
    available, a five-column table of them.

    Questions are looked up under ``final_qp`` (or ``final``, or the top
    level of ``final_json``). That value may be a dict with a ``questions``
    list or directly a list of questions. Entries that are not dicts are
    written as plain text in the "Question" column instead of aborting the
    whole document.

    Args:
        path: Destination file path.
        final_json: Formatter output; anything that is not a dict is ignored.
        generator_raw: Raw generator text; skipped when empty.
        subject: Subject name used in the heading.
    """
    from docx import Document

    doc = Document()
    doc.add_heading(f"SNS College of Technology — {subject}", level=1)
    doc.add_paragraph("Instructions: Answer as per marks. Each question is tagged with Bloom's level and Unit.")
    doc.add_paragraph("\nPrintable Question Paper:\n")
    if generator_raw:
        doc.add_paragraph(generator_raw[:20000])

    questions = []
    if isinstance(final_json, dict):
        holder = final_json.get("final_qp") or final_json.get("final") or final_json
        if isinstance(holder, dict):
            holder = holder.get("questions")
        # Anything other than a list (None, a string, a dict) means
        # "no structured questions" rather than something to iterate.
        if isinstance(holder, (list, tuple)):
            questions = list(holder)

    if questions:
        table = doc.add_table(rows=1, cols=5)
        header_cells = table.rows[0].cells
        for cell, caption in zip(header_cells, ("Q.No", "SubQ", "Question", "Course Outcome", "Bloom / Tags")):
            cell.text = caption
        for question in questions:
            cells = table.add_row().cells
            if not isinstance(question, dict):
                # Model returned a bare string/number for this entry.
                cells[2].text = str(question).strip()
                continue
            cells[0].text = str(question.get("question_no", ""))
            cells[1].text = str(question.get("sub_no", ""))
            cells[2].text = str(question.get("question_text", "")).strip()
            cells[3].text = str(question.get("course_outcome", ""))
            cells[4].text = f"{question.get('bloom_level','')} | {question.get('tags','')}"
    else:
        doc.add_paragraph("No structured questions were produced by the formatter. See the raw generator output above.")

    # save() is given a plain string path rather than a Path object.
    doc.save(str(path))

def build_answers_docx(path: Path, final_json: Optional[dict], subject: str):
    """Write the answer-key .docx.

    Answers are read from ``final_json["answers"]`` or, failing that, from
    ``final_json["final"]["answers"]`` when ``final`` is a dict. A dict of
    answers is written as "key:" headings followed by the value; a list of
    answers is written as numbered entries. When no answers are found, the
    (truncated) final JSON is dumped instead so the file is never empty.

    Args:
        path: Destination file path.
        final_json: Formatter output; anything that is not a dict is treated
            as "no answers".
        subject: Subject name used in the heading.
    """
    from docx import Document

    doc = Document()
    doc.add_heading(f"Answer Key — {subject}", level=1)

    answers = None
    if isinstance(final_json, dict):
        answers = final_json.get("answers")
        nested = final_json.get("final")
        # "final" may be a string or list in model output; only a dict can
        # hold a nested "answers" entry.
        if not answers and isinstance(nested, dict):
            answers = nested.get("answers")

    if isinstance(answers, dict) and answers:
        for key, value in answers.items():
            heading = doc.add_paragraph()
            heading.add_run(f"{key}:\n").bold = True
            doc.add_paragraph(str(value))
    elif isinstance(answers, (list, tuple)) and answers:
        for number, value in enumerate(answers, start=1):
            heading = doc.add_paragraph()
            heading.add_run(f"{number}:\n").bold = True
            doc.add_paragraph(str(value))
    else:
        # fallback: safe dump
        try:
            safe_dump = json.dumps(final_json or {"note": "No final JSON"}, indent=2)[:15000]
        except (TypeError, ValueError):
            safe_dump = str(final_json)[:15000]
        doc.add_paragraph("No structured answers provided by AI. Falling back to raw final JSON (truncated):")
        doc.add_paragraph(safe_dump)

    doc.save(str(path))

def build_obe_docx(path: Path, final_json: Optional[dict], subject: str):
    """Write the OBE-summary .docx.

    The summary is read from ``final_json["obe"]`` or, failing that, from
    ``final_json["final"]["obe"]`` when ``final`` is a dict, and written as
    indented JSON truncated to 15000 characters. A placeholder note is
    written when no summary is found.

    Args:
        path: Destination file path.
        final_json: Formatter output; anything that is not a dict is treated
            as "no summary".
        subject: Subject name used in the heading.
    """
    from docx import Document

    doc = Document()
    doc.add_heading(f"OBE Summary — {subject}", level=1)

    obe = None
    if isinstance(final_json, dict):
        obe = final_json.get("obe")
        nested = final_json.get("final")
        # Guard against "final" being a string/list in model output.
        if not obe and isinstance(nested, dict):
            obe = nested.get("obe")
    obe = obe or {}

    try:
        summary = json.dumps(obe or {"note": "No OBE produced"}, indent=2)[:15000]
    except (TypeError, ValueError):
        # Not JSON-serializable: fall back to its string form.
        summary = str(obe)[:15000]
    doc.add_paragraph(summary)

    doc.save(str(path))

# ---------------------------
# Initialize clients (raise friendly error if secrets missing)
# ---------------------------
# Build the shared clients once at import time. A failure (typically a missing
# API key) is remembered in ``init_error`` rather than raised, so the UI can
# still start and display the message.
orchestrator = None
init_error = None
try:
    groq_client = GroqClient(api_key=os.getenv("GROQ_API_KEY"))
    serp_client = SerpClient(api_key=os.getenv("SERPAPI_KEY"))
    orchestrator = MultiAgentOrchestrator(groq_client, serp_client)
except Exception as exc:
    init_error = str(exc)

# ---------------------------
# Gradio UI: single-file app
# ---------------------------
def _filename_stem(subject) -> str:
    """Turn a subject name into a safe file-name stem (no separators or odd characters)."""
    raw = str(subject or "").strip().replace(" ", "_")
    cleaned = "".join(ch if (ch.isalnum() or ch in "-_.") else "_" for ch in raw)
    cleaned = cleaned.strip("._")[:80]
    return cleaned or "Subject"


def run_system_ui(subject, stream, partA, partB, partC, syllabus_file, ref_file):
    """Handle the "Generate" button: run the pipeline and write the three .docx files.

    Args:
        subject: Subject name from the textbox.
        stream: Selected stream.
        partA: Part A question count (converted with ``int``).
        partB: Part B question count (converted with ``int``).
        partC: Part C question count (converted with ``int``).
        syllabus_file: Uploaded syllabus file value.
        ref_file: Uploaded reference paper value, or a falsy value when absent.

    Returns:
        A 4-tuple ``(question_paper_path, answer_key_path, obe_path, status)``.
        The three paths are ``None`` whenever no documents were produced; the
        status text then explains why. When documents were produced although
        a pipeline stage reported a problem, the status lists those problems
        instead of claiming success.
    """
    if init_error:
        return None, None, None, f"Server init error: {init_error}"
    try:
        # extract text from uploaded syllabus and reference QP
        syllabus_text = extract_text_from_gradio_file(syllabus_file)
        ref_text = extract_text_from_gradio_file(ref_file) if ref_file else ""
        if not syllabus_text:
            sample_path = "/mnt/data/cloud_computing_syllabus.txt"
            msg = ("Syllabus extraction failed or file empty. "
                   f"Use the sample syllabus for testing: {sample_path} or upload a .txt/.pdf/.docx.")
            return None, None, None, msg

        # call orchestrator
        out = orchestrator.run_pipeline(subject=subject, stream=stream, partA=int(partA), partB=int(partB), partC=int(partC), syllabus_text=syllabus_text, ref_qp_text=ref_text)

        gen_raw = out.get("generator_raw") or ""
        problems = [str(item) for item in (out.get("errors") or [])]
        if problems and not gen_raw:
            # The generator itself failed: there is nothing worth writing.
            return None, None, None, "Generation failed:\n\n" + "\n\n".join(problems)

        # Ensure final_json is always a dict (fallback if None or invalid)
        raw_final = out.get("final")
        if isinstance(raw_final, dict):
            final_json = raw_final
            if set(raw_final) == {"error"}:
                # Shape produced by the pipeline when the formatter call raised.
                problems.append(f"Formatter agent failed: {raw_final['error']}")
        else:
            final_json = {
                "final_qp": {"questions": []},
                "answers": {},
                "obe": {},
                "error": "Formatter returned invalid JSON or None.",
                "generator_raw_sample": gen_raw[:5000]
            }
            problems.append(final_json["error"])

        # write docx files to temp dir
        # NOTE(review): the directory is not removed here — presumably the
        # files must outlive this call so they can be downloaded; confirm the
        # cleanup policy.
        tmpdir = Path(tempfile.mkdtemp())
        stem = _filename_stem(subject)
        qp_path = tmpdir / f"{stem}_QuestionPaper.docx"
        ans_path = tmpdir / f"{stem}_AnswerKey.docx"
        obe_path = tmpdir / f"{stem}_OBE_Summary.docx"

        build_question_paper_docx(qp_path, final_json, gen_raw, subject)
        build_answers_docx(ans_path, final_json, subject)
        build_obe_docx(obe_path, final_json, subject)

        if problems:
            status_text = "Generation completed with warnings:\n\n" + "\n\n".join(problems)
        else:
            status_text = "Generation completed successfully."
        return str(qp_path), str(ans_path), str(obe_path), status_text

    except Exception as e:
        tb = traceback.format_exc()
        return None, None, None, f"Generation failed: {e}\n\n{tb}"

# Build UI
# Components are laid out in the order they are created inside the Blocks
# context, so the statement order below is the on-screen order.
with gr.Blocks() as app:
    gr.Markdown("## Multi-Agent Question Paper Generator (Groq + SerpAPI) — Single-file app")
    # Surface a client-initialization failure at the top of the page.
    if init_error:
        gr.Markdown(f"**Initialization error:** {init_error}")

    # Row 1: what to generate for.
    with gr.Row():
        subject = gr.Textbox(label="Subject Name", value="Cloud Computing")
        stream = gr.Dropdown(label="Stream", choices=["CSE", "Non-CSE"], value="Non-CSE")

    # Row 2: number of questions per part.
    with gr.Row():
        partA = gr.Number(label="Part A (number of short questions)", value=5, precision=0)
        partB = gr.Number(label="Part B (number of long questions / either-or pairs)", value=5, precision=0)
        partC = gr.Number(label="Part C (number of case/design questions)", value=1, precision=0)

    # Input documents; only the syllabus is required by run_system_ui.
    syllabus = gr.File(label="Upload Syllabus (.txt / .pdf / .docx)")
    ref_qp = gr.File(label="Reference QP (optional)")

    generate_btn = gr.Button("Generate Question Paper")

    # Outputs: three generated documents plus a status message.
    qp_file = gr.File(label="Question Paper (.docx)")
    ans_file = gr.File(label="Answer Key (.docx)")
    obe_file = gr.File(label="OBE Summary (.docx)")
    status = gr.Markdown("Status: Idle")

    # The inputs/outputs lists match run_system_ui's parameters and its
    # 4-tuple return value, in order.
    generate_btn.click(fn=run_system_ui, inputs=[subject, stream, partA, partB, partC, syllabus, ref_qp], outputs=[qp_file, ans_file, obe_file, status])

# Launch
if __name__ == "__main__":
    app.launch()