Spaces:

codeboosterstech
/

SNS

Sleeping

App Files Files Community

codeboosterstech committed on Nov 24, 2025

Commit

943a060

verified ·

1 Parent(s): e7a235b

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -67

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py (single-file, all-in-one)
 import os
 import json
 import tempfile
@@ -67,78 +67,78 @@ class SerpClient:
         return resp.json()
 # ---------------------------
-# Safe file text extraction (Gradio returns FileData dict: {"name", "size", "path"})
 # ---------------------------
-def extract_text_from_gradio_file(filedata):
     """
-    Supports BOTH:
-    1. HF Spaces dict:
-       {"name": "..", "path": "...", "size": ...}
-    2. HF NamedString:
-       filedata = NamedString("/tmp/.../file.txt")
     """
     if not filedata:
         return ""
-    # Case A: filedata is a dict
     if isinstance(filedata, dict):
         file_path = filedata.get("path") or filedata.get("name")
-        if not file_path:
-            return ""
-        file_path = str(file_path)
-    # Case B: filedata is NamedString (just a string)
     else:
-        # Gradio NamedString gives direct file path
         file_path = str(filedata)
-    lower = file_path.lower()
     try:
         if lower.endswith(".txt"):
-            return open(file_path, "r", encoding="utf-8", errors="ignore").read()
         if lower.endswith(".pdf"):
-            from pypdf import PdfReader
-            reader = PdfReader(file_path)
-            return "\n".join([p.extract_text() or "" for p in reader.pages])
         if lower.endswith(".docx"):
-            import docx
-            doc = docx.Document(file_path)
-            return "\n".join([p.text for p in doc.paragraphs])
-        # fallback
         with open(file_path, "rb") as f:
             return f.read().decode("utf-8", errors="ignore")
     except Exception:
         return ""
 # ---------------------------
-# Prompt Templates (CSE and Non-CSE) - use your exact doc content here if available
 # ---------------------------
 NONCSE_TEMPLATE = """
 Role: You are an expert academic content creator for Mechanical/Electrical/Electronics (Non-CSE).
 Task: Generate an internal/continuous-assessment question paper matching GATE style.
 Rules:
-- Part A: {partA} questions, approx 2 marks each (adjust if marks per question different).
-- Part B: {partB} questions, choice/either-or pairs (marks per question ~13; adapt per template).
 - Part C: {partC} questions, case/design (higher marks).
 - Tag each question at end like: (Bloom's Level: <level> | Unit: <n> | GATE Reference: <year>)
 - Provide even unit coverage across the syllabus, ensure ~20% real-world/case-based questions.
 - Maintain difficulty index between 1.8 and 2.5.
-- Produce two outputs: Human-readable printable QP, and VALID JSON labeled <<QP_JSON>> at the very end, containing "questions" list with fields:
-  question_no, part, sub_no, marks, unit, course_outcome, bloom_level, tags, question_text
 """
 CSE_TEMPLATE = """
 Role: You are an expert academic content creator for Computer Science (CSE), aligned with MAANGO BIG15.
 Task: Generate an internal/continuous-assessment question paper aligned with industry standards.
 Rules:
-- Part A: {partA} questions (short-answer)
-- Part B: {partB} questions (Either/Or; marks per question ~16)
-- Part C: {partC} questions (case/design)
 - Tag each question like: (Bloom's Level: <level> | Unit: <n> | Company Tag: <Company, Year>)
 - 20% of questions must be industry/case-study oriented.
 - Provide printable QP and VALID JSON <<QP_JSON>> as described above.
@@ -147,7 +147,7 @@ Rules:
 def build_master_prompt(stream: str, subject: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str, realtime_snippets: str) -> str:
     template = CSE_TEMPLATE if stream.lower().startswith("cse") else NONCSE_TEMPLATE
     prompt = template.format(partA=partA, partB=partB, partC=partC)
-    prompt += f"\nSubject: {subject}\n\nSyllabus (first 15000 chars):\n{(syllabus_text or '')[:15000]}\n\nReference QP (first 8000 chars):\n{(ref_qp_text or '')[:8000]}\n\nRealtime evidence (from web):\n{(realtime_snippets or '')[:5000]}\n\nINSTRUCTIONS:\n1) First provide the printable Question Paper\n2) At the very end provide the JSON labeled <<QP_JSON>> containing 'questions' array with the schema described above. JSON must be valid.\n"
     return prompt
 # ---------------------------
@@ -163,14 +163,12 @@ def extract_json_from_text(text: str) -> Optional[dict]:
         try:
             return json.loads(candidate)
         except Exception:
-            # try find last '{'
             try:
                 start = text.rfind("{")
                 return json.loads(text[start:])
             except Exception:
                 return None
     else:
-        # fallback: try parse last {...}
         try:
             start = text.rfind("{")
             return json.loads(text[start:])
@@ -190,7 +188,6 @@ class MultiAgentOrchestrator:
             q = f"{subject} recent developments 2024 2025"
             out = self.serp.search(q, num=n)
             snippets = []
-            # serpapi returns organic_results usually
             for item in out.get("organic_results", [])[:n]:
                 title = item.get("title", "")
                 snippet = item.get("snippet", "") or item.get("snippet_highlighted_words", "")
@@ -200,7 +197,7 @@ class MultiAgentOrchestrator:
             if not snippets and "answer" in out:
                 snippets.append(str(out.get("answer")))
             return "\n\n".join(snippets)
-        except Exception as e:
             return ""
     def run_pipeline(self, subject: str, stream: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str) -> Dict[str, Any]:
@@ -219,31 +216,37 @@ class MultiAgentOrchestrator:
             # Try extract JSON
             qp_json = extract_json_from_text(gen_out)
             if qp_json is None:
-                # ask generator for JSON only
                 json_only_prompt = prompt + "\n\nNow output ONLY the VALID JSON object 'questions' for the paper (no additional text)."
                 gen_json_only = self.groq.generate_text(system="Return JSON only.", user=json_only_prompt, model=GENERATOR_MODEL, max_tokens=3000, temperature=0.0)
                 try:
                     qp_json = json.loads(gen_json_only)
                 except Exception:
                     qp_json = {"raw_text": gen_out}
             result["qp_json"] = qp_json
             # AGENT 2: VERIFIER
             try:
-                verifier_prompt = f"You are an academic verifier. Verify the QP JSON below for:\n- Bloom's taxonomy correctness\n- Unit coverage and distribution\n- Correct number of questions per part\n- Tag completeness and Company/GATE tags\n- Difficulty index 1.8-2.5\n- Duplications or ambiguous statements\nReturn a JSON object: {{'corrections': [...], 'issues': [...]}}"
                 verifier_input = json.dumps(qp_json)[:50000]
                 ver_out = self.groq.generate_text(system="Verifier agent.", user=verifier_prompt + "\n\n" + verifier_input, model=VERIFIER_MODEL, max_tokens=2000, temperature=0.0)
                 try:
                     ver_json = json.loads(ver_out)
                 except Exception:
-                    # If not valid JSON, return raw text under 'raw'
                     ver_json = {"raw": ver_out}
                 result["verifier"] = ver_json
             except Exception as e:
                 result["verifier"] = {"error": str(e)}
-            # AGENT 3: FORMATTER (apply corrections & produce final JSON)
             try:
                 fmt_prompt = (
                     "You are a formatter. Input QP JSON and corrections. Apply corrections, ensure valid JSON structure, "
@@ -261,32 +264,36 @@ class MultiAgentOrchestrator:
                 result["final"] = final_json
             except Exception as e:
                 result["final"] = {"error": str(e)}
-        except Exception as e:
             result["errors"].append(traceback.format_exc())
         return result
 # ---------------------------
-# DOCX builder functions (inline)
 # ---------------------------
 def _add_paragraph(doc, text, bold=False):
-    run = doc.add_paragraph().add_run(text)
     run.bold = bold
-def build_question_paper_docx(path: Path, final_json: dict, generator_raw: str, subject: str):
     from docx import Document
     doc = Document()
     doc.add_heading(f"SNS College of Technology — {subject}", level=1)
     doc.add_paragraph("Instructions: Answer as per marks. Each question is tagged with Bloom's level and Unit.")
     doc.add_paragraph("\nPrintable Question Paper:\n")
     if generator_raw:
-        # limit to a large but safe size
         doc.add_paragraph(generator_raw[:20000])
-    # If structured final_json contains final_qp.questions, create a table
     questions = []
-    if isinstance(final_json, dict):
-        fq = final_json.get("final_qp") or final_json.get("final") or final_json
-        if isinstance(fq, dict):
-            questions = fq.get("questions", []) or []
     if questions:
         table = doc.add_table(rows=1, cols=5)
         hdr = table.rows[0].cells
@@ -302,32 +309,50 @@ def build_question_paper_docx(path: Path, final_json: dict, generator_raw: str,
             row[2].text = str(q.get("question_text", "")).strip()
             row[3].text = str(q.get("course_outcome", ""))
             row[4].text = f"{q.get('bloom_level','')} | {q.get('tags','')}"
     doc.save(path)
-def build_answers_docx(path: Path, final_json: dict, subject: str):
     from docx import Document
     doc = Document()
     doc.add_heading(f"Answer Key — {subject}", level=1)
     answers = {}
     if isinstance(final_json, dict):
-        answers = final_json.get("answers", {}) or final_json.get("final", {}).get("answers", {}) or {}
     if isinstance(answers, dict) and answers:
         for k, v in answers.items():
             p = doc.add_paragraph()
             p.add_run(f"{k}:\n").bold = True
             doc.add_paragraph(str(v))
     else:
-        doc.add_paragraph(json.dumps(final_json.get("answers", final_json), indent=2)[:15000])
     doc.save(path)
-def build_obe_docx(path: Path, final_json: dict, subject: str):
     from docx import Document
     doc = Document()
     doc.add_heading(f"OBE Summary — {subject}", level=1)
     obe = {}
     if isinstance(final_json, dict):
-        obe = final_json.get("obe", {}) or final_json.get("final", {}).get("obe", {}) or {}
-    doc.add_paragraph(json.dumps(obe, indent=2)[:15000])
     doc.save(path)
 # ---------------------------
@@ -354,16 +379,27 @@ def run_system_ui(subject, stream, partA, partB, partC, syllabus_file, ref_file)
         syllabus_text = extract_text_from_gradio_file(syllabus_file)
         ref_text = extract_text_from_gradio_file(ref_file) if ref_file else ""
         if not syllabus_text:
-            # If the user uploaded nothing or extraction failed, show helpful message referencing the sample file
             sample_path = "/mnt/data/cloud_computing_syllabus.txt"
             msg = ("Syllabus extraction failed or file empty. "
-                   f"If you want to test immediately, you can use the sample syllabus located at: {sample_path} "
-                   "Upload a .txt/.pdf/.docx file instead.")
             return None, None, None, msg
         # call orchestrator
         out = orchestrator.run_pipeline(subject=subject, stream=stream, partA=int(partA), partB=int(partB), partC=int(partC), syllabus_text=syllabus_text, ref_qp_text=ref_text)
-        final_json = out.get("final", {})
         gen_raw = out.get("generator_raw", "")
         # write docx files to temp dir
@@ -411,5 +447,4 @@ with gr.Blocks() as app:
 # Launch
 if __name__ == "__main__":
-    # On Spaces, gradio will handle host/port; for local testing you can set share=True
     app.launch()

+# app.py (patched final single-file)
 import os
 import json
 import tempfile
         return resp.json()
 # ---------------------------
+# Safe file text extraction (handles dict and NamedString)
 # ---------------------------
def extract_text_from_gradio_file(filedata) -> str:
    """Return the text content of a file uploaded through Gradio.

    Accepts either:
      - a FileData-style dict: {"name": "...", "path": "/tmp/..", "size": n}
        ("path" is preferred, "name" is the fallback), or
      - a NamedString, plain string, or path-like object holding the file path.

    The file extension (case-insensitive) selects the reader:
      - .txt  -> read as UTF-8 text, undecodable bytes ignored
      - .pdf  -> pypdf page text joined with newlines
      - .docx -> python-docx paragraph text joined with newlines
      - other -> raw bytes decoded as UTF-8, undecodable bytes ignored

    If the PDF/DOCX parser is unavailable or fails, the raw bytes are decoded
    instead (best effort). Returns "" when nothing was uploaded, when no path
    can be determined, or when the file cannot be opened.
    """
    if not filedata:
        return ""

    if isinstance(filedata, dict):
        file_path = filedata.get("path") or filedata.get("name")
    else:
        file_path = filedata
    if not file_path:
        return ""
    # Coerce once so path-like values (e.g. pathlib.Path inside the dict)
    # work with .lower() below instead of raising and yielding "".
    file_path = str(file_path)

    def _read_raw() -> str:
        # Last-resort reader shared by every branch: bytes -> lenient UTF-8.
        with open(file_path, "rb") as f:
            return f.read().decode("utf-8", errors="ignore")

    lower = file_path.lower()
    try:
        if lower.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()
        if lower.endswith(".pdf"):
            try:
                from pypdf import PdfReader
                reader = PdfReader(file_path)
                return "\n".join(page.extract_text() or "" for page in reader.pages)
            except Exception:
                # Parser missing or failed: keep the best-effort raw fallback.
                return _read_raw()
        if lower.endswith(".docx"):
            try:
                import docx
                document = docx.Document(file_path)
                return "\n".join(par.text for par in document.paragraphs)
            except Exception:
                # Parser missing or failed: keep the best-effort raw fallback.
                return _read_raw()
        return _read_raw()
    except (OSError, ValueError):
        # Missing/unreadable file, or a path open() rejects (e.g. embedded NUL).
        return ""
 # ---------------------------
+# Prompt Templates (CSE and Non-CSE)
 # ---------------------------
 NONCSE_TEMPLATE = """
 Role: You are an expert academic content creator for Mechanical/Electrical/Electronics (Non-CSE).
 Task: Generate an internal/continuous-assessment question paper matching GATE style.
 Rules:
+- Part A: {partA} questions, approx 2 marks each.
+- Part B: {partB} questions, choice/either-or pairs.
 - Part C: {partC} questions, case/design (higher marks).
 - Tag each question at end like: (Bloom's Level: <level> | Unit: <n> | GATE Reference: <year>)
 - Provide even unit coverage across the syllabus, ensure ~20% real-world/case-based questions.
 - Maintain difficulty index between 1.8 and 2.5.
+- Produce two outputs: Human-readable printable QP, and VALID JSON labeled <<QP_JSON>> at the very end containing "questions".
 """
 CSE_TEMPLATE = """
 Role: You are an expert academic content creator for Computer Science (CSE), aligned with MAANGO BIG15.
 Task: Generate an internal/continuous-assessment question paper aligned with industry standards.
 Rules:
+- Part A: {partA} short-answer questions.
+- Part B: {partB} questions (Either/Or pairs).
+- Part C: {partC} questions (case/design).
 - Tag each question like: (Bloom's Level: <level> | Unit: <n> | Company Tag: <Company, Year>)
 - 20% of questions must be industry/case-study oriented.
 - Provide printable QP and VALID JSON <<QP_JSON>> as described above.
 def build_master_prompt(stream: str, subject: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str, realtime_snippets: str) -> str:
     template = CSE_TEMPLATE if stream.lower().startswith("cse") else NONCSE_TEMPLATE
     prompt = template.format(partA=partA, partB=partB, partC=partC)
+    prompt += f"\nSubject: {subject}\n\nSyllabus (first 15000 chars):\n{(syllabus_text or '')[:15000]}\n\nReference QP (first 8000 chars):\n{(ref_qp_text or '')[:8000]}\n\nRealtime evidence (from web):\n{(realtime_snippets or '')[:5000]}\n\nINSTRUCTIONS:\n1) First provide the printable Question Paper\n2) At the very end provide the JSON labeled <<QP_JSON>> containing 'questions' array. JSON must be valid.\n"
     return prompt
 # ---------------------------
         try:
             return json.loads(candidate)
         except Exception:
             try:
                 start = text.rfind("{")
                 return json.loads(text[start:])
             except Exception:
                 return None
     else:
         try:
             start = text.rfind("{")
             return json.loads(text[start:])
             q = f"{subject} recent developments 2024 2025"
             out = self.serp.search(q, num=n)
             snippets = []
             for item in out.get("organic_results", [])[:n]:
                 title = item.get("title", "")
                 snippet = item.get("snippet", "") or item.get("snippet_highlighted_words", "")
             if not snippets and "answer" in out:
                 snippets.append(str(out.get("answer")))
             return "\n\n".join(snippets)
+        except Exception:
             return ""
     def run_pipeline(self, subject: str, stream: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str) -> Dict[str, Any]:
             # Try extract JSON
             qp_json = extract_json_from_text(gen_out)
             if qp_json is None:
                 json_only_prompt = prompt + "\n\nNow output ONLY the VALID JSON object 'questions' for the paper (no additional text)."
                 gen_json_only = self.groq.generate_text(system="Return JSON only.", user=json_only_prompt, model=GENERATOR_MODEL, max_tokens=3000, temperature=0.0)
                 try:
                     qp_json = json.loads(gen_json_only)
                 except Exception:
                     qp_json = {"raw_text": gen_out}
             result["qp_json"] = qp_json
             # AGENT 2: VERIFIER
             try:
+                verifier_prompt = (
+                    "You are an academic verifier. Verify the QP JSON below for:\n"
+                    "- Bloom's taxonomy correctness\n"
+                    "- Unit coverage and distribution\n"
+                    "- Correct number of questions per part\n"
+                    "- Tag completeness and Company/GATE tags\n"
+                    "- Difficulty index 1.8-2.5\n"
+                    "- Duplications or ambiguous statements\n"
+                    "Return a JSON object: {'corrections': [...], 'issues': [...]}"
+                )
                 verifier_input = json.dumps(qp_json)[:50000]
                 ver_out = self.groq.generate_text(system="Verifier agent.", user=verifier_prompt + "\n\n" + verifier_input, model=VERIFIER_MODEL, max_tokens=2000, temperature=0.0)
                 try:
                     ver_json = json.loads(ver_out)
                 except Exception:
                     ver_json = {"raw": ver_out}
                 result["verifier"] = ver_json
             except Exception as e:
                 result["verifier"] = {"error": str(e)}
+            # AGENT 3: FORMATTER
             try:
                 fmt_prompt = (
                     "You are a formatter. Input QP JSON and corrections. Apply corrections, ensure valid JSON structure, "
                 result["final"] = final_json
             except Exception as e:
                 result["final"] = {"error": str(e)}
+        except Exception:
             result["errors"].append(traceback.format_exc())
         return result
 # ---------------------------
+# DOCX builder functions (robust)
 # ---------------------------
 def _add_paragraph(doc, text, bold=False):
+    p = doc.add_paragraph()
+    run = p.add_run(text)
     run.bold = bold
+def build_question_paper_docx(path: Path, final_json: Optional[dict], generator_raw: str, subject: str):
     from docx import Document
     doc = Document()
     doc.add_heading(f"SNS College of Technology — {subject}", level=1)
     doc.add_paragraph("Instructions: Answer as per marks. Each question is tagged with Bloom's level and Unit.")
     doc.add_paragraph("\nPrintable Question Paper:\n")
     if generator_raw:
         doc.add_paragraph(generator_raw[:20000])
     questions = []
+    try:
+        if isinstance(final_json, dict):
+            fq = final_json.get("final_qp") or final_json.get("final") or final_json
+            if isinstance(fq, dict):
+                questions = fq.get("questions", []) or []
+    except Exception:
+        questions = []
     if questions:
         table = doc.add_table(rows=1, cols=5)
         hdr = table.rows[0].cells
             row[2].text = str(q.get("question_text", "")).strip()
             row[3].text = str(q.get("course_outcome", ""))
             row[4].text = f"{q.get('bloom_level','')} | {q.get('tags','')}"
+    else:
+        doc.add_paragraph("No structured questions were produced by the formatter. See the raw generator output above.")
     doc.save(path)
def build_answers_docx(path: Path, final_json: Optional[dict], subject: str):
    """Write the answer-key DOCX for ``subject`` to ``path``.

    Answers are looked up at ``final_json["answers"]`` first and then at
    ``final_json["final"]["answers"]``. When a non-empty dict of answers is
    found, each key becomes a bold label followed by its value as text.
    Otherwise the document carries a notice plus a truncated (15000 chars)
    dump of ``final_json`` so the user still sees what was produced.
    """
    from docx import Document

    doc = Document()
    doc.add_heading(f"Answer Key — {subject}", level=1)

    answers = {}
    if isinstance(final_json, dict):
        # "final" may be absent, None, or a non-dict; only descend into a
        # real dict so a malformed payload cannot raise AttributeError.
        nested = final_json.get("final")
        nested_answers = nested.get("answers") if isinstance(nested, dict) else None
        answers = final_json.get("answers") or nested_answers or {}

    if isinstance(answers, dict) and answers:
        for key, value in answers.items():
            label = doc.add_paragraph()
            label.add_run(f"{key}:\n").bold = True
            doc.add_paragraph(str(value))
    else:
        # Fallback: show whatever JSON exists, or its str() if not serializable.
        try:
            safe_dump = json.dumps(final_json or {"note": "No final JSON"}, indent=2)[:15000]
        except (TypeError, ValueError):
            safe_dump = str(final_json)[:15000]
        doc.add_paragraph("No structured answers provided by AI. Falling back to raw final JSON (truncated):")
        doc.add_paragraph(safe_dump)
    doc.save(path)
def build_obe_docx(path: Path, final_json: Optional[dict], subject: str):
    """Write the OBE summary DOCX for ``subject`` to ``path``.

    The OBE data is looked up at ``final_json["obe"]`` first and then at
    ``final_json["final"]["obe"]``. It is written as indented JSON truncated
    to 15000 chars; when nothing is found a placeholder note is written.
    """
    from docx import Document

    doc = Document()
    doc.add_heading(f"OBE Summary — {subject}", level=1)

    obe = {}
    if isinstance(final_json, dict):
        # "final" may be absent, None, or a non-dict; only descend into a
        # real dict so a malformed payload cannot raise AttributeError.
        nested = final_json.get("final")
        nested_obe = nested.get("obe") if isinstance(nested, dict) else None
        obe = final_json.get("obe") or nested_obe or {}

    try:
        summary = json.dumps(obe or {"note": "No OBE produced"}, indent=2)[:15000]
    except (TypeError, ValueError):
        # Not JSON-serializable: fall back to the plain string form.
        summary = str(obe)[:15000]
    doc.add_paragraph(summary)
    doc.save(path)
 # ---------------------------
         syllabus_text = extract_text_from_gradio_file(syllabus_file)
         ref_text = extract_text_from_gradio_file(ref_file) if ref_file else ""
         if not syllabus_text:
             sample_path = "/mnt/data/cloud_computing_syllabus.txt"
             msg = ("Syllabus extraction failed or file empty. "
+                   f"Use the sample syllabus for testing: {sample_path} or upload a .txt/.pdf/.docx.")
             return None, None, None, msg
         # call orchestrator
         out = orchestrator.run_pipeline(subject=subject, stream=stream, partA=int(partA), partB=int(partB), partC=int(partC), syllabus_text=syllabus_text, ref_qp_text=ref_text)
+        # Ensure final_json is always a dict (fallback if None or invalid)
+        raw_final = out.get("final")
+        if isinstance(raw_final, dict):
+            final_json = raw_final
+        else:
+            final_json = {
+                "final_qp": {"questions": []},
+                "answers": {},
+                "obe": {},
+                "error": "Formatter returned invalid JSON or None.",
+                "generator_raw_sample": (out.get("generator_raw") or "")[:5000]
+            }
         gen_raw = out.get("generator_raw", "")
         # write docx files to temp dir
 # Launch
 if __name__ == "__main__":
     app.launch()