# SNS / app.py
# (Hugging Face Space page residue, commented out: author codeboosterstech,
#  commit 943a060 "Update app.py" — not part of the program.)
# app.py (patched final single-file)
import os
import json
import tempfile
import traceback
from pathlib import Path
from typing import Optional, Dict, Any, List
import requests
import gradio as gr
# ---------------------------
# CONFIG / MODELS (Groq model ids)
# ---------------------------
# One model per pipeline stage; each is overridable via an environment
# variable so a Space operator can swap models without editing code.
GENERATOR_MODEL = os.getenv("GENERATOR_MODEL", "llama-3.1-70b-versatile")  # agent 1: drafts the paper
VERIFIER_MODEL = os.getenv("VERIFIER_MODEL", "gemma2-27b-it")  # agent 2: audits the draft
FORMATTER_MODEL = os.getenv("FORMATTER_MODEL", "mixtral-8x7b-32768")  # agent 3: normalizes final JSON
# Groq exposes an OpenAI-compatible chat-completions endpoint.
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
SERP_URL = "https://serpapi.com/search"  # SerpAPI search endpoint
# ---------------------------
# Helpers: Groq Client & SerpClient
# ---------------------------
class GroqClient:
    """Thin wrapper around Groq's OpenAI-compatible chat-completions API."""

    def __init__(self, api_key: Optional[str] = None):
        key = api_key if api_key else os.getenv("GROQ_API_KEY")
        if not key:
            raise RuntimeError("GROQ_API_KEY environment variable or Space secret is required.")
        self.api_key = key
        self.url = GROQ_URL
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def chat(self, messages: List[Dict[str, str]], model: str, max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """POST a chat request and return the first choice's text content.

        Raises RuntimeError on any non-200 HTTP response.
        """
        body = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": 0.95,
        }
        resp = requests.post(self.url, headers=self.headers, json=body, timeout=120)
        if resp.status_code != 200:
            raise RuntimeError(f"Groq API error {resp.status_code}: {resp.text}")
        data = resp.json()
        try:
            return data["choices"][0]["message"]["content"]
        except Exception:
            # Unexpected response shape: surface the raw payload instead of crashing.
            return json.dumps(data)

    def generate_text(self, system: str, user: str, model: str, max_tokens: int = 2048, temperature: float = 0.0) -> str:
        """Convenience helper: one system message + one user message."""
        convo = [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]
        return self.chat(messages=convo, model=model, max_tokens=max_tokens, temperature=temperature)
class SerpClient:
    """Minimal SerpAPI client used to pull fresh web snippets."""

    def __init__(self, api_key: Optional[str] = None):
        key = api_key if api_key else os.getenv("SERPAPI_KEY")
        if not key:
            raise RuntimeError("SERPAPI_KEY environment variable or Space secret is required.")
        self.api_key = key
        self.url = SERP_URL

    def search(self, query: str, num: int = 5) -> Dict[str, Any]:
        """Run a web search and return the decoded JSON response.

        Raises RuntimeError on any non-200 HTTP response.
        """
        resp = requests.get(
            self.url,
            params={"q": query, "api_key": self.api_key, "num": num},
            timeout=30,
        )
        if resp.status_code != 200:
            raise RuntimeError(f"SerpAPI error {resp.status_code}: {resp.text}")
        return resp.json()
# ---------------------------
# Safe file text extraction (handles dict and NamedString)
# ---------------------------
def extract_text_from_gradio_file(filedata) -> str:
    """Best-effort text extraction from a Gradio upload.

    Accepts either an HF Spaces FileData dict ({"name": ..., "path": ..., "size": ...})
    or a Gradio NamedString / plain string path. Supports .txt, .pdf (via pypdf)
    and .docx (via python-docx); any other extension — or any parser failure —
    falls back to decoding the raw bytes as UTF-8, and completely unreadable
    input yields "".
    """
    if not filedata:
        return ""

    # Resolve the on-disk path from whichever shape Gradio handed us.
    if isinstance(filedata, dict):
        path = filedata.get("path") or filedata.get("name")
    else:
        # NamedString or plain string
        path = str(filedata)
    if not path:
        return ""

    def raw_decode() -> str:
        # Last resort: decode raw bytes, ignoring undecodable sequences.
        with open(path, "rb") as fh:
            return fh.read().decode("utf-8", errors="ignore")

    try:
        suffix = path.lower()
        if suffix.endswith(".txt"):
            with open(path, "r", encoding="utf-8", errors="ignore") as fh:
                return fh.read()
        if suffix.endswith(".pdf"):
            try:
                from pypdf import PdfReader
                pages = PdfReader(path).pages
                return "\n".join(page.extract_text() or "" for page in pages)
            except Exception:
                return raw_decode()
        if suffix.endswith(".docx"):
            try:
                import docx
                paragraphs = docx.Document(path).paragraphs
                return "\n".join(par.text for par in paragraphs)
            except Exception:
                return raw_decode()
        # Unknown extension: treat as text-ish bytes.
        return raw_decode()
    except Exception:
        return ""
# ---------------------------
# Prompt Templates (CSE and Non-CSE)
# ---------------------------
# Prompt template for Non-CSE streams (Mechanical/Electrical/Electronics).
# {partA}/{partB}/{partC} placeholders are filled by build_master_prompt();
# the model is told to append a machine-readable JSON payload after a literal
# <<QP_JSON>> marker, which extract_json_from_text() later looks for.
NONCSE_TEMPLATE = """
Role: You are an expert academic content creator for Mechanical/Electrical/Electronics (Non-CSE).
Task: Generate an internal/continuous-assessment question paper matching GATE style.
Rules:
- Part A: {partA} questions, approx 2 marks each.
- Part B: {partB} questions, choice/either-or pairs.
- Part C: {partC} questions, case/design (higher marks).
- Tag each question at end like: (Bloom's Level: <level> | Unit: <n> | GATE Reference: <year>)
- Provide even unit coverage across the syllabus, ensure ~20% real-world/case-based questions.
- Maintain difficulty index between 1.8 and 2.5.
- Produce two outputs: Human-readable printable QP, and VALID JSON labeled <<QP_JSON>> at the very end containing "questions".
"""
# Prompt template for the CSE stream; same placeholders and <<QP_JSON>>
# contract as above, but with industry (company) tagging instead of GATE tags.
CSE_TEMPLATE = """
Role: You are an expert academic content creator for Computer Science (CSE), aligned with MAANGO BIG15.
Task: Generate an internal/continuous-assessment question paper aligned with industry standards.
Rules:
- Part A: {partA} short-answer questions.
- Part B: {partB} questions (Either/Or pairs).
- Part C: {partC} questions (case/design).
- Tag each question like: (Bloom's Level: <level> | Unit: <n> | Company Tag: <Company, Year>)
- 20% of questions must be industry/case-study oriented.
- Provide printable QP and VALID JSON <<QP_JSON>> as described above.
"""
def build_master_prompt(stream: str, subject: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str, realtime_snippets: str) -> str:
    """Assemble the generator agent's prompt.

    Picks the stream-specific template, fills in the per-part question counts,
    then appends the subject plus truncated syllabus / reference-QP / realtime
    context and the output-format instructions.
    """
    is_cse = stream.lower().startswith("cse")
    base = (CSE_TEMPLATE if is_cse else NONCSE_TEMPLATE).format(partA=partA, partB=partB, partC=partC)
    # Truncate each context section to keep the prompt within context limits.
    syllabus = (syllabus_text or '')[:15000]
    ref_qp = (ref_qp_text or '')[:8000]
    realtime = (realtime_snippets or '')[:5000]
    tail = (
        f"\nSubject: {subject}\n\n"
        f"Syllabus (first 15000 chars):\n{syllabus}\n\n"
        f"Reference QP (first 8000 chars):\n{ref_qp}\n\n"
        f"Realtime evidence (from web):\n{realtime}\n\n"
        "INSTRUCTIONS:\n"
        "1) First provide the printable Question Paper\n"
        "2) At the very end provide the JSON labeled <<QP_JSON>> containing 'questions' array. JSON must be valid.\n"
    )
    return base + tail
# ---------------------------
# Utility: extract JSON suffix from generator text
# ---------------------------
def extract_json_from_text(text: str) -> Optional[dict]:
    """Pull the trailing JSON object out of a generator response.

    The generator is asked to append its machine-readable payload after a
    literal "<<QP_JSON>>" marker. This searches after the last marker first,
    then falls back to scanning the whole text.

    Bug fix: the previous implementation sliced from ``text.rfind("{")``,
    i.e. the LAST opening brace — so any payload containing a nested object
    (e.g. ``{"questions": [{"q": 1}]}``) produced an unparsable fragment and
    returned None. ``json.JSONDecoder.raw_decode`` is used instead: it parses
    a complete value starting at a given position and tolerates both nested
    braces and trailing prose after the object.

    Returns the first JSON *object* found, or None.
    """
    if not text:
        return None

    def _first_object(chunk: str) -> Optional[dict]:
        # Try each '{' as a candidate start; raw_decode stops at the end of a
        # complete JSON value, so trailing text after the object is harmless.
        decoder = json.JSONDecoder()
        pos = chunk.find("{")
        while pos != -1:
            try:
                obj, _ = decoder.raw_decode(chunk, pos)
            except ValueError:
                obj = None
            if isinstance(obj, dict):
                return obj
            pos = chunk.find("{", pos + 1)
        return None

    marker = "<<QP_JSON>>"
    idx = text.rfind(marker)
    if idx != -1:
        found = _first_object(text[idx + len(marker):])
        if found is not None:
            return found
    # No marker, or nothing parsable after it: scan the full text.
    return _first_object(text)
# ---------------------------
# Multi-agent orchestrator (inlined)
# ---------------------------
class MultiAgentOrchestrator:
    """Runs the three-stage Groq pipeline: generator -> verifier -> formatter.

    Each stage degrades gracefully: failures are recorded inside the result
    dict (or swallowed into fallback values) rather than aborting the run.
    """

    def __init__(self, groq_client: GroqClient, serp_client: SerpClient):
        self.groq = groq_client
        self.serp = serp_client

    def fetch_realtime_snippets(self, subject: str, n: int = 4) -> str:
        """Fetch up to *n* web snippets about recent developments in *subject*.

        Best-effort: any SerpAPI failure yields an empty string so the
        pipeline can proceed without realtime evidence.
        """
        try:
            q = f"{subject} recent developments 2024 2025"
            out = self.serp.search(q, num=n)
            snippets = []
            for item in out.get("organic_results", [])[:n]:
                title = item.get("title", "")
                snippet = item.get("snippet", "") or item.get("snippet_highlighted_words", "")
                link = item.get("link", "")
                if title or snippet:
                    snippets.append(f"{title}\n{snippet}\n{link}")
            # No organic hits: fall back to SerpAPI's direct-answer field.
            if not snippets and "answer" in out:
                snippets.append(str(out.get("answer")))
            return "\n\n".join(snippets)
        except Exception:
            return ""

    def run_pipeline(self, subject: str, stream: str, partA: int, partB: int, partC: int, syllabus_text: str, ref_qp_text: str) -> Dict[str, Any]:
        """Run generator, verifier and formatter agents and return all artifacts.

        Returns a dict with keys: "generator_raw" (str), "qp_json" (dict),
        "verifier" (dict), "final" (dict) and "errors" (list of traceback
        strings). Verifier/formatter failures are captured per-stage; only a
        generator failure aborts the pipeline (recorded in "errors").
        """
        result = {"generator_raw": "", "qp_json": None, "verifier": None, "final": None, "errors": []}
        try:
            realtime = self.fetch_realtime_snippets(subject)
            prompt = build_master_prompt(stream, subject, partA, partB, partC, syllabus_text, ref_qp_text, realtime)
            # AGENT 1: GENERATOR — drafts the paper (printable text + JSON tail).
            try:
                gen_out = self.groq.generate_text(system="You are an exam question paper generator.", user=prompt, model=GENERATOR_MODEL, max_tokens=6000, temperature=0.0)
            except Exception as e:
                raise RuntimeError(f"Generator agent failed: {e}")
            result["generator_raw"] = gen_out
            # Try to extract the JSON tail; if unparsable, re-ask for JSON only.
            qp_json = extract_json_from_text(gen_out)
            if qp_json is None:
                json_only_prompt = prompt + "\n\nNow output ONLY the VALID JSON object 'questions' for the paper (no additional text)."
                gen_json_only = self.groq.generate_text(system="Return JSON only.", user=json_only_prompt, model=GENERATOR_MODEL, max_tokens=3000, temperature=0.0)
                try:
                    qp_json = json.loads(gen_json_only)
                except Exception:
                    # Still unparsable: keep the raw text so nothing is lost.
                    qp_json = {"raw_text": gen_out}
            result["qp_json"] = qp_json
            # AGENT 2: VERIFIER — audits the draft for structural/quality issues.
            try:
                verifier_prompt = (
                    "You are an academic verifier. Verify the QP JSON below for:\n"
                    "- Bloom's taxonomy correctness\n"
                    "- Unit coverage and distribution\n"
                    "- Correct number of questions per part\n"
                    "- Tag completeness and Company/GATE tags\n"
                    "- Difficulty index 1.8-2.5\n"
                    "- Duplications or ambiguous statements\n"
                    "Return a JSON object: {'corrections': [...], 'issues': [...]}"
                )
                # Truncate to stay within the model's context window.
                verifier_input = json.dumps(qp_json)[:50000]
                ver_out = self.groq.generate_text(system="Verifier agent.", user=verifier_prompt + "\n\n" + verifier_input, model=VERIFIER_MODEL, max_tokens=2000, temperature=0.0)
                try:
                    ver_json = json.loads(ver_out)
                except Exception:
                    ver_json = {"raw": ver_out}
                result["verifier"] = ver_json
            except Exception as e:
                result["verifier"] = {"error": str(e)}
            # AGENT 3: FORMATTER — applies corrections, normalizes the output
            # into {final_qp, answers, obe}.
            try:
                fmt_prompt = (
                    "You are a formatter. Input QP JSON and corrections. Apply corrections, ensure valid JSON structure, "
                    "and produce a single JSON object with keys: final_qp, answers, obe.\n\nQP_JSON:\n"
                    + json.dumps(qp_json)[:50000]
                    + "\n\nVERIFIER_CORRECTIONS:\n"
                    + json.dumps(result["verifier"])[:50000]
                    + "\n\nReturn ONE valid JSON object."
                )
                fmt_out = self.groq.generate_text(system="Formatter agent.", user=fmt_prompt, model=FORMATTER_MODEL, max_tokens=4000, temperature=0.0)
                try:
                    final_json = json.loads(fmt_out)
                except Exception:
                    # Invalid formatter JSON: keep every intermediate artifact.
                    final_json = {"raw_formatter_output": fmt_out, "qp_json": qp_json, "verifier": result["verifier"]}
                result["final"] = final_json
            except Exception as e:
                result["final"] = {"error": str(e)}
        except Exception:
            # Record the full traceback; callers inspect result["errors"].
            result["errors"].append(traceback.format_exc())
        return result
# ---------------------------
# DOCX builder functions (robust)
# ---------------------------
def _add_paragraph(doc, text, bold=False):
p = doc.add_paragraph()
run = p.add_run(text)
run.bold = bold
def build_question_paper_docx(path: Path, final_json: Optional[dict], generator_raw: str, subject: str):
    """Write the printable question paper to *path* as a .docx file.

    Emits the raw generator text first, then — when the formatter produced a
    structured "questions" list — a five-column table of those questions;
    otherwise a note pointing the reader at the raw output.
    """
    from docx import Document
    doc = Document()
    doc.add_heading(f"SNS College of Technology — {subject}", level=1)
    doc.add_paragraph("Instructions: Answer as per marks. Each question is tagged with Bloom's level and Unit.")
    doc.add_paragraph("\nPrintable Question Paper:\n")
    if generator_raw:
        # Cap at 20k characters to keep the document manageable.
        doc.add_paragraph(generator_raw[:20000])

    # Locate the structured question list, tolerating several JSON shapes
    # ({"final_qp": {...}}, {"final": {...}}, or questions at top level).
    questions = []
    try:
        if isinstance(final_json, dict):
            container = final_json.get("final_qp") or final_json.get("final") or final_json
            if isinstance(container, dict):
                questions = container.get("questions", []) or []
    except Exception:
        questions = []

    if not questions:
        doc.add_paragraph("No structured questions were produced by the formatter. See the raw generator output above.")
    else:
        table = doc.add_table(rows=1, cols=5)
        headers = ("Q.No", "SubQ", "Question", "Course Outcome", "Bloom / Tags")
        for cell, title in zip(table.rows[0].cells, headers):
            cell.text = title
        for entry in questions:
            cells = table.add_row().cells
            cells[0].text = str(entry.get("question_no", ""))
            cells[1].text = str(entry.get("sub_no", ""))
            cells[2].text = str(entry.get("question_text", "")).strip()
            cells[3].text = str(entry.get("course_outcome", ""))
            cells[4].text = f"{entry.get('bloom_level','')} | {entry.get('tags','')}"
    doc.save(path)
def build_answers_docx(path: Path, final_json: Optional[dict], subject: str):
    """Write the answer key to *path* as a .docx file.

    Looks for an "answers" mapping at the top level of *final_json* or nested
    under its "final" key; when neither yields a non-empty dict, dumps the raw
    final JSON (truncated) so the document is never empty.

    Fix: the nested lookup previously did ``final_json.get("final", {}).get(...)``,
    which raised AttributeError whenever the "final" key held a non-dict (the
    formatter's fallback can put arbitrary content there); now guarded with
    an isinstance check.
    """
    from docx import Document
    doc = Document()
    doc.add_heading(f"Answer Key — {subject}", level=1)

    answers = {}
    if isinstance(final_json, dict):
        # Try multiple possible locations for the answers mapping.
        nested = final_json.get("final")
        answers = (
            final_json.get("answers")
            or (nested.get("answers", {}) if isinstance(nested, dict) else {})
            or {}
        )

    if isinstance(answers, dict) and answers:
        for key, val in answers.items():
            p = doc.add_paragraph()
            p.add_run(f"{key}:\n").bold = True
            doc.add_paragraph(str(val))
    else:
        # Fallback: safe dump of whatever final JSON we have (truncated).
        try:
            safe_dump = json.dumps(final_json or {"note": "No final JSON"}, indent=2)[:15000]
        except Exception:
            # Non-serializable content: fall back to repr.
            safe_dump = str(final_json)[:15000]
        doc.add_paragraph("No structured answers provided by AI. Falling back to raw final JSON (truncated):")
        doc.add_paragraph(safe_dump)
    doc.save(path)
def build_obe_docx(path: Path, final_json: Optional[dict], subject: str):
    """Write the OBE (outcome-based education) summary to *path* as .docx.

    Looks for an "obe" mapping at the top level of *final_json* or nested
    under its "final" key, and dumps it as pretty-printed JSON (truncated).

    Fix: the nested lookup previously did ``final_json.get("final", {}).get(...)``,
    which raised AttributeError whenever the "final" key held a non-dict; now
    guarded with an isinstance check.
    """
    from docx import Document
    doc = Document()
    doc.add_heading(f"OBE Summary — {subject}", level=1)

    obe = {}
    if isinstance(final_json, dict):
        nested = final_json.get("final")
        obe = (
            final_json.get("obe")
            or (nested.get("obe", {}) if isinstance(nested, dict) else {})
            or {}
        )
    try:
        doc.add_paragraph(json.dumps(obe or {"note": "No OBE produced"}, indent=2)[:15000])
    except Exception:
        # Non-serializable content: fall back to repr.
        doc.add_paragraph(str(obe)[:15000])
    doc.save(path)
# ---------------------------
# Initialize clients (raise friendly error if secrets missing)
# ---------------------------
# Instantiate the API clients at import time so a missing secret is reported
# in the UI immediately instead of failing on first use.
try:
    groq_client = GroqClient(api_key=os.getenv("GROQ_API_KEY"))
    serp_client = SerpClient(api_key=os.getenv("SERPAPI_KEY"))
    orchestrator = MultiAgentOrchestrator(groq_client, serp_client)
except Exception as e:
    # Keep the module importable; run_system_ui surfaces init_error to users.
    orchestrator = None
    init_error = str(e)
else:
    init_error = None
# ---------------------------
# Gradio UI: single-file app
# ---------------------------
def run_system_ui(subject, stream, partA, partB, partC, syllabus_file, ref_file):
    """Gradio click handler: run the pipeline and build the three .docx files.

    Returns (qp_path, answer_path, obe_path, status_message); the three paths
    are None whenever generation could not proceed.
    """
    if init_error:
        return None, None, None, f"Server init error: {init_error}"
    try:
        # Extract text from the uploaded syllabus and (optional) reference QP.
        syllabus_text = extract_text_from_gradio_file(syllabus_file)
        ref_text = extract_text_from_gradio_file(ref_file) if ref_file else ""
        if not syllabus_text:
            sample_path = "/mnt/data/cloud_computing_syllabus.txt"
            msg = (
                "Syllabus extraction failed or file empty. "
                f"Use the sample syllabus for testing: {sample_path} or upload a .txt/.pdf/.docx."
            )
            return None, None, None, msg

        # Run the multi-agent pipeline.
        out = orchestrator.run_pipeline(
            subject=subject,
            stream=stream,
            partA=int(partA),
            partB=int(partB),
            partC=int(partC),
            syllabus_text=syllabus_text,
            ref_qp_text=ref_text,
        )

        # The formatter can fail; guarantee downstream builders get a dict.
        raw_final = out.get("final")
        if isinstance(raw_final, dict):
            final_json = raw_final
        else:
            final_json = {
                "final_qp": {"questions": []},
                "answers": {},
                "obe": {},
                "error": "Formatter returned invalid JSON or None.",
                "generator_raw_sample": (out.get("generator_raw") or "")[:5000],
            }
        gen_raw = out.get("generator_raw", "")

        # Write the three documents into a fresh temp directory.
        tmpdir = Path(tempfile.mkdtemp())
        stem = subject.replace(' ', '_')
        qp_path = tmpdir / f"{stem}_QuestionPaper.docx"
        ans_path = tmpdir / f"{stem}_AnswerKey.docx"
        obe_path = tmpdir / f"{stem}_OBE_Summary.docx"
        build_question_paper_docx(qp_path, final_json, gen_raw, subject)
        build_answers_docx(ans_path, final_json, subject)
        build_obe_docx(obe_path, final_json, subject)
        return str(qp_path), str(ans_path), str(obe_path), "Generation completed successfully."
    except Exception as e:
        tb = traceback.format_exc()
        return None, None, None, f"Generation failed: {e}\n\n{tb}"
# Build UI
# Single-page Gradio layout: inputs on top, a generate button, then the three
# downloadable .docx outputs and a status line.
with gr.Blocks() as app:
    gr.Markdown("## Multi-Agent Question Paper Generator (Groq + SerpAPI) — Single-file app")
    if init_error:
        # Surface missing-secret / init problems directly on the page.
        gr.Markdown(f"**Initialization error:** {init_error}")
    with gr.Row():
        subject = gr.Textbox(label="Subject Name", value="Cloud Computing")
        stream = gr.Dropdown(label="Stream", choices=["CSE", "Non-CSE"], value="Non-CSE")
    with gr.Row():
        partA = gr.Number(label="Part A (number of short questions)", value=5, precision=0)
        partB = gr.Number(label="Part B (number of long questions / either-or pairs)", value=5, precision=0)
        partC = gr.Number(label="Part C (number of case/design questions)", value=1, precision=0)
    syllabus = gr.File(label="Upload Syllabus (.txt / .pdf / .docx)")
    ref_qp = gr.File(label="Reference QP (optional)")
    generate_btn = gr.Button("Generate Question Paper")
    qp_file = gr.File(label="Question Paper (.docx)")
    ans_file = gr.File(label="Answer Key (.docx)")
    obe_file = gr.File(label="OBE Summary (.docx)")
    status = gr.Markdown("Status: Idle")
    generate_btn.click(fn=run_system_ui, inputs=[subject, stream, partA, partB, partC, syllabus, ref_qp], outputs=[qp_file, ans_file, obe_file, status])
# Launch
if __name__ == "__main__":
    app.launch()