# backend.py
import uvicorn
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
import tempfile, io, os, re, json, base64, hashlib
from typing import List, Tuple, Dict
import fitz  # PyMuPDF
import requests
import pandas as pd
from docx import Document
from io import BytesIO
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, Boolean
from sqlalchemy.orm import sessionmaker, declarative_base
import datetime
from urllib.parse import quote_plus

# ---------- LEGACY MySQL SETTINGS (currently unused — the app runs on SQLite below) ----------
# SECURITY NOTE(review): credentials are hard-coded in source; move them to
# environment variables before deploying anywhere shared.
MYSQL_USER = "root"
MYSQL_PASSWORD = "root@MySQL4admin"
MYSQL_HOST = "localhost"
MYSQL_PORT = 3306
MYSQL_DB = "mcq_db"
# URL-encode the password (kept for backward compatibility with any external caller)
encoded_password = quote_plus(MYSQL_PASSWORD)

# ---------- DATABASE (SQLite) ----------
DATABASE_URL = "sqlite:///./app.db"
engine = create_engine(
    DATABASE_URL,
    # Needed for SQLite: FastAPI may access the DB from multiple threads.
    connect_args={"check_same_thread": False},
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()


class Question(Base):
    """A single generated exam question — either an MCQ or a descriptive one."""
    __tablename__ = "questions"

    id = Column(Integer, primary_key=True, index=True)
    topic = Column(String(255))
    type = Column(String(20))  # "MCQ" / "Descriptive"
    question = Column(Text, nullable=False)
    option_a = Column(Text)
    option_b = Column(Text)
    option_c = Column(Text)
    option_d = Column(Text)
    answer = Column(Text)              # MCQ answer (letter or text)
    descriptive_answer = Column(Text)  # long-form answer for descriptive questions
    difficulty = Column(String(10))
    created_at = Column(DateTime, default=datetime.datetime.utcnow)
    flagged = Column(Boolean, default=None)  # None = pending review


# Create the table if it does not exist yet.
Base.metadata.create_all(bind=engine)


def _as_difficulty(value):
    """Normalize a difficulty value to str (the column type is String) or None."""
    return str(value) if value is not None else None


def save_questions_to_db(results: dict):
    """Save parsed generation results into the questions table.

    Expected ``results`` structure::

        {
          "Topic Name": {
            "mcqs": [
              {"question": "...", "options": [...], "answer": "A", "difficulty": 2},
              ...
            ],
            "descriptive": [
              {"question": "...", "answer": "...", "difficulty": 3},
              ...
            ]
          },
          ...
        }

    The function is defensive: it skips entries missing the required
    'question' text and logs skipped items.

    Returns ``{"status": "success", "saved": n, "skipped": m}`` on success,
    or ``{"status": "error", "error": msg}`` after rolling the transaction back.
    """
    db = SessionLocal()
    saved = 0
    skipped = 0
    try:
        for topic, data in (results or {}).items():
            if not data:
                # topic present but no payload — nothing to save
                continue

            # --- MCQs ---
            for mcq in data.get("mcqs", []):
                # robust extraction: some callers use "q" instead of "question"
                question_text = mcq.get("question") or mcq.get("q")
                if not question_text or not str(question_text).strip():
                    print("⚠️ Skipping MCQ with no question text:", mcq)
                    skipped += 1
                    continue
                opts = mcq.get("options", []) or []
                db.add(Question(
                    topic=topic,
                    type="MCQ",
                    question=str(question_text).strip(),
                    # prefer the options list; fall back to flat option_* keys
                    option_a=opts[0] if len(opts) > 0 else mcq.get("option_a") or None,
                    option_b=opts[1] if len(opts) > 1 else mcq.get("option_b") or None,
                    option_c=opts[2] if len(opts) > 2 else mcq.get("option_c") or None,
                    option_d=opts[3] if len(opts) > 3 else mcq.get("option_d") or None,
                    answer=mcq.get("answer") or mcq.get("ans") or None,
                    descriptive_answer=None,
                    difficulty=_as_difficulty(mcq.get("difficulty")),
                    created_at=datetime.datetime.utcnow(),
                    flagged=None,  # pending by default
                ))
                saved += 1

            # --- Descriptive ---
            for dq in data.get("descriptive", []):
                question_text = dq.get("question") or dq.get("q")
                if not question_text or not str(question_text).strip():
                    print("⚠️ Skipping Descriptive with no question text:", dq)
                    skipped += 1
                    continue
                db.add(Question(
                    topic=topic,
                    type="Descriptive",
                    question=str(question_text).strip(),
                    option_a=None,
                    option_b=None,
                    option_c=None,
                    option_d=None,
                    answer=None,
                    descriptive_answer=dq.get("answer") or dq.get("descriptive_answer") or None,
                    difficulty=_as_difficulty(dq.get("difficulty")),
                    created_at=datetime.datetime.utcnow(),
                    flagged=None,
                ))
                saved += 1

        db.commit()
        return {"status": "success", "saved": saved, "skipped": skipped}
    except Exception as e:
        db.rollback()
        print("❌ DB error in save_questions_to_db:", e)
        return {"status": "error", "error": str(e)}
    finally:
        db.close()


# ---------- CONFIG ----------
from dotenv import load_dotenv
load_dotenv()

# OpenRouter configuration — set OPENROUTER_API_KEY in the environment.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
OPENROUTER_MODEL = "meta-llama/llama-3.3-70b-instruct:free"  # free model, you can change this

# Headers for the OpenRouter API
OPENROUTER_HEADERS = {
    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
    "Content-Type": "application/json",
    "HTTP-Referer": "http://localhost:8000",  # Optional: your site URL
    "X-Title": "MCQ Generator",               # Optional: your app name
}
MODEL = OPENROUTER_MODEL
HOST = "127.0.0.1"
PORT = 8000

# ---------- FASTAPI ----------
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)

# Serve static files (put design.html and any assets inside ./static/).
static_dir = os.path.join(os.path.dirname(__file__), "static")
os.makedirs(static_dir, exist_ok=True)  # exist_ok makes the isdir pre-check redundant
app.mount("/static", StaticFiles(directory=static_dir), name="static")
root @app.get("/", response_class=HTMLResponse) async def index(): fpath = os.path.join(static_dir, "design.html") if os.path.exists(fpath): return HTMLResponse(open(fpath, "r", encoding="utf-8").read()) return HTMLResponse("

Place design.html inside ./static/ and reload.

") # ---------- IN-MEMORY STATE & STORE ---------- IN_MEMORY_STORE = {} # key -> {"data": bytes, "name": str, "mime": str} STATE = { "pdf_uploads": 0, "last_pdf_hash": None, "last_pdf_pages": 0, "mcq_count": 0, "desc_count": 0 } def store_result_bytes(key: str, data: bytes, filename: str, mime: str): IN_MEMORY_STORE[key] = {"data": data, "name": filename, "mime": mime} @app.get("/download/{key}") async def download_key(key: str): item = IN_MEMORY_STORE.get(key) if not item: return JSONResponse({"error": "Not found"}, status_code=404) return StreamingResponse(io.BytesIO(item["data"]), media_type=item["mime"], headers={"Content-Disposition": f"attachment; filename={item['name']}"}) @app.get("/status") async def status(): """Return counters for the top dashboard (PDF uploads, pages, counts).""" return { "pdf_uploads": STATE["pdf_uploads"], "last_pdf_pages": STATE["last_pdf_pages"], "mcq_count": STATE["mcq_count"], "desc_count": STATE["desc_count"] } # ---------- UTIL HELPERS (ported from your Streamlit code) ---------- def clean_text(text: str) -> str: if text is None: return "" return re.sub(r"[\x00-\x1F\x7F]", "", str(text)) def detect_index_range(doc, min_section_hits: int = 3, consecutive_break: int = 2) -> Tuple[int, int]: scores = [] has_contents_flags = [] for pno in range(doc.page_count): try: text = doc.load_page(pno).get_text("text") or "" except Exception: text = "" low = text.lower() has_contents = bool(re.search(r"\btable of contents\b|\bcontents\b", low)) count_sections = len(re.findall(r"\b\d{1,2}\.\d+\b", text)) count_leaders = len(re.findall(r"\.{2,}\s*\d+|\s+\d{1,3}\s*$", text, re.M)) score = count_sections + 0.6 * count_leaders + (5 if has_contents else 0) scores.append(score) has_contents_flags.append(has_contents) if any(has_contents_flags): start_idx = next(i for i, f in enumerate(has_contents_flags) if f) end_idx = start_idx break_count = 0 for i in range(start_idx + 1, len(scores)): if scores[i] >= 1.0: end_idx = i break_count = 0 else: 
break_count += 1 if break_count >= consecutive_break: break return (start_idx + 1, end_idx + 1) start_idx = None for i, s in enumerate(scores): if s >= min_section_hits: start_idx = i break if start_idx is None: raise ValueError("Could not auto-detect contents/index pages.") end_idx = start_idx gap = 0 for i in range(start_idx + 1, len(scores)): if scores[i] >= 1.0: end_idx = i gap = 0 else: gap += 1 if gap >= consecutive_break: break return (start_idx + 1, end_idx + 1) # ---------- OLLAMA CALLS & PARSERS ---------- import time, os, requests, json def call_ollama(prompt: str) -> str: try: payload = { "model": OPENROUTER_MODEL, # e.g. "meta-llama/llama-3.3-70b-instruct:free" "messages": [ {"role": "user", "content": prompt} ] } resp = requests.post( OPENROUTER_API_URL, headers=OPENROUTER_HEADERS, json=payload, timeout=120 ) resp.raise_for_status() data = resp.json() # OpenRouter chat completion shape return data["choices"][0]["message"]["content"].strip() except Exception as e: return f"LOCAL_MODEL_ERROR: {str(e)}" def summarize_text(text: str, model: str = MODEL, max_words: int = 200) -> str: """ Basic fallback summarizer using the same LLM call function. Used only when local BART summarizer fails or is unavailable. """ if not text or not text.strip(): return "" prompt = f""" Summarize the following text clearly and concisely in no more than {max_words} words. Do not omit key information. TEXT: {text} """ try: summary = call_ollama(prompt) return summary.strip() if summary else "" except Exception: # worst-case fallback: truncate return " ".join(text.split()[:max_words]) def generate_mcqs_ollama(topic: str, num_qs: int = 5, context: str = ""): # Use textbook extract as the ONLY source ctx = (context or "").strip() if ctx: # keep context size under control ctx = ctx[:4000] prompt = f""" You are an exam question setter. Use ONLY the following textbook extract as your source. Do NOT use any outside knowledge. 
Every question and option MUST be directly answerable from this text. TEXTBOOK EXTRACT: \"\"\"{ctx}\"\"\" Topic: "{topic}" Generate {num_qs} high-quality multiple-choice questions that are strictly based on the above extract. STRICT FORMAT (do not add anything before or after this): Q1. A)