Spaces:
Sleeping
Sleeping
File size: 4,903 Bytes
cba2c8f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from pathlib import Path
from typing import List
import uuid
import threading
import time
import os
from dotenv import load_dotenv
# Run dotenv's loader before the lookup below, so a key supplied through a
# dotenv file (presumably a local ".env" — confirm deployment setup) is seen.
load_dotenv()

# Key later passed to the pipeline worker for the LLM extraction step.
# A missing or empty value aborts the import so the server never starts
# half-configured.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise RuntimeError("GROQ_API_KEY not set")
# ---------------- YOUR MODULES ----------------
import engine as vl_engine
import Image_topdf as img2pdf_engine
import frequency as freq_engine
from json_to_pdf import json_to_pdf_with_images
from VL_output_to_json import extract_exam_questions
# ---------------- APP ----------------
app = FastAPI(version="0.1.0", title="Exam Pipeline Backend")

# ---------------- CORS ----------------
# Every origin, method and header is accepted, with credentials enabled.
# NOTE(review): wildcard origins together with allow_credentials=True is a
# very permissive combination — confirm whether the frontend origin(s) can
# be listed explicitly instead.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ---------------- PATHS ----------------
# Directory containing this module; uploads are stored in "queries" below it.
BASE_DIR = Path(__file__).resolve().parent
QUERY_DIR = BASE_DIR.joinpath("queries")
QUERY_DIR.mkdir(exist_ok=True, parents=True)

# ---------------- JOB STATUS STORE ----------------
# In-memory only: entries are never removed and do not survive a restart.
job_status = dict()  # job_id -> latest human-readable status text
job_result = dict()  # job_id -> result payload, set once a job completes
# ======================================================
# SAVE PDF
# ======================================================
@app.post("/save-pdf")
async def save_pdf(file: UploadFile = File(...)):
    """Store an uploaded file under QUERY_DIR and report where it was written.

    The stored name is ``<random hex>_<client file name>``. Only the final
    component of the client-supplied name is used, so directory parts in
    the upload's filename cannot steer the write outside QUERY_DIR.

    Args:
        file: The multipart upload to persist.

    Returns:
        ``{"path": <path of the stored file, as a string>}``.
    """
    # file.filename is chosen by the client. Normalise both separator styles
    # and keep only the last component; fall back to a fixed name when the
    # client sent none.
    client_name = (file.filename or "").replace("\\", "/").rsplit("/", 1)[-1]
    if not client_name:
        client_name = "upload.pdf"

    pdf_name = f"{uuid.uuid4().hex}_{client_name}"
    pdf_path = QUERY_DIR / pdf_name

    chunk_size = 1024 * 1024
    with open(pdf_path, "wb") as f:
        # Copy in bounded chunks instead of holding the whole upload in memory.
        while True:
            chunk = await file.read(chunk_size)
            if not chunk:
                break
            f.write(chunk)

    return {"path": str(pdf_path)}
# ======================================================
# IMAGES → PDF
# ======================================================
@app.post("/images-to-pdf")
async def images_to_pdf(files: List[UploadFile] = File(...)):
    """Hand the uploaded files to the image-to-PDF engine.

    A freshly named ``doc_<random hex>.pdf`` path under QUERY_DIR is passed
    to ``img2pdf_engine.images_to_pdf`` as the output location.

    Args:
        files: The multipart uploads, forwarded to the engine unchanged.

    Returns:
        ``{"path": <output PDF path, as a string>}``.
    """
    target = QUERY_DIR.joinpath(f"doc_{uuid.uuid4().hex}.pdf")
    img2pdf_engine.images_to_pdf(output_pdf_path=target, files=files)
    return {"path": str(target)}
# ======================================================
# BACKGROUND PIPELINE (DO NOT CHANGE LOGIC)
# ======================================================
def _json_path_for(md_path: str) -> str:
    """Return ``md_path`` with a trailing ``.md`` swapped for ``.json``."""
    suffix = ".md"
    if md_path.endswith(suffix):
        return md_path[: -len(suffix)] + ".json"
    # No ".md" suffix: append instead, so the JSON output can never be written
    # over the VL output file itself.
    return md_path + ".json"


def pipeline_worker(job_id: str, api_key: str, docs: List[str]):
    """Run the full pipeline for one job, recording progress in job_status.

    Stages, in order:
      1. ``vl_engine.VL_model`` is called once per document, numbered from 1.
      2. ``extract_exam_questions`` turns each VL output into a JSON file.
      3. ``freq_engine.run_semantic_frequency_multiple`` combines the JSON
         files into "output_frequency.json".
      4. ``json_to_pdf_with_images`` renders the final PDF.

    On success ``job_result[job_id]`` is filled in and the status becomes the
    "Completed" text. Any exception is logged with its traceback and turned
    into an error status; nothing is re-raised, because this function is the
    top of a background thread.

    Args:
        job_id: Key under which status and result are stored.
        api_key: Key forwarded to ``extract_exam_questions``.
        docs: Paths of the input documents.
    """
    import logging  # local import: the module's import block does not provide it

    # NOTE(review): the leading glyphs in the status strings below look
    # mis-encoded (probably emoji in the original file); they are kept as
    # found. Confirm against the frontend before changing them.
    try:
        job_status[job_id] = "π Running VL model..."
        pdf_paths = [Path(p) for p in docs]
        # Each call's return value is used as a path string below —
        # presumably the markdown file the model wrote; confirm in engine.
        vl_results = [
            vl_engine.VL_model(str(pdf), query_id=i)
            for i, pdf in enumerate(pdf_paths, start=1)
        ]

        job_status[job_id] = "π§ Extracting JSON using LLM..."
        json_files = []
        for md in vl_results:
            # Only the file suffix is rewritten; a ".md" elsewhere in the path
            # (e.g. in a directory name) is left alone.
            out_json = _json_path_for(md)
            extract_exam_questions(md, api_key, out_json)
            json_files.append(out_json)

        job_status[job_id] = "π Running semantic frequency..."
        # NOTE(review): the output name is shared by all jobs, so two jobs
        # running at once would write the same file — confirm this is acceptable.
        freq_out = freq_engine.run_semantic_frequency_multiple(
            json_files, "output_frequency.json"
        )

        job_status[job_id] = "π Generating final PDF..."
        final_pdf = json_to_pdf_with_images(
            freq_out, image_base_dir="vl_output_bro"
        )

        job_result[job_id] = {
            "final_pdf": final_pdf,
            "frequency_json": "output_frequency.json",
        }
        # This literal was split across two lines (a syntax error); rejoined.
        job_status[job_id] = "β Completed"
    except Exception as e:
        # Keep the traceback in the server log; the client only sees the message.
        logging.getLogger(__name__).exception("Pipeline job %s failed", job_id)
        job_status[job_id] = f"β Error: {e}"
# ======================================================
# START PIPELINE
# ======================================================
@app.post("/run-pipeline")
async def run_pipeline(docs: List[str]):
    """Register a new job and start ``pipeline_worker`` for it on a daemon thread.

    The request returns as soon as the thread has been started; progress is
    written to ``job_status`` by the worker.

    Args:
        docs: Document paths handed to the worker unchanged.

    Returns:
        ``{"job_id": <hex id of the new job>}``.
    """
    job_id = uuid.uuid4().hex
    job_status[job_id] = "π Job started"

    worker = threading.Thread(
        target=pipeline_worker,
        kwargs={"job_id": job_id, "api_key": GROQ_API_KEY, "docs": docs},
        daemon=True,
    )
    worker.start()

    return {"job_id": job_id}
# ======================================================
# GET JOB STATUS
# ======================================================
@app.get("/job-status/{job_id}")
async def get_job_status(job_id: str):
    """Report the latest status text and any stored result for ``job_id``.

    Returns:
        A dict with "status" (the text "Unknown job" when the id has no
        entry in ``job_status``) and "result" (``None`` when ``job_result``
        has no entry for the id).
    """
    status = job_status.get(job_id, "Unknown job")
    result = job_result.get(job_id)
    return {"status": status, "result": result}
import os
from fastapi.responses import FileResponse
# ======================================================
# DOWNLOAD FILE (FOR VUE FRONTEND)
# ======================================================
@app.get("/download")
async def download_file(path: str):
    """Send a server-side file to the client as a binary download.

    ``path`` comes straight from the query string, so it is treated as
    untrusted. The file is served only when the resolved path

      * lies inside BASE_DIR or the current working directory,
      * has no dot-prefixed component below that root (e.g. ".env"), and
      * is a regular file.

    Anything else gets the same 404 JSON body, so the response does not
    reveal which paths exist.

    Args:
        path: Path of the file to download; relative paths are resolved
            against the current working directory.

    Returns:
        A ``FileResponse`` for the file, or a 404 JSON response with body
        ``{"error": "File not found"}``.
    """
    from fastapi.responses import JSONResponse  # local import: not in the module's import block

    def not_found():
        # Same body the endpoint has always returned, now with a 404 status.
        return JSONResponse(status_code=404, content={"error": "File not found"})

    try:
        resolved = Path(path).resolve()
        allowed_roots = (BASE_DIR, Path.cwd().resolve())
    except (OSError, RuntimeError, ValueError):
        # Unresolvable input (e.g. embedded NUL byte, symlink loop).
        return not_found()

    for root in allowed_roots:
        try:
            inside = resolved.relative_to(root)
        except ValueError:
            continue  # not under this root; try the next one
        if not any(part.startswith(".") for part in inside.parts):
            break
    else:
        return not_found()

    # NOTE(review): source files under the allowed roots are still reachable;
    # consider a dedicated output directory or a suffix allow-list.
    if not resolved.is_file():
        return not_found()

    return FileResponse(
        str(resolved),
        filename=Path(path).name,
        media_type="application/octet-stream",
    )
|