File size: 4,903 Bytes
cba2c8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from pathlib import Path
from typing import List
import uuid
import threading
import time
import os
from dotenv import load_dotenv

# Load variables from a local .env file (python-dotenv) into the process
# environment before the key below is looked up.
load_dotenv()

# The Groq key is mandatory: abort the import when it is missing or empty so
# a misconfigured deployment fails at start-up instead of inside a job.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise RuntimeError("GROQ_API_KEY not set")


# ---------------- YOUR MODULES ----------------
import engine as vl_engine
import Image_topdf as img2pdf_engine
import frequency as freq_engine
from json_to_pdf import json_to_pdf_with_images
from VL_output_to_json import extract_exam_questions

# ---------------- APP ----------------
# ASGI application object; every route in this module is registered on it.
app = FastAPI(title="Exam Pipeline Backend", version="0.1.0")

# ---------------- CORS ----------------
# Fully permissive CORS: any origin, method and header is accepted, and
# credentialed requests are allowed.
# NOTE(review): FastAPI's CORS documentation states that allow_origins,
# allow_methods and allow_headers should not be ["*"] when
# allow_credentials=True. Confirm whether the front end really sends
# credentials; if it does, list its origin(s) explicitly, otherwise drop
# allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ---------------- PATHS ----------------
# Folder containing this module; the storage directory hangs off it.
BASE_DIR = Path(__file__).resolve().parents[0]
QUERY_DIR = BASE_DIR.joinpath("queries")
# Created at import time; a no-op when the folder already exists.
QUERY_DIR.mkdir(exist_ok=True, parents=True)

# ---------------- JOB STATUS STORE ----------------
# Plain in-memory dicts keyed by job id, shared between the request handlers
# and the background worker threads. Nothing is persisted.
job_status: dict = {}  # job_id -> latest human-readable status string
job_result: dict = {}  # job_id -> result payload, set when a job completes

# ======================================================
# SAVE PDF
# ======================================================
@app.post("/save-pdf")
async def save_pdf(file: UploadFile = File(...)):
    """Store one uploaded file under ``QUERY_DIR`` and report where it landed.

    The stored name is ``<random hex>_<client file name>`` so repeated uploads
    of the same file never collide.

    Args:
        file: The multipart upload sent by the client.

    Returns:
        ``{"path": <path of the saved file as a string>}``.
    """
    # ``filename`` is chosen by the client. Keep only its last path component
    # (either separator style) so the target always sits directly inside
    # QUERY_DIR, and fall back to a fixed name when none was sent.
    client_name = (file.filename or "").replace("\\", "/")
    safe_name = Path(client_name).name or "upload.pdf"
    pdf_path = QUERY_DIR / f"{uuid.uuid4().hex}_{safe_name}"

    # Copy in fixed-size chunks so a large upload is never held in memory
    # as one bytes object.
    with open(pdf_path, "wb") as out:
        while True:
            chunk = await file.read(1024 * 1024)
            if not chunk:
                break
            out.write(chunk)
    return {"path": str(pdf_path)}

# ======================================================
# IMAGES -> PDF
# ======================================================
@app.post("/images-to-pdf")
async def images_to_pdf(files: List[UploadFile] = File(...)):
    """Combine the uploaded images into a single PDF under ``QUERY_DIR``.

    The conversion is delegated to ``img2pdf_engine.images_to_pdf``; this
    handler only chooses a unique ``doc_<hex>.pdf`` output name.

    Args:
        files: The uploaded image files, forwarded unchanged to the converter.

    Returns:
        ``{"path": <output PDF path as a string>}``.
    """
    unique_name = "doc_" + uuid.uuid4().hex + ".pdf"
    destination = QUERY_DIR / unique_name
    img2pdf_engine.images_to_pdf(output_pdf_path=destination, files=files)
    return {"path": str(destination)}

# ======================================================
# BACKGROUND PIPELINE (DO NOT CHANGE LOGIC)
# ======================================================
def _markdown_to_json_path(md_path: str) -> str:
    """Return *md_path* with only its trailing ``.md`` swapped for ``.json``.

    A path that does not end in ``.md`` gets ``.json`` appended, so the
    result is never the input path itself.
    """
    if md_path.lower().endswith(".md"):
        return md_path[:-3] + ".json"
    return md_path + ".json"


def pipeline_worker(job_id: str, api_key: str, docs: List[str]):
    """Run the whole exam pipeline for one job; meant to run in a thread.

    Steps, in order:
      1. ``vl_engine.VL_model`` on every document (``query_id`` counts from 1).
      2. ``extract_exam_questions`` on each VL result, writing a sibling JSON.
      3. ``freq_engine.run_semantic_frequency_multiple`` over all JSON files.
      4. ``json_to_pdf_with_images`` on the frequency output.

    Progress is published through ``job_status[job_id]`` and the final
    payload through ``job_result[job_id]``. Any ``Exception`` is caught and
    reported as an error status, so nothing is raised to the caller.

    Args:
        job_id: Key under which status and result are stored.
        api_key: Key forwarded to ``extract_exam_questions``.
        docs: Paths of the input documents.
    """
    # Status emoji are written as \U escapes so the literals stay intact
    # whatever encoding the source file is saved or displayed in.
    try:
        job_status[job_id] = "\U0001F50D Running VL model..."

        pdf_paths = [Path(p) for p in docs]
        vl_results = []
        for i, pdf in enumerate(pdf_paths, start=1):
            md = vl_engine.VL_model(str(pdf), query_id=i)
            vl_results.append(md)

        job_status[job_id] = "\U0001F9E0 Extracting JSON using LLM..."

        json_files = []
        for md in vl_results:
            # Each VL result is treated as a Markdown file path. Only the
            # final suffix is swapped; a blanket str.replace would also
            # rewrite ".md" inside directory names.
            out_json = _markdown_to_json_path(md)
            extract_exam_questions(md, api_key, out_json)
            json_files.append(out_json)

        job_status[job_id] = "\U0001F4CA Running semantic frequency..."

        # NOTE(review): every job is handed the same relative file name, so
        # concurrent jobs may overwrite each other's frequency output.
        # Confirm whether per-job names are needed.
        freq_out = freq_engine.run_semantic_frequency_multiple(
            json_files, "output_frequency.json"
        )

        job_status[job_id] = "\U0001F4C4 Generating final PDF..."

        final_pdf = json_to_pdf_with_images(
            freq_out, image_base_dir="vl_output_bro"
        )

        job_result[job_id] = {
            "final_pdf": final_pdf,
            "frequency_json": "output_frequency.json",
        }

        job_status[job_id] = "\u2705 Completed"

    except Exception as e:
        # Thread boundary: there is no caller to propagate to, so the error
        # is surfaced through the status that clients poll.
        job_status[job_id] = f"\u274C Error: {e}"

# ======================================================
# START PIPELINE
# ======================================================
@app.post("/run-pipeline")
async def run_pipeline(docs: List[str]):
    """Start the exam pipeline for *docs* in a background thread.

    The request returns immediately. Progress and the final result are
    stored under the returned job id by ``pipeline_worker``.

    Args:
        docs: Paths of the documents to process.

    Returns:
        ``{"job_id": <hex id of the new job>}``.
    """
    job_id = uuid.uuid4().hex
    # Emoji written as a \U escape so the literal stays intact whatever
    # encoding the source file is saved or displayed in.
    job_status[job_id] = "\U0001F680 Job started"

    # Daemon thread: a job still running at shutdown does not keep the
    # interpreter alive.
    worker = threading.Thread(
        target=pipeline_worker,
        args=(job_id, GROQ_API_KEY, docs),
        daemon=True,
    )
    worker.start()

    return {"job_id": job_id}

# ======================================================
# GET JOB STATUS
# ======================================================
@app.get("/job-status/{job_id}")
async def get_job_status(job_id: str):
    """Report the current status and, if stored, the result of a job.

    Args:
        job_id: Identifier of the job to look up.

    Returns:
        ``{"status": ..., "result": ...}``. ``status`` is ``"Unknown job"``
        for an id with no recorded status, and ``result`` is ``None`` when
        no result has been stored for the id.
    """
    current_status = job_status.get(job_id, "Unknown job")
    stored_result = job_result.get(job_id)
    return {"status": current_status, "result": stored_result}
import os
from fastapi.responses import FileResponse
# ======================================================
# DOWNLOAD FILE (FOR VUE FRONTEND)
# ======================================================
# File types this service produces or stores; nothing else is served.
_DOWNLOADABLE_SUFFIXES = (".pdf", ".json")


@app.get("/download")
async def download_file(path: str):
    """Send a pipeline artefact to the client as a download.

    ``path`` comes from the client, so it is resolved (collapsing ``..`` and
    symlinks) and served only when all of the following hold:

    * it lies inside the application directory or the working directory,
    * its suffix is ``.pdf`` or ``.json``,
    * it is an existing regular file.

    Args:
        path: Path of the file to download, absolute or relative to the
            working directory.

    Returns:
        A ``FileResponse`` for an allowed file. Otherwise the same
        ``{"error": "File not found"}`` body the endpoint has always
        returned, still with status 200, so existing clients keep working.
    """
    not_found = {"error": "File not found"}

    try:
        requested = Path(path).resolve()
    except (OSError, RuntimeError, ValueError):
        # Malformed input, e.g. an embedded NUL byte or a symlink loop.
        return not_found

    allowed_roots = (BASE_DIR, Path.cwd().resolve())
    inside_allowed_root = any(
        root in requested.parents for root in allowed_roots
    )
    if not inside_allowed_root:
        return not_found
    # The suffix allow-list keeps secrets and source files (".env", "*.py")
    # out of reach even though they live under the same roots.
    if requested.suffix.lower() not in _DOWNLOADABLE_SUFFIXES:
        return not_found
    if not requested.is_file():
        return not_found

    return FileResponse(
        str(requested),
        filename=requested.name,
        media_type="application/octet-stream",
    )