Added new files endpoints
Browse files- app/main.py +5 -2
- app/routes/files.py +54 -0
- app/utils.py +40 -1
app/main.py
CHANGED
|
@@ -9,9 +9,9 @@ from .services import model_client
|
|
| 9 |
|
| 10 |
# Import your routers explicitly
|
| 11 |
# Note: Ensure process.py and health.py are accessible.
|
| 12 |
-
# If they are in a 'routes' folder, change to: from .routes import process, health
|
| 13 |
try:
|
| 14 |
-
from . import process, health
|
| 15 |
except ImportError:
|
| 16 |
# Fallback if files are inside a 'routes' package
|
| 17 |
from .routes import process, health
|
|
@@ -80,6 +80,9 @@ def startup_event():
|
|
| 80 |
|
| 81 |
# --- Router Registration ---
|
| 82 |
|
|
|
|
|
|
|
|
|
|
| 83 |
# Mount the Processing Router (e.g., /process/pdf/stream)
|
| 84 |
app.include_router(process.router, prefix="/process", tags=["Process"])
|
| 85 |
|
|
|
|
| 9 |
|
| 10 |
# Import your routers explicitly
|
| 11 |
# Note: Ensure process.py and health.py are accessible.
|
| 12 |
+
# If they are in a 'routes' folder, change to: from .routes import process, health, files
|
| 13 |
try:
    # Preferred layout: router modules sit next to this file in the package.
    from . import process, health, files
except ImportError:
    # Fallback if files are inside a 'routes' package.
    # `files` has to be imported on this path as well: the router
    # registration further down references `files.router`, so leaving it
    # out here would surface as a NameError at startup instead of a clear
    # ImportError.
    from .routes import process, health, files
|
|
|
|
| 80 |
|
| 81 |
# --- Router Registration ---
|
| 82 |
|
| 83 |
+
# Mount the File Management Router (e.g., /files/upload)
|
| 84 |
+
app.include_router(files.router, prefix="/files", tags=["File Management"]) # <--- Register here
|
| 85 |
+
|
| 86 |
# Mount the Processing Router (e.g., /process/pdf/stream)
|
| 87 |
app.include_router(process.router, prefix="/process", tags=["Process"])
|
| 88 |
|
app/routes/files.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException, Query
|
| 2 |
+
from starlette.concurrency import run_in_threadpool
|
| 3 |
+
from typing import Optional
|
| 4 |
+
|
| 5 |
+
from ..utils import (
|
| 6 |
+
list_all_jobs,
|
| 7 |
+
get_job_by_filename,
|
| 8 |
+
get_signed_url
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
router = APIRouter()
|
| 12 |
+
|
| 13 |
+
@router.get("/list")
async def list_files():
    """
    List the uploaded PDF filenames and the Markdown report paths.

    The data comes from ``list_all_jobs``, which is invoked through
    ``run_in_threadpool``. ``count`` reflects the number of PDF filenames,
    not the number of reports.
    """
    listing = await run_in_threadpool(list_all_jobs)
    uploaded = listing["pdf_files"]
    reports = listing["md_files"]
    return {
        "status": "success",
        "count": len(uploaded),
        "uploaded_pdfs": uploaded,
        "generated_reports": reports,
    }
|
| 26 |
+
|
| 27 |
+
@router.get("/download")
async def download_file_by_name(filename: str = Query(..., description="The exact name of the uploaded PDF file")):
    """
    Resolve an uploaded file name (e.g., 'document.pdf') to the download
    URL of its generated Markdown report.

    Raises:
        HTTPException: 404 when no job matches ``filename`` or the job has
            no report path; 500 when no download URL could be produced.
    """
    # Step 1: look up the job record for the requested file name.
    job_record = await run_in_threadpool(get_job_by_filename, filename)
    if not job_record:
        raise HTTPException(status_code=404, detail=f"File '{filename}' not found in records.")

    # Step 2: the record must point at a report.
    report_location = job_record.get("report")
    if not report_location:
        raise HTTPException(status_code=404, detail="Report path is missing for this file.")

    # Step 3: turn the report path into a signed download link.
    signed_url = await run_in_threadpool(get_signed_url, report_location)
    if not signed_url:
        raise HTTPException(status_code=500, detail="Could not generate download link.")

    payload = {
        "filename": filename,
        "job_id": job_record.get("uuid"),
        "report_download_url": signed_url,
    }
    return payload
|
app/utils.py
CHANGED
|
@@ -112,4 +112,43 @@ def start_cleanup_thread(retention_seconds: int = 24 * 3600, interval_seconds: i
|
|
| 112 |
import threading as _th
|
| 113 |
t = _th.Thread(target=cleanup_expired_reports, args=(retention_seconds, interval_seconds), daemon=True)
|
| 114 |
t.start()
|
| 115 |
-
_cleanup_thread_started = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
import threading as _th
|
| 113 |
t = _th.Thread(target=cleanup_expired_reports, args=(retention_seconds, interval_seconds), daemon=True)
|
| 114 |
t.start()
|
| 115 |
+
_cleanup_thread_started = True
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def get_job_by_filename(filename: str) -> Optional[Dict]:
    """Fetch a job entry by its original filename.

    Queries the ``job_metadata`` table for a single row whose
    ``original_filename`` column equals ``filename``.

    Args:
        filename: Exact original filename to look up.

    Returns:
        A dict with the keys ``uuid``, ``original_filename``, ``report``,
        ``created_at`` and ``expires_at`` built from the matching row, or
        ``None`` when no row matches or the lookup fails.
    """
    try:
        # NOTE(review): no ordering is applied before limit(1), so if several
        # rows share a filename, which one comes back is up to the backend.
        response = (
            supabase.table("job_metadata")
            .select("*")
            .eq("original_filename", filename)
            .limit(1)
            .execute()
        )

        rows = response.data
        if not rows:
            return None

        row = rows[0]
        return {
            "uuid": row["job_id"],
            "original_filename": row["original_filename"],
            "report": row["report_path"],
            "created_at": row["created_at"],
            "expires_at": row["expires_at"],
        }
    except Exception as e:
        # Best-effort lookup: callers treat None as "not found", so report
        # the failure (with an accurate message) instead of raising.
        print(f"Error fetching job by filename '{filename}': {e}")
        return None
|
| 140 |
+
|
| 141 |
+
def list_all_jobs(limit: int = 100) -> Dict[str, list]:
    """Return a separated list of PDF filenames and MD report paths.

    Args:
        limit: Passed straight through to ``read_metadata``.

    Returns:
        A dict with three keys:
        ``pdf_files`` - the ``original_filename`` of every record,
        ``md_files`` - the ``report`` value of every record that has one
        (so it may be shorter than ``pdf_files``),
        ``full_data`` - the records as returned by ``read_metadata``.
        On failure all three keys are present and map to empty lists.
    """
    try:
        data = read_metadata(limit)
        pdf_files = [item["original_filename"] for item in data]
        md_files = [item["report"] for item in data if item.get("report")]
        return {
            "pdf_files": pdf_files,
            "md_files": md_files,
            "full_data": data,  # Useful if frontend needs ID mapping
        }
    except Exception as e:
        print(f"Error listing files: {e}")
        # Keep the same shape as the success path so callers can index
        # "full_data" without a KeyError when the listing fails.
        return {"pdf_files": [], "md_files": [], "full_data": []}
|