Hammad712 commited on
Commit
906c82a
·
1 Parent(s): f83819a

Added new file-management endpoints

Browse files
Files changed (3) hide show
  1. app/main.py +5 -2
  2. app/routes/files.py +54 -0
  3. app/utils.py +40 -1
app/main.py CHANGED
@@ -9,9 +9,9 @@ from .services import model_client
9
 
10
  # Import your routers explicitly
11
  # Note: Ensure process.py and health.py are accessible.
12
- # If they are in a 'routes' folder, change to: from .routes import process, health
13
  try:
14
- from . import process, health
15
  except ImportError:
16
  # Fallback if files are inside a 'routes' package
17
  from .routes import process, health
@@ -80,6 +80,9 @@ def startup_event():
80
 
81
  # --- Router Registration ---
82
 
 
 
 
83
  # Mount the Processing Router (e.g., /process/pdf/stream)
84
  app.include_router(process.router, prefix="/process", tags=["Process"])
85
 
 
9
 
10
# Import your routers explicitly.
# Note: Ensure process.py, health.py and files.py are accessible.
# If they are in a 'routes' folder, change to: from .routes import process, health, files
try:
    from . import process, health, files
except ImportError:
    # Fallback if the modules live inside a 'routes' package.
    # Bug fix: 'files' was missing here, so taking this fallback path left
    # the name undefined and app.include_router(files.router, ...) raised
    # NameError at startup.
    from .routes import process, health, files
 
80
 
81
  # --- Router Registration ---
82
 
83
+ # Mount the File Management Router (e.g., /files/upload)
84
+ app.include_router(files.router, prefix="/files", tags=["File Management"]) # <--- Register here
85
+
86
  # Mount the Processing Router (e.g., /process/pdf/stream)
87
  app.include_router(process.router, prefix="/process", tags=["Process"])
88
 
app/routes/files.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Query
2
+ from starlette.concurrency import run_in_threadpool
3
+ from typing import Optional
4
+
5
+ from ..utils import (
6
+ list_all_jobs,
7
+ get_job_by_filename,
8
+ get_signed_url
9
+ )
10
+
11
+ router = APIRouter()
12
+
13
@router.get("/list")
async def list_files():
    """
    List every uploaded PDF filename together with the paths of the
    Markdown reports generated for them, as currently recorded in the
    database.
    """
    # The DB query is synchronous; run it off the event loop.
    jobs = await run_in_threadpool(list_all_jobs)
    pdf_names = jobs["pdf_files"]
    return {
        "status": "success",
        "count": len(pdf_names),
        "uploaded_pdfs": pdf_names,
        "generated_reports": jobs["md_files"],
    }
26
+
27
@router.get("/download")
async def download_file_by_name(filename: str = Query(..., description="The exact name of the uploaded PDF file")):
    """
    Resolve an uploaded PDF's filename to a signed download URL for its
    generated Markdown report.

    Raises:
        HTTPException 404: the filename is unknown, or the matching job
            has no report path recorded.
        HTTPException 500: the signed URL could not be generated.
    """
    # 1. Find the job associated with this filename (sync DB call ->
    #    threadpool to keep the event loop free).
    job = await run_in_threadpool(get_job_by_filename, filename)

    if not job:
        # Bug fix: the f-string previously contained no placeholder, so the
        # requested filename never appeared in the error message.
        raise HTTPException(status_code=404, detail=f"File '{filename}' not found in records.")

    # 2. Get the path to the report
    report_path = job.get("report")
    if not report_path:
        raise HTTPException(status_code=404, detail="Report path is missing for this file.")

    # 3. Generate a secure download link
    download_url = await run_in_threadpool(get_signed_url, report_path)

    if not download_url:
        raise HTTPException(status_code=500, detail="Could not generate download link.")

    return {
        "filename": filename,
        "job_id": job.get("uuid"),
        "report_download_url": download_url
    }
app/utils.py CHANGED
@@ -112,4 +112,43 @@ def start_cleanup_thread(retention_seconds: int = 24 * 3600, interval_seconds: i
112
  import threading as _th
113
  t = _th.Thread(target=cleanup_expired_reports, args=(retention_seconds, interval_seconds), daemon=True)
114
  t.start()
115
- _cleanup_thread_started = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  import threading as _th
113
  t = _th.Thread(target=cleanup_expired_reports, args=(retention_seconds, interval_seconds), daemon=True)
114
  t.start()
115
+ _cleanup_thread_started = True
116
+
117
+
118
def get_job_by_filename(filename: str) -> Optional[Dict]:
    """
    Fetch a job entry by its original filename.

    Queries the Supabase ``job_metadata`` table for the first row whose
    ``original_filename`` matches exactly.

    Returns:
        A dict with keys ``uuid``, ``original_filename``, ``report``,
        ``created_at`` and ``expires_at`` for the first match, or ``None``
        when no row matches or the query fails.
    """
    try:
        # Query Supabase for the filename (first match only).
        response = supabase.table("job_metadata")\
            .select("*")\
            .eq("original_filename", filename)\
            .limit(1)\
            .execute()

        if response.data and len(response.data) > 0:
            row = response.data[0]
            return {
                "uuid": row["job_id"],
                "original_filename": row["original_filename"],
                "report": row["report_path"],
                "created_at": row["created_at"],
                "expires_at": row["expires_at"]
            }
    except Exception as e:
        # Bug fix: the message was a copy-paste artifact
        # ("Error checking duplicate") and misdescribed this lookup.
        print(f"Error fetching job by filename: {e}")
    return None
140
+
141
def list_all_jobs(limit: int = 100) -> Dict[str, List[str]]:
    """
    Return a separated list of PDF filenames and MD report paths.

    Args:
        limit: Maximum number of metadata rows to read.

    Returns:
        Dict with ``pdf_files`` (original filenames), ``md_files``
        (report paths for rows that have one) and ``full_data`` (the raw
        rows, useful if the frontend needs ID mapping). On failure all
        three keys are present but empty.
    """
    try:
        data = read_metadata(limit)
        return {
            "pdf_files": [item["original_filename"] for item in data],
            "md_files": [item["report"] for item in data if item.get("report")],
            "full_data": data  # Useful if frontend needs ID mapping
        }
    except Exception as e:
        print(f"Error listing files: {e}")
        # Bug fix: keep the schema identical on failure — "full_data" was
        # missing here, causing KeyError in callers exactly when the DB
        # query failed.
        return {"pdf_files": [], "md_files": [], "full_data": []}