Update backend/app/main.py
Browse files- backend/app/main.py +111 -11
backend/app/main.py
CHANGED
|
@@ -451,9 +451,13 @@ async def upload_asset(
|
|
| 451 |
file_suffix = Path(safe_filename).suffix
|
| 452 |
unique_filename = f"{file_stem}_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}{file_suffix}"
|
| 453 |
file_path = upload_dir / unique_filename
|
|
|
|
|
|
|
| 454 |
with open(file_path, "wb") as buffer:
|
| 455 |
buffer.write(content)
|
| 456 |
|
|
|
|
|
|
|
| 457 |
# Save to database (keep dummy content as requested)
|
| 458 |
try:
|
| 459 |
from app.models import Asset
|
|
@@ -462,7 +466,7 @@ async def upload_asset(
|
|
| 462 |
|
| 463 |
db_asset = Asset(
|
| 464 |
name=file.filename, # Keep original filename for display
|
| 465 |
-
file_path=str(file_path), # Store
|
| 466 |
file_type=file_type,
|
| 467 |
product_category=product_category or "ocr",
|
| 468 |
sub_category=sub_category if sub_category and sub_category != "none" else None,
|
|
@@ -877,9 +881,41 @@ async def get_pdf_pages(asset_id, db: Session = Depends(get_db)):
|
|
| 877 |
|
| 878 |
if row:
|
| 879 |
print(f"✓ Found asset via direct connection (exception handler): {row[1]} (id={row[0]})")
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 883 |
|
| 884 |
if row[3] != "document" or not str(file_path).lower().endswith('.pdf'):
|
| 885 |
raise HTTPException(status_code=400, detail="File is not a PDF")
|
|
@@ -934,9 +970,41 @@ async def get_pdf_pages(asset_id, db: Session = Depends(get_db)):
|
|
| 934 |
|
| 935 |
# If we got asset from ORM, use it
|
| 936 |
if db_asset:
|
| 937 |
-
|
| 938 |
-
|
| 939 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 940 |
|
| 941 |
if db_asset.file_type != "document" or not str(file_path).lower().endswith('.pdf'):
|
| 942 |
raise HTTPException(status_code=400, detail="File is not a PDF")
|
|
@@ -1017,9 +1085,41 @@ async def get_pdf_pages(asset_id, db: Session = Depends(get_db)):
|
|
| 1017 |
|
| 1018 |
if row:
|
| 1019 |
print(f"✓ Found asset via direct connection: {row[1]} (id={row[0]})")
|
| 1020 |
-
|
| 1021 |
-
|
| 1022 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1023 |
|
| 1024 |
if row[3] != "document" or not str(file_path).lower().endswith('.pdf'):
|
| 1025 |
raise HTTPException(status_code=400, detail="File is not a PDF")
|
|
@@ -1307,4 +1407,4 @@ if FRONTEND_DIST.exists():
|
|
| 1307 |
# Serve index.html for all other routes (SPA routing)
|
| 1308 |
if INDEX_FILE.exists():
|
| 1309 |
return FileResponse(str(INDEX_FILE))
|
| 1310 |
-
return {"detail": "Frontend not found"}
|
|
|
|
| 451 |
file_suffix = Path(safe_filename).suffix
|
| 452 |
unique_filename = f"{file_stem}_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}{file_suffix}"
|
| 453 |
file_path = upload_dir / unique_filename
|
| 454 |
+
# Resolve to absolute path for reliable access
|
| 455 |
+
file_path = file_path.resolve()
|
| 456 |
with open(file_path, "wb") as buffer:
|
| 457 |
buffer.write(content)
|
| 458 |
|
| 459 |
+
print(f"✓ File saved to: {file_path} (absolute path)")
|
| 460 |
+
|
| 461 |
# Save to database (keep dummy content as requested)
|
| 462 |
try:
|
| 463 |
from app.models import Asset
|
|
|
|
| 466 |
|
| 467 |
db_asset = Asset(
|
| 468 |
name=file.filename, # Keep original filename for display
|
| 469 |
+
file_path=str(file_path), # Store absolute path for reliable access
|
| 470 |
file_type=file_type,
|
| 471 |
product_category=product_category or "ocr",
|
| 472 |
sub_category=sub_category if sub_category and sub_category != "none" else None,
|
|
|
|
| 881 |
|
| 882 |
if row:
|
| 883 |
print(f"✓ Found asset via direct connection (exception handler): {row[1]} (id={row[0]})")
|
| 884 |
+
file_path_str = row[2]
|
| 885 |
+
print(f"Debug: file_path from DB: {file_path_str}")
|
| 886 |
+
# Resolve path - handle both relative and absolute paths
|
| 887 |
+
file_path = Path(file_path_str)
|
| 888 |
+
|
| 889 |
+
# Try multiple path resolution strategies
|
| 890 |
+
if file_path.is_absolute():
|
| 891 |
+
# Already absolute, use as-is
|
| 892 |
+
if not file_path.exists():
|
| 893 |
+
raise HTTPException(status_code=404, detail=f"File not found on disk: {file_path}")
|
| 894 |
+
else:
|
| 895 |
+
# Relative path - try multiple locations
|
| 896 |
+
possible_paths = [
|
| 897 |
+
Path.cwd() / file_path_str, # From current working directory
|
| 898 |
+
Path("/app") / file_path_str, # From /app (Docker)
|
| 899 |
+
Path("/app/uploads") / Path(file_path_str).name, # Just filename in /app/uploads
|
| 900 |
+
Path(file_path_str).resolve(), # Resolve relative to current dir
|
| 901 |
+
]
|
| 902 |
+
|
| 903 |
+
file_path = None
|
| 904 |
+
for possible_path in possible_paths:
|
| 905 |
+
if possible_path.exists():
|
| 906 |
+
file_path = possible_path
|
| 907 |
+
print(f"✓ Found file at: {file_path}")
|
| 908 |
+
break
|
| 909 |
+
|
| 910 |
+
if not file_path:
|
| 911 |
+
# List what's actually in /app/uploads for debugging
|
| 912 |
+
uploads_dir = Path("/app/uploads")
|
| 913 |
+
if uploads_dir.exists():
|
| 914 |
+
files_in_uploads = list(uploads_dir.glob("*.pdf"))
|
| 915 |
+
print(f"Debug: PDF files in /app/uploads: {[str(f) for f in files_in_uploads[:5]]}")
|
| 916 |
+
raise HTTPException(status_code=404, detail=f"File not found. Tried: {[str(p) for p in possible_paths]}")
|
| 917 |
+
|
| 918 |
+
print(f"Debug: Using file_path: {file_path} (exists: {file_path.exists()})")
|
| 919 |
|
| 920 |
if row[3] != "document" or not str(file_path).lower().endswith('.pdf'):
|
| 921 |
raise HTTPException(status_code=400, detail="File is not a PDF")
|
|
|
|
| 970 |
|
| 971 |
# If we got asset from ORM, use it
|
| 972 |
if db_asset:
|
| 973 |
+
file_path_str = db_asset.file_path
|
| 974 |
+
print(f"Debug: file_path from ORM: {file_path_str}")
|
| 975 |
+
# Resolve path - handle both relative and absolute paths
|
| 976 |
+
file_path = Path(file_path_str)
|
| 977 |
+
|
| 978 |
+
# Try multiple path resolution strategies
|
| 979 |
+
if file_path.is_absolute():
|
| 980 |
+
# Already absolute, use as-is
|
| 981 |
+
if not file_path.exists():
|
| 982 |
+
raise HTTPException(status_code=404, detail=f"File not found on disk: {file_path}")
|
| 983 |
+
else:
|
| 984 |
+
# Relative path - try multiple locations
|
| 985 |
+
possible_paths = [
|
| 986 |
+
Path.cwd() / file_path_str, # From current working directory
|
| 987 |
+
Path("/app") / file_path_str, # From /app (Docker)
|
| 988 |
+
Path("/app/uploads") / Path(file_path_str).name, # Just filename in /app/uploads
|
| 989 |
+
Path(file_path_str).resolve(), # Resolve relative to current dir
|
| 990 |
+
]
|
| 991 |
+
|
| 992 |
+
file_path = None
|
| 993 |
+
for possible_path in possible_paths:
|
| 994 |
+
if possible_path.exists():
|
| 995 |
+
file_path = possible_path
|
| 996 |
+
print(f"✓ Found file at: {file_path}")
|
| 997 |
+
break
|
| 998 |
+
|
| 999 |
+
if not file_path:
|
| 1000 |
+
# List what's actually in /app/uploads for debugging
|
| 1001 |
+
uploads_dir = Path("/app/uploads")
|
| 1002 |
+
if uploads_dir.exists():
|
| 1003 |
+
files_in_uploads = list(uploads_dir.glob("*.pdf"))
|
| 1004 |
+
print(f"Debug: PDF files in /app/uploads: {[str(f) for f in files_in_uploads[:5]]}")
|
| 1005 |
+
raise HTTPException(status_code=404, detail=f"File not found. Tried: {[str(p) for p in possible_paths]}")
|
| 1006 |
+
|
| 1007 |
+
print(f"Debug: Using file_path: {file_path} (exists: {file_path.exists()})")
|
| 1008 |
|
| 1009 |
if db_asset.file_type != "document" or not str(file_path).lower().endswith('.pdf'):
|
| 1010 |
raise HTTPException(status_code=400, detail="File is not a PDF")
|
|
|
|
| 1085 |
|
| 1086 |
if row:
|
| 1087 |
print(f"✓ Found asset via direct connection: {row[1]} (id={row[0]})")
|
| 1088 |
+
file_path_str = row[2]
|
| 1089 |
+
print(f"Debug: file_path from DB (fallback): {file_path_str}")
|
| 1090 |
+
# Resolve path - handle both relative and absolute paths
|
| 1091 |
+
file_path = Path(file_path_str)
|
| 1092 |
+
|
| 1093 |
+
# Try multiple path resolution strategies
|
| 1094 |
+
if file_path.is_absolute():
|
| 1095 |
+
# Already absolute, use as-is
|
| 1096 |
+
if not file_path.exists():
|
| 1097 |
+
raise HTTPException(status_code=404, detail=f"File not found on disk: {file_path}")
|
| 1098 |
+
else:
|
| 1099 |
+
# Relative path - try multiple locations
|
| 1100 |
+
possible_paths = [
|
| 1101 |
+
Path.cwd() / file_path_str, # From current working directory
|
| 1102 |
+
Path("/app") / file_path_str, # From /app (Docker)
|
| 1103 |
+
Path("/app/uploads") / Path(file_path_str).name, # Just filename in /app/uploads
|
| 1104 |
+
Path(file_path_str).resolve(), # Resolve relative to current dir
|
| 1105 |
+
]
|
| 1106 |
+
|
| 1107 |
+
file_path = None
|
| 1108 |
+
for possible_path in possible_paths:
|
| 1109 |
+
if possible_path.exists():
|
| 1110 |
+
file_path = possible_path
|
| 1111 |
+
print(f"✓ Found file at: {file_path}")
|
| 1112 |
+
break
|
| 1113 |
+
|
| 1114 |
+
if not file_path:
|
| 1115 |
+
# List what's actually in /app/uploads for debugging
|
| 1116 |
+
uploads_dir = Path("/app/uploads")
|
| 1117 |
+
if uploads_dir.exists():
|
| 1118 |
+
files_in_uploads = list(uploads_dir.glob("*.pdf"))
|
| 1119 |
+
print(f"Debug: PDF files in /app/uploads: {[str(f) for f in files_in_uploads[:5]]}")
|
| 1120 |
+
raise HTTPException(status_code=404, detail=f"File not found. Tried: {[str(p) for p in possible_paths]}")
|
| 1121 |
+
|
| 1122 |
+
print(f"Debug: Using file_path (fallback): {file_path} (exists: {file_path.exists()})")
|
| 1123 |
|
| 1124 |
if row[3] != "document" or not str(file_path).lower().endswith('.pdf'):
|
| 1125 |
raise HTTPException(status_code=400, detail="File is not a PDF")
|
|
|
|
| 1407 |
# Serve index.html for all other routes (SPA routing)
|
| 1408 |
if INDEX_FILE.exists():
|
| 1409 |
return FileResponse(str(INDEX_FILE))
|
| 1410 |
+
return {"detail": "Frontend not found"}
|