Seth committed on
Commit
cb28f8c
·
1 Parent(s): 89a3828
Files changed (1) hide show
  1. backend/app/main.py +1 -748
backend/app/main.py CHANGED
@@ -1,4 +1,3 @@
1
- <<<<<<< HEAD
2
  import os
3
  import time
4
  from typing import List, Dict, Optional
@@ -6,6 +5,7 @@ from typing import List, Dict, Optional
6
  from fastapi import FastAPI, UploadFile, File, Depends, Form, HTTPException, Body
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.staticfiles import StaticFiles
 
9
  from sqlalchemy.orm import Session
10
  from pydantic import BaseModel
11
 
@@ -16,8 +16,6 @@ from .openrouter_client import extract_fields_from_document
16
  from .auth import get_current_user, get_db, verify_token
17
  from .auth_routes import router as auth_router
18
  from .api_key_auth import get_user_from_api_key
19
- from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
20
- from typing import Optional
21
 
22
  # Allowed file types
23
  ALLOWED_CONTENT_TYPES = [
@@ -80,7 +78,6 @@ async def get_current_user_or_api_key_user(
80
  # Otherwise, try JWT authentication
81
  if credentials:
82
  try:
83
- from .auth import verify_token
84
  token = credentials.credentials
85
  payload = verify_token(token)
86
  user_id = int(payload.get("sub"))
@@ -785,747 +782,3 @@ if os.path.isdir(frontend_dir):
785
  return FileResponse(index_path)
786
  from fastapi import HTTPException
787
  raise HTTPException(status_code=404)
788
- =======
789
- import os
790
- import time
791
- from typing import List, Dict, Optional
792
-
793
- from fastapi import FastAPI, UploadFile, File, Depends, Form, HTTPException, Body
794
- from fastapi.middleware.cors import CORSMiddleware
795
- from fastapi.staticfiles import StaticFiles
796
- from sqlalchemy.orm import Session
797
- from pydantic import BaseModel
798
-
799
- from .db import Base, engine, SessionLocal
800
- from .models import ExtractionRecord, User, ShareToken
801
- from .schemas import ExtractionRecordBase, ExtractionStage
802
- from .openrouter_client import extract_fields_from_document
803
- from .auth import get_current_user, get_db
804
- from .auth_routes import router as auth_router
805
-
806
- # Allowed file types
807
- ALLOWED_CONTENT_TYPES = [
808
- "application/pdf",
809
- "image/png",
810
- "image/jpeg",
811
- "image/jpg",
812
- "image/tiff",
813
- "image/tif"
814
- ]
815
-
816
- # Allowed file extensions (for fallback validation)
817
- ALLOWED_EXTENSIONS = [".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"]
818
-
819
- # Maximum file size: 4 MB
820
- MAX_FILE_SIZE = 4 * 1024 * 1024 # 4 MB in bytes
821
-
822
- # Ensure data dir exists for SQLite
823
- os.makedirs("data", exist_ok=True)
824
-
825
- # Create tables
826
- Base.metadata.create_all(bind=engine)
827
-
828
- app = FastAPI(title="Document Capture Demo – Backend")
829
-
830
- # Include auth routes
831
- app.include_router(auth_router)
832
-
833
- # CORS (for safety we allow all; you can tighten later)
834
- app.add_middleware(
835
- CORSMiddleware,
836
- allow_origins=["*"],
837
- allow_credentials=True,
838
- allow_methods=["*"],
839
- allow_headers=["*"],
840
- )
841
-
842
-
843
- def get_db():
844
- db = SessionLocal()
845
- try:
846
- yield db
847
- finally:
848
- db.close()
849
-
850
-
851
- @app.get("/ping")
852
- def ping():
853
- """Healthcheck."""
854
- return {"status": "ok", "message": "backend alive"}
855
-
856
-
857
- def make_stages(total_ms: int, status: str) -> Dict[str, ExtractionStage]:
858
- """
859
- Build synthetic stage timing data for the History UI.
860
- For now we just split total_ms into 4 stages.
861
- """
862
- if total_ms <= 0:
863
- total_ms = 1000
864
-
865
- return {
866
- "uploading": ExtractionStage(
867
- time=int(total_ms * 0.15),
868
- status="completed",
869
- variation="normal",
870
- ),
871
- "aiAnalysis": ExtractionStage(
872
- time=int(total_ms * 0.55),
873
- status="completed" if status == "completed" else "failed",
874
- variation="normal",
875
- ),
876
- "dataExtraction": ExtractionStage(
877
- time=int(total_ms * 0.2),
878
- status="completed" if status == "completed" else "skipped",
879
- variation="fast",
880
- ),
881
- "outputRendering": ExtractionStage(
882
- time=int(total_ms * 0.1),
883
- status="completed" if status == "completed" else "skipped",
884
- variation="normal",
885
- ),
886
- }
887
-
888
-
889
- @app.post("/api/extract")
890
- async def extract_document(
891
- file: UploadFile = File(...),
892
- key_fields: Optional[str] = Form(None),
893
- db: Session = Depends(get_db),
894
- current_user: User = Depends(get_current_user),
895
- ):
896
- """
897
- Main extraction endpoint used by the Dashboard.
898
- 1) Read the uploaded file
899
- 2) Call OpenRouter + Qwen3-VL
900
- 3) Store a record in SQLite
901
- 4) Return extraction result + metadata
902
- """
903
- start = time.time()
904
- content = await file.read()
905
- content_type = file.content_type or "application/octet-stream"
906
- file_size = len(content)
907
- size_mb = file_size / 1024 / 1024
908
- size_str = f"{size_mb:.2f} MB"
909
-
910
- # Convert file content to base64 for storage
911
- import base64
912
- file_base64 = base64.b64encode(content).decode("utf-8")
913
-
914
- # Validate file size
915
- if file_size > MAX_FILE_SIZE:
916
- raise HTTPException(
917
- status_code=400,
918
- detail=f"File size exceeds 4 MB limit. Your file is {size_mb:.2f} MB."
919
- )
920
-
921
- # Validate file type
922
- file_extension = ""
923
- if file.filename:
924
- file_extension = "." + file.filename.split(".")[-1].lower()
925
-
926
- is_valid_type = (
927
- content_type in ALLOWED_CONTENT_TYPES or
928
- file_extension in ALLOWED_EXTENSIONS
929
- )
930
-
931
- if not is_valid_type:
932
- raise HTTPException(
933
- status_code=400,
934
- detail="Only PDF, PNG, JPG, and TIFF files are allowed."
935
- )
936
-
937
- try:
938
- print(f"[INFO] Starting extraction for file: {file.filename}, type: {content_type}, size: {size_str}")
939
- if key_fields:
940
- print(f"[INFO] Key fields requested: {key_fields}")
941
- extracted = await extract_fields_from_document(content, content_type, file.filename, key_fields)
942
- total_ms = int((time.time() - start) * 1000)
943
-
944
- print(f"[INFO] Extraction completed. Response keys: {list(extracted.keys())}")
945
- print(f"[INFO] Fields extracted: {extracted.get('fields', {})}")
946
-
947
- confidence = float(extracted.get("confidence", 90))
948
- fields = extracted.get("fields", {})
949
-
950
- # Get Fields from root level (if user provided key_fields)
951
- root_fields = extracted.get("Fields", {})
952
-
953
- # Get full_text for text output
954
- full_text = extracted.get("full_text", "")
955
- if full_text:
956
- full_text_words = len(str(full_text).split())
957
- print(f"[INFO] Full text extracted: {full_text_words} words")
958
-
959
- # Check if fields contain structured data (from table parsing)
960
- # If fields is a dict with page_X keys, it's already structured
961
- # If fields is empty or simple, add full_text and pages for text display
962
- if not fields or (isinstance(fields, dict) and not any(k.startswith("page_") for k in fields.keys())):
963
- if full_text:
964
- fields["full_text"] = full_text
965
-
966
- # Also check for pages array
967
- pages_data = extracted.get("pages", [])
968
- if pages_data and isinstance(pages_data, list):
969
- print(f"[INFO] Extracted text from {len(pages_data)} page(s)")
970
- fields["pages"] = pages_data
971
-
972
- # Add Fields at root level if it exists
973
- if root_fields:
974
- fields["Fields"] = root_fields
975
-
976
- # Count fields - if structured data exists, count table rows + root Fields
977
- if isinstance(fields, dict):
978
- # Check if it's structured page data
979
- if any(k.startswith("page_") for k in fields.keys()):
980
- # Count table rows from all pages
981
- table_rows_count = 0
982
- for page_key, page_data in fields.items():
983
- if page_key.startswith("page_") and isinstance(page_data, dict):
984
- table_rows = page_data.get("table", [])
985
- if isinstance(table_rows, list):
986
- table_rows_count += len(table_rows)
987
-
988
- # Count Fields from root level
989
- fields_keys = 0
990
- if isinstance(root_fields, dict):
991
- fields_keys = len(root_fields)
992
-
993
- fields_extracted = table_rows_count + fields_keys
994
- print(f"[INFO] Structured data: {table_rows_count} table rows, {fields_keys} extracted fields")
995
- else:
996
- # Regular fields count (excluding full_text, pages, and Fields)
997
- fields_extracted = len([k for k in fields.keys() if k not in ["full_text", "pages", "Fields"]])
998
- # Add Fields count if it exists
999
- if isinstance(root_fields, dict):
1000
- fields_extracted += len(root_fields)
1001
- else:
1002
- fields_extracted = 0
1003
-
1004
- print(f"[INFO] Final stats - confidence: {confidence}, fields_count: {fields_extracted}")
1005
-
1006
- status = "completed"
1007
- error_message = None
1008
- except Exception as e:
1009
- import traceback
1010
- total_ms = int((time.time() - start) * 1000)
1011
- confidence = 0.0
1012
- fields = {}
1013
- fields_extracted = 0
1014
- status = "failed"
1015
- error_message = str(e)
1016
- print(f"[ERROR] Extraction failed: {error_message}")
1017
- print(f"[ERROR] Traceback: {traceback.format_exc()}")
1018
-
1019
- # Save record to DB
1020
- import json
1021
- import base64
1022
- rec = ExtractionRecord(
1023
- user_id=current_user.id,
1024
- file_name=file.filename,
1025
- file_type=content_type,
1026
- file_size=size_str,
1027
- status=status,
1028
- confidence=confidence,
1029
- fields_extracted=fields_extracted,
1030
- total_time_ms=total_ms,
1031
- raw_output=json.dumps(fields), # Use JSON instead of str() to preserve structure
1032
- file_base64=file_base64, # Store base64 encoded file for preview
1033
- error_message=error_message,
1034
- )
1035
- db.add(rec)
1036
- db.commit()
1037
- db.refresh(rec)
1038
-
1039
- stages = make_stages(total_ms, status)
1040
-
1041
- # Response shape that frontend will consume
1042
- return {
1043
- "id": rec.id,
1044
- "fileName": rec.file_name,
1045
- "fileType": rec.file_type,
1046
- "fileSize": rec.file_size,
1047
- "status": status,
1048
- "confidence": confidence,
1049
- "fieldsExtracted": fields_extracted,
1050
- "totalTime": total_ms,
1051
- "fields": fields,
1052
- "stages": {k: v.dict() for k, v in stages.items()},
1053
- "errorMessage": error_message,
1054
- }
1055
-
1056
-
1057
- @app.get("/api/history", response_model=List[ExtractionRecordBase])
1058
- def get_history(
1059
- db: Session = Depends(get_db),
1060
- current_user: User = Depends(get_current_user),
1061
- ):
1062
- """
1063
- Used by the History page.
1064
- Returns last 100 records for the current user, with synthetic stage data.
1065
- """
1066
- recs = (
1067
- db.query(ExtractionRecord)
1068
- .filter(ExtractionRecord.user_id == current_user.id)
1069
- .order_by(ExtractionRecord.created_at.desc())
1070
- .limit(100)
1071
- .all()
1072
- )
1073
-
1074
- # Deduplicate: if multiple extractions share the same shared_from_extraction_id,
1075
- # keep only the most recent one (to prevent duplicates when same extraction is shared multiple times)
1076
- seen_shared_ids = set()
1077
- deduplicated_recs = []
1078
- for rec in recs:
1079
- if rec.shared_from_extraction_id:
1080
- # This is a shared extraction
1081
- if rec.shared_from_extraction_id not in seen_shared_ids:
1082
- seen_shared_ids.add(rec.shared_from_extraction_id)
1083
- deduplicated_recs.append(rec)
1084
- # Skip duplicates
1085
- else:
1086
- # Original extraction (not shared), always include
1087
- deduplicated_recs.append(rec)
1088
-
1089
- recs = deduplicated_recs
1090
-
1091
- output: List[ExtractionRecordBase] = []
1092
- for r in recs:
1093
- stages = make_stages(r.total_time_ms or 1000, r.status or "completed")
1094
- output.append(
1095
- ExtractionRecordBase(
1096
- id=r.id,
1097
- fileName=r.file_name,
1098
- fileType=r.file_type or "",
1099
- fileSize=r.file_size or "",
1100
- extractedAt=r.created_at,
1101
- status=r.status or "completed",
1102
- confidence=r.confidence or 0.0,
1103
- fieldsExtracted=r.fields_extracted or 0,
1104
- totalTime=r.total_time_ms or 0,
1105
- stages=stages,
1106
- errorMessage=r.error_message,
1107
- )
1108
- )
1109
- return output
1110
-
1111
-
1112
- @app.get("/api/extraction/{extraction_id}")
1113
- def get_extraction(
1114
- extraction_id: int,
1115
- db: Session = Depends(get_db),
1116
- current_user: User = Depends(get_current_user),
1117
- ):
1118
- """
1119
- Get a specific extraction by ID with full fields data.
1120
- Used when viewing output from History page.
1121
- """
1122
- import json
1123
-
1124
- rec = (
1125
- db.query(ExtractionRecord)
1126
- .filter(
1127
- ExtractionRecord.id == extraction_id,
1128
- ExtractionRecord.user_id == current_user.id
1129
- )
1130
- .first()
1131
- )
1132
-
1133
- if not rec:
1134
- from fastapi import HTTPException
1135
- raise HTTPException(status_code=404, detail="Extraction not found")
1136
-
1137
- # Parse the raw_output JSON string back to dict
1138
- fields = {}
1139
- if rec.raw_output:
1140
- try:
1141
- # Try parsing as JSON first (new format)
1142
- fields = json.loads(rec.raw_output)
1143
- except (json.JSONDecodeError, TypeError):
1144
- # If that fails, try using ast.literal_eval for old str() format (backward compatibility)
1145
- try:
1146
- import ast
1147
- # Only use literal_eval if it looks like a Python dict string
1148
- if rec.raw_output.strip().startswith('{'):
1149
- fields = ast.literal_eval(rec.raw_output)
1150
- else:
1151
- fields = {}
1152
- except:
1153
- fields = {}
1154
-
1155
- stages = make_stages(rec.total_time_ms or 1000, rec.status or "completed")
1156
-
1157
- return {
1158
- "id": rec.id,
1159
- "fileName": rec.file_name,
1160
- "fileType": rec.file_type or "",
1161
- "fileSize": rec.file_size or "",
1162
- "status": rec.status or "completed",
1163
- "confidence": rec.confidence or 0.0,
1164
- "fieldsExtracted": rec.fields_extracted or 0,
1165
- "totalTime": rec.total_time_ms or 0,
1166
- "fields": fields,
1167
- "fileBase64": rec.file_base64, # Include base64 encoded file for preview
1168
- "stages": {k: v.dict() for k, v in stages.items()},
1169
- "errorMessage": rec.error_message,
1170
- }
1171
-
1172
-
1173
- @app.post("/api/share")
1174
- async def share_extraction(
1175
- extraction_id: int = Body(...),
1176
- recipient_emails: List[str] = Body(...),
1177
- db: Session = Depends(get_db),
1178
- current_user: User = Depends(get_current_user),
1179
- ):
1180
- """
1181
- Share an extraction with one or more users via email.
1182
- Creates share tokens and sends emails to recipients.
1183
- """
1184
- import secrets
1185
- from datetime import datetime, timedelta
1186
- from .brevo_service import send_share_email
1187
- from .email_validator import validate_business_email
1188
-
1189
- # Validate recipient emails list
1190
- if not recipient_emails or len(recipient_emails) == 0:
1191
- raise HTTPException(status_code=400, detail="At least one recipient email is required")
1192
-
1193
- # Validate each recipient email is a business email
1194
- for email in recipient_emails:
1195
- try:
1196
- validate_business_email(email)
1197
- except HTTPException:
1198
- raise # Re-raise HTTPException from validate_business_email
1199
-
1200
- # Get the extraction record
1201
- extraction = (
1202
- db.query(ExtractionRecord)
1203
- .filter(
1204
- ExtractionRecord.id == extraction_id,
1205
- ExtractionRecord.user_id == current_user.id
1206
- )
1207
- .first()
1208
- )
1209
-
1210
- if not extraction:
1211
- raise HTTPException(status_code=404, detail="Extraction not found")
1212
-
1213
- # Generate share link base URL
1214
- base_url = os.environ.get("VITE_API_BASE_URL", "https://seth0330-ezofisocr.hf.space")
1215
-
1216
- # Process each recipient email
1217
- successful_shares = []
1218
- failed_shares = []
1219
- share_records = []
1220
-
1221
- for recipient_email in recipient_emails:
1222
- recipient_email = recipient_email.strip().lower()
1223
-
1224
- # Generate secure share token for this recipient
1225
- share_token = secrets.token_urlsafe(32)
1226
-
1227
- # Create share token record (expires in 30 days)
1228
- expires_at = datetime.utcnow() + timedelta(days=30)
1229
- share_record = ShareToken(
1230
- token=share_token,
1231
- extraction_id=extraction_id,
1232
- sender_user_id=current_user.id,
1233
- recipient_email=recipient_email,
1234
- expires_at=expires_at,
1235
- )
1236
- db.add(share_record)
1237
- share_records.append((share_record, share_token, recipient_email))
1238
-
1239
- # Commit all share tokens
1240
- try:
1241
- db.commit()
1242
- for share_record, share_token, recipient_email in share_records:
1243
- db.refresh(share_record)
1244
- except Exception as e:
1245
- db.rollback()
1246
- raise HTTPException(status_code=500, detail=f"Failed to create share tokens: {str(e)}")
1247
-
1248
- # Send emails to all recipients
1249
- for share_record, share_token, recipient_email in share_records:
1250
- share_link = f"{base_url}/share/{share_token}"
1251
- try:
1252
- # Get sender's name from current_user, fallback to None if not available
1253
- sender_name = current_user.name if current_user.name else None
1254
- await send_share_email(recipient_email, current_user.email, share_link, sender_name)
1255
- successful_shares.append(recipient_email)
1256
- except Exception as e:
1257
- # Log error but continue with other emails
1258
- print(f"[ERROR] Failed to send share email to {recipient_email}: {str(e)}")
1259
- failed_shares.append(recipient_email)
1260
- # Optionally, you could delete the share token if email fails
1261
- # db.delete(share_record)
1262
-
1263
- # Build response message
1264
- if len(failed_shares) == 0:
1265
- message = f"Extraction shared successfully with {len(successful_shares)} recipient(s)"
1266
- elif len(successful_shares) == 0:
1267
- raise HTTPException(status_code=500, detail=f"Failed to send share emails to all recipients")
1268
- else:
1269
- message = f"Extraction shared with {len(successful_shares)} recipient(s). Failed to send to: {', '.join(failed_shares)}"
1270
-
1271
- return {
1272
- "success": True,
1273
- "message": message,
1274
- "successful_count": len(successful_shares),
1275
- "failed_count": len(failed_shares),
1276
- "successful_emails": successful_shares,
1277
- "failed_emails": failed_shares if failed_shares else None
1278
- }
1279
-
1280
-
1281
- class ShareLinkRequest(BaseModel):
1282
- extraction_id: int
1283
-
1284
- @app.post("/api/share/link")
1285
- async def create_share_link(
1286
- request: ShareLinkRequest,
1287
- db: Session = Depends(get_db),
1288
- current_user: User = Depends(get_current_user),
1289
- ):
1290
- """
1291
- Create a shareable link for an extraction without requiring recipient emails.
1292
- Returns a share link that can be copied and shared manually.
1293
- """
1294
- import secrets
1295
- from datetime import datetime, timedelta
1296
-
1297
- # Get the extraction record
1298
- extraction = (
1299
- db.query(ExtractionRecord)
1300
- .filter(
1301
- ExtractionRecord.id == request.extraction_id,
1302
- ExtractionRecord.user_id == current_user.id
1303
- )
1304
- .first()
1305
- )
1306
-
1307
- if not extraction:
1308
- raise HTTPException(status_code=404, detail="Extraction not found")
1309
-
1310
- # Generate secure share token
1311
- share_token = secrets.token_urlsafe(32)
1312
-
1313
- # Create share token record (expires in 30 days, no specific recipient)
1314
- expires_at = datetime.utcnow() + timedelta(days=30)
1315
- share_record = ShareToken(
1316
- token=share_token,
1317
- extraction_id=request.extraction_id,
1318
- sender_user_id=current_user.id,
1319
- recipient_email=None, # None for public share links (copyable links)
1320
- expires_at=expires_at,
1321
- )
1322
- db.add(share_record)
1323
- db.commit()
1324
- db.refresh(share_record)
1325
-
1326
- # Generate share link
1327
- base_url = os.environ.get("VITE_API_BASE_URL", "https://seth0330-ezofisocr.hf.space")
1328
- share_link = f"{base_url}/share/{share_token}"
1329
-
1330
- return {
1331
- "success": True,
1332
- "share_link": share_link,
1333
- "share_token": share_token,
1334
- "expires_at": expires_at.isoformat() if expires_at else None
1335
- }
1336
-
1337
-
1338
- @app.get("/api/share/{token}")
1339
- async def access_shared_extraction(
1340
- token: str,
1341
- db: Session = Depends(get_db),
1342
- current_user: User = Depends(get_current_user),
1343
- ):
1344
- """
1345
- Access a shared extraction and copy it to the current user's account.
1346
- This endpoint is called after the user logs in via the share link.
1347
- """
1348
- from datetime import datetime
1349
- import json
1350
-
1351
- # Find the share token
1352
- share = (
1353
- db.query(ShareToken)
1354
- .filter(ShareToken.token == token)
1355
- .first()
1356
- )
1357
-
1358
- if not share:
1359
- raise HTTPException(status_code=404, detail="Share link not found or expired")
1360
-
1361
- # Check if token is expired
1362
- if share.expires_at and share.expires_at < datetime.utcnow():
1363
- raise HTTPException(status_code=410, detail="Share link has expired")
1364
-
1365
- # Get the original extraction
1366
- original_extraction = (
1367
- db.query(ExtractionRecord)
1368
- .filter(ExtractionRecord.id == share.extraction_id)
1369
- .first()
1370
- )
1371
-
1372
- if not original_extraction:
1373
- raise HTTPException(status_code=404, detail="Original extraction not found")
1374
-
1375
- # Check if already copied for this user (check by share token to prevent duplicates from same share)
1376
- # Also check if this specific share token was already used by this user
1377
- if share.accessed and share.accessed_by_user_id == current_user.id:
1378
- # This share token was already used by this user, find the extraction
1379
- existing_copy = (
1380
- db.query(ExtractionRecord)
1381
- .filter(
1382
- ExtractionRecord.user_id == current_user.id,
1383
- ExtractionRecord.shared_from_extraction_id == original_extraction.id
1384
- )
1385
- .order_by(ExtractionRecord.created_at.desc())
1386
- .first()
1387
- )
1388
-
1389
- if existing_copy:
1390
- return {
1391
- "success": True,
1392
- "extraction_id": existing_copy.id,
1393
- "message": "Extraction already shared with you"
1394
- }
1395
-
1396
- # Also check if any copy exists for this user from this original extraction
1397
- existing_copy = (
1398
- db.query(ExtractionRecord)
1399
- .filter(
1400
- ExtractionRecord.user_id == current_user.id,
1401
- ExtractionRecord.shared_from_extraction_id == original_extraction.id
1402
- )
1403
- .first()
1404
- )
1405
-
1406
- if existing_copy:
1407
- # Already copied, mark this share as accessed and return existing extraction ID
1408
- share.accessed = True
1409
- share.accessed_at = datetime.utcnow()
1410
- share.accessed_by_user_id = current_user.id
1411
- db.commit()
1412
-
1413
- return {
1414
- "success": True,
1415
- "extraction_id": existing_copy.id,
1416
- "message": "Extraction already shared with you"
1417
- }
1418
-
1419
- # Copy extraction to current user's account
1420
- # Parse the raw_output JSON string back to dict
1421
- fields = {}
1422
- if original_extraction.raw_output:
1423
- try:
1424
- fields = json.loads(original_extraction.raw_output)
1425
- except (json.JSONDecodeError, TypeError):
1426
- try:
1427
- import ast
1428
- if original_extraction.raw_output.strip().startswith('{'):
1429
- fields = ast.literal_eval(original_extraction.raw_output)
1430
- else:
1431
- fields = {}
1432
- except:
1433
- fields = {}
1434
-
1435
- # Create new extraction record for the recipient
1436
- new_extraction = ExtractionRecord(
1437
- user_id=current_user.id,
1438
- file_name=original_extraction.file_name,
1439
- file_type=original_extraction.file_type,
1440
- file_size=original_extraction.file_size,
1441
- status=original_extraction.status or "completed",
1442
- confidence=original_extraction.confidence or 0.0,
1443
- fields_extracted=original_extraction.fields_extracted or 0,
1444
- total_time_ms=original_extraction.total_time_ms or 0,
1445
- raw_output=original_extraction.raw_output, # Copy the JSON string
1446
- file_base64=original_extraction.file_base64, # Copy the base64 file
1447
- shared_from_extraction_id=original_extraction.id,
1448
- shared_by_user_id=share.sender_user_id,
1449
- )
1450
- db.add(new_extraction)
1451
-
1452
- # Mark share as accessed
1453
- share.accessed = True
1454
- share.accessed_at = datetime.utcnow()
1455
- share.accessed_by_user_id = current_user.id
1456
-
1457
- db.commit()
1458
- db.refresh(new_extraction)
1459
-
1460
- return {
1461
- "success": True,
1462
- "extraction_id": new_extraction.id,
1463
- "message": "Extraction shared successfully"
1464
- }
1465
-
1466
-
1467
- # Static frontend mounting (used after we build React)
1468
- # Dockerfile copies the Vite build into backend/frontend_dist
1469
- # IMPORTANT: API routes must be defined BEFORE this so they take precedence
1470
- frontend_dir = os.path.join(
1471
- os.path.dirname(os.path.dirname(__file__)), "frontend_dist"
1472
- )
1473
-
1474
- if os.path.isdir(frontend_dir):
1475
- # Serve static files (JS, CSS, images, etc.) from assets directory
1476
- assets_dir = os.path.join(frontend_dir, "assets")
1477
- if os.path.isdir(assets_dir):
1478
- app.mount(
1479
- "/assets",
1480
- StaticFiles(directory=assets_dir),
1481
- name="assets",
1482
- )
1483
-
1484
- # Serve static files from root (logo.png, favicon.ico, etc.)
1485
- # Files in public/ directory are copied to dist/ root during Vite build
1486
- # These routes must be defined BEFORE the catch-all route
1487
- @app.get("/logo.png")
1488
- async def serve_logo():
1489
- """Serve logo.png from frontend_dist root."""
1490
- from fastapi.responses import FileResponse
1491
- logo_path = os.path.join(frontend_dir, "logo.png")
1492
- if os.path.exists(logo_path):
1493
- return FileResponse(logo_path, media_type="image/png")
1494
- from fastapi import HTTPException
1495
- raise HTTPException(status_code=404)
1496
-
1497
- @app.get("/favicon.ico")
1498
- async def serve_favicon():
1499
- """Serve favicon.ico from frontend_dist root."""
1500
- from fastapi.responses import FileResponse
1501
- favicon_path = os.path.join(frontend_dir, "favicon.ico")
1502
- if os.path.exists(favicon_path):
1503
- return FileResponse(favicon_path, media_type="image/x-icon")
1504
- from fastapi import HTTPException
1505
- raise HTTPException(status_code=404)
1506
-
1507
- # Catch-all route to serve index.html for React Router
1508
- # This must be last so API routes and static files are matched first
1509
- @app.get("/{full_path:path}")
1510
- async def serve_frontend(full_path: str):
1511
- """
1512
- Serve React app for all non-API routes.
1513
- React Router will handle client-side routing.
1514
- """
1515
- # Skip API routes, docs, static assets, and known static files
1516
- if (full_path.startswith("api/") or
1517
- full_path.startswith("docs") or
1518
- full_path.startswith("openapi.json") or
1519
- full_path.startswith("assets/") or
1520
- full_path in ["logo.png", "favicon.ico"]):
1521
- from fastapi import HTTPException
1522
- raise HTTPException(status_code=404)
1523
-
1524
- # Serve index.html for all other routes (React Router will handle routing)
1525
- from fastapi.responses import FileResponse
1526
- index_path = os.path.join(frontend_dir, "index.html")
1527
- if os.path.exists(index_path):
1528
- return FileResponse(index_path)
1529
- from fastapi import HTTPException
1530
- raise HTTPException(status_code=404)
1531
- >>>>>>> daae7a900bd14d0802e4f04b99edb85493053f1d
 
 
1
  import os
2
  import time
3
  from typing import List, Dict, Optional
 
5
  from fastapi import FastAPI, UploadFile, File, Depends, Form, HTTPException, Body
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from fastapi.staticfiles import StaticFiles
8
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
9
  from sqlalchemy.orm import Session
10
  from pydantic import BaseModel
11
 
 
16
  from .auth import get_current_user, get_db, verify_token
17
  from .auth_routes import router as auth_router
18
  from .api_key_auth import get_user_from_api_key
 
 
19
 
20
  # Allowed file types
21
  ALLOWED_CONTENT_TYPES = [
 
78
  # Otherwise, try JWT authentication
79
  if credentials:
80
  try:
 
81
  token = credentials.credentials
82
  payload = verify_token(token)
83
  user_id = int(payload.get("sub"))
 
782
  return FileResponse(index_path)
783
  from fastapi import HTTPException
784
  raise HTTPException(status_code=404)