Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ from typing import List
|
|
| 4 |
from pathlib import Path
|
| 5 |
import shutil
|
| 6 |
import tempfile
|
|
|
|
| 7 |
|
| 8 |
from langchain_docling import DoclingLoader
|
| 9 |
from langchain_docling.loader import ExportType
|
|
@@ -24,16 +25,24 @@ async def upload_file(file: UploadFile = File(...)):
|
|
| 24 |
# with file_path.open("wb") as buffer:
|
| 25 |
# shutil.copyfileobj(file.file, buffer)
|
| 26 |
|
| 27 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=file.filename) as temp_file:
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
|
| 32 |
-
temp_file_path = temp_file.name
|
| 33 |
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
loader = DoclingLoader(file_path=str(
|
| 37 |
docs = loader.load()
|
| 38 |
# docs = docs.model_dump()
|
| 39 |
return {
|
|
|
|
| 4 |
from pathlib import Path
|
| 5 |
import shutil
|
| 6 |
import tempfile
|
| 7 |
+
import os
|
| 8 |
|
| 9 |
from langchain_docling import DoclingLoader
|
| 10 |
from langchain_docling.loader import ExportType
|
|
|
|
| 25 |
# with file_path.open("wb") as buffer:
|
| 26 |
# shutil.copyfileobj(file.file, buffer)
|
| 27 |
|
| 28 |
+
# with tempfile.NamedTemporaryFile(delete=False, suffix=file.filename) as temp_file:
|
| 29 |
+
# # Efficiently write the uploaded file's content to the temporary file
|
| 30 |
+
# contents = await file.read()
|
| 31 |
+
# temp_file.write(contents)
|
| 32 |
|
| 33 |
+
# temp_file_path = temp_file.name
|
| 34 |
|
| 35 |
+
suffix = os.path.splitext(file.filename)[-1] or ".pdf"
|
| 36 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir="/tmp") as tmp:
|
| 37 |
+
shutil.copyfileobj(file.file, tmp)
|
| 38 |
+
tmp_path = tmp.name
|
| 39 |
+
|
| 40 |
+
# At this point, tmp_path is a real file path in /tmp
|
| 41 |
+
# Debug: check if file is valid
|
| 42 |
+
size = os.path.getsize(tmp_path)
|
| 43 |
+
print(f"Saved {file.filename} -> {tmp_path} ({size} bytes)")
|
| 44 |
|
| 45 |
+
loader = DoclingLoader(file_path=str(tmp_path), export_type=ExportType.MARKDOWN)
|
| 46 |
docs = loader.load()
|
| 47 |
# docs = docs.model_dump()
|
| 48 |
return {
|