jonathanjordan21 committed on
Commit
abcbaa7
·
verified ·
1 Parent(s): 9786e20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -7
app.py CHANGED
@@ -4,6 +4,7 @@ from typing import List
4
  from pathlib import Path
5
  import shutil
6
  import tempfile
 
7
 
8
  from langchain_docling import DoclingLoader
9
  from langchain_docling.loader import ExportType
@@ -24,16 +25,24 @@ async def upload_file(file: UploadFile = File(...)):
24
  # with file_path.open("wb") as buffer:
25
  # shutil.copyfileobj(file.file, buffer)
26
 
27
- with tempfile.NamedTemporaryFile(delete=False, suffix=file.filename) as temp_file:
28
- # Efficiently write the uploaded file's content to the temporary file
29
- contents = await file.read()
30
- temp_file.write(contents)
31
 
32
- temp_file_path = temp_file.name
33
 
34
- # result = process_with_langchain(file_path)
 
 
 
 
 
 
 
 
35
 
36
- loader = DoclingLoader(file_path=str(temp_file_path), export_type=ExportType.MARKDOWN)
37
  docs = loader.load()
38
  # docs = docs.model_dump()
39
  return {
 
4
  from pathlib import Path
5
  import shutil
6
  import tempfile
7
+ import os
8
 
9
  from langchain_docling import DoclingLoader
10
  from langchain_docling.loader import ExportType
 
25
  # with file_path.open("wb") as buffer:
26
  # shutil.copyfileobj(file.file, buffer)
27
 
28
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=file.filename) as temp_file:
29
+ # # Efficiently write the uploaded file's content to the temporary file
30
+ # contents = await file.read()
31
+ # temp_file.write(contents)
32
 
33
+ # temp_file_path = temp_file.name
34
 
35
+ suffix = os.path.splitext(file.filename)[-1] or ".pdf"
36
+ with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir="/tmp") as tmp:
37
+ shutil.copyfileobj(file.file, tmp)
38
+ tmp_path = tmp.name
39
+
40
+ # At this point, tmp_path is a real file path in /tmp
41
+ # Debug: check if file is valid
42
+ size = os.path.getsize(tmp_path)
43
+ print(f"Saved {file.filename} -> {tmp_path} ({size} bytes)")
44
 
45
+ loader = DoclingLoader(file_path=str(tmp_path), export_type=ExportType.MARKDOWN)
46
  docs = loader.load()
47
  # docs = docs.model_dump()
48
  return {