File size: 1,266 Bytes
8cec716
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
import shutil
import tempfile

from fastapi import UploadFile
from langchain_community.document_loaders import UnstructuredMarkdownLoader, PyPDFLoader

async def process_file(file: UploadFile) -> str:
    """Extract the text content of an uploaded file.

    The upload is copied to a uniquely named temporary file (the document
    loaders take a path on disk), parsed according to its extension, and the
    temporary file is removed again before returning.

    Args:
        file: The uploaded file. Only ``file.filename`` (for its extension)
            and ``file.file`` (the binary stream) are read.

    Returns:
        The extracted text. ``.md`` and ``.pdf`` uploads (matched
        case-insensitively) go through their loaders and the resulting
        documents are joined with blank lines; anything else is read as
        UTF-8 text. If any step fails, the literal string
        ``"Error processing file."`` is returned instead of raising.
    """
    # The client controls the filename, so it must never become part of a
    # path. Only its extension is kept, to pick a loader and to give the
    # temporary file a matching suffix.
    original_name = os.path.basename(file.filename or "")
    suffix = os.path.splitext(original_name)[1].lower()

    temp_path = None
    try:
        # delete=False: the file has to outlive this ``with`` so the loaders
        # can reopen it by path; it is removed in ``finally`` below.
        with tempfile.NamedTemporaryFile(
            mode="wb", suffix=suffix, delete=False
        ) as buffer:
            temp_path = buffer.name
            shutil.copyfileobj(file.file, buffer)

        if suffix == ".md":
            docs = UnstructuredMarkdownLoader(temp_path).load()
            return "\n\n".join(d.page_content for d in docs)
        if suffix == ".pdf":
            docs = PyPDFLoader(temp_path).load()
            return "\n\n".join(d.page_content for d in docs)

        # Fallback for text files
        with open(temp_path, "r", encoding="utf-8") as f:
            return f.read()
    except Exception as e:
        # Deliberate catch-all: callers receive a sentinel string rather
        # than an exception, whatever the loader or decoder raised.
        print(f"Error processing file: {e}")
        return "Error processing file."
    finally:
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass