File size: 1,742 Bytes
8b53865 8b3bb10 8b53865 8b3bb10 8b53865 8b3bb10 8b53865 8b3bb10 8b53865 8b3bb10 8b53865 8b3bb10 8b53865 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import os
import tempfile
from typing import List
from fastapi import UploadFile
from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader, PDFLoader
class FileProcessor:
    """Convert an uploaded file into a list of text chunks.

    The upload is written to a temporary file on disk so that the
    path-based loaders (``PDFLoader`` for ``.pdf`` uploads,
    ``TextFileLoader`` for everything else) can read it. The loaded
    documents are then split with a ``CharacterTextSplitter``.
    """

    def __init__(self):
        self.text_splitter = CharacterTextSplitter()

    async def process_file(self, file: UploadFile) -> List[str]:
        """Process an uploaded file and return text chunks.

        Args:
            file: The upload to process. Its ``filename`` selects the loader
                and the temporary file's extension; a missing filename is
                treated as an empty name (no extension, text loader).

        Returns:
            The chunks produced by ``self.text_splitter.split_texts`` for the
            documents the loader returned.

        Raises:
            Whatever the read, the loader, or the splitter raises. The
            temporary file is removed in every case.
        """
        print(f"Processing file: {file.filename}")

        # The filename is client-supplied and may be absent.
        filename = file.filename or ""
        suffix = self._temp_suffix(filename)

        temp_path = None
        try:
            # delete=False because the loader reopens the file by path; the
            # handle is closed (leaving the ``with``) before that happens so
            # the reopen and the later unlink also work on Windows.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
                temp_path = temp_file.name
                temp_file.write(await file.read())
            print(f"Created temporary file at: {temp_path}")

            # Create appropriate loader based on file type
            loader = self._get_loader(temp_path, filename)

            # Load and process the documents
            documents = loader.load_documents()
            return self.text_splitter.split_texts(documents)
        finally:
            # Clean up the temporary file. This covers failures while reading
            # or writing the upload as well as failures in the loader.
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError as e:
                    # Best effort: a failed cleanup must not mask the result
                    # or the original exception.
                    print(f"Error cleaning up temporary file: {e}")

    @staticmethod
    def _temp_suffix(filename: str) -> str:
        """Return the extension of *filename* as a safe temp-file suffix.

        The result is ``".ext"`` in lower case, or ``""`` when the name has
        no extension. Directory components (with either slash style) are
        dropped first so a separator can never end up inside the suffix.
        Lower-casing keeps the on-disk extension in agreement with the
        case-insensitive dispatch in ``_get_loader``.
        """
        base_name = os.path.basename(filename.replace("\\", "/"))
        return os.path.splitext(base_name)[1].lower()

    def _get_loader(self, file_path: str, original_filename: str):
        """Get the appropriate loader based on file type.

        Args:
            file_path: Path of the file the loader should read.
            original_filename: Name the client uploaded the file under; only
                its extension is inspected (case-insensitively).

        Returns:
            A ``PDFLoader`` for ``.pdf`` uploads, otherwise a
            ``TextFileLoader``.
        """
        if original_filename.lower().endswith(".pdf"):
            return PDFLoader(file_path)
        return TextFileLoader(file_path)
|