web-crawling

Paused

App Files Files Community

pvanand committed on Oct 8, 2024

Commit

9e5e37a

verified ·

1 Parent(s): 338bb7c

Update file_conversion.py

Browse files

Files changed (1) hide show

file_conversion.py +0 -14

file_conversion.py CHANGED Viewed

@@ -17,50 +17,36 @@ TEMP_DIR = "/.tempfiles"
 def remove_file(path: str):
     if os.path.exists(path):
         os.unlink(path)
-        logger.info(f"Removed temporary file: {path}")
 @router.post("/convert/pdf_to_docx")
 async def convert_pdf_to_docx(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
     if not file.filename.endswith('.pdf'):
         raise HTTPException(status_code=400, detail="File must be a PDF")
-    # Ensure the temp directory exists
     os.makedirs(TEMP_DIR, exist_ok=True)
     pdf_temp_path = os.path.join(TEMP_DIR, f"temp_{file.filename}")
     docx_temp_path = pdf_temp_path.replace('.pdf', '.docx')
     try:
-        # Save the uploaded file
         with open(pdf_temp_path, "wb") as pdf_file:
             shutil.copyfileobj(file.file, pdf_file)
-        logger.info(f"Starting conversion of {pdf_temp_path}")
-        # Convert PDF to DOCX
         cv = Converter(pdf_temp_path)
         cv.convert(docx_temp_path)
         cv.close()
-        logger.info(f"Conversion completed. Output file: {docx_temp_path}")
-        # Check if the file exists
         if not os.path.exists(docx_temp_path):
             raise FileNotFoundError(f"Converted file not found: {docx_temp_path}")
-        # Schedule file removal after response is sent
         background_tasks.add_task(remove_file, pdf_temp_path)
         background_tasks.add_task(remove_file, docx_temp_path)
-        # Return the DOCX file
         return FileResponse(
             docx_temp_path,
             media_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
             filename=file.filename.replace('.pdf', '.docx')
         )
     except Exception as e:
-        logger.error(f"Conversion failed: {str(e)}")
-        # Clean up files in case of an error
         remove_file(pdf_temp_path)
         remove_file(docx_temp_path)
         raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}")

 def remove_file(path: str) -> None:
     """Delete the file at ``path``, doing nothing if it is already absent.

     The removal is attempted directly instead of checking for existence
     first, so a file that disappears between the check and the unlink
     (for example, removed by a concurrent cleanup) does not raise. Dangling
     symlinks are removed as well, since no existence check follows the link.

     Args:
         path: Filesystem path of the file to remove.

     Raises:
         OSError: For failures other than the path not existing (for example
             the path is a directory, or permission is denied).
     """
     try:
         os.unlink(path)
     except (FileNotFoundError, NotADirectoryError):
         # Nothing to delete: the file (or one of its parent components)
         # does not exist, which is the desired end state.
         pass
 @router.post("/convert/pdf_to_docx")
 async def convert_pdf_to_docx(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
     """Convert an uploaded PDF to DOCX and return the converted file.

     The upload is written to a uniquely named temporary file under
     ``TEMP_DIR``, converted with ``Converter``, and the resulting DOCX is
     returned as a download. Both temporary files are scheduled for removal
     via ``background_tasks``; on failure they are removed immediately.

     Args:
         background_tasks: Task queue used to schedule deletion of the
             temporary files.
         file: The uploaded file; its name must end in ``.pdf``
             (case-insensitive).

     Returns:
         A ``FileResponse`` serving the DOCX, named after the uploaded file
         with its ``.pdf`` extension replaced by ``.docx``.

     Raises:
         HTTPException: 400 if the upload is not named ``*.pdf``; 500 if
             saving or converting the file fails.
     """
     # Local import: the module's import block is not guaranteed to provide it.
     import uuid

     # The client controls the filename, so keep only its final path component
     # and never embed it in a server-side path.
     upload_name = os.path.basename(file.filename or "")
     if not upload_name.lower().endswith(".pdf"):
         raise HTTPException(status_code=400, detail="File must be a PDF")
     # Swap only the trailing extension; str.replace would also rewrite any
     # ".pdf" occurring earlier in the name.
     download_name = upload_name[: -len(".pdf")] + ".docx"

     os.makedirs(TEMP_DIR, exist_ok=True)
     # A random token keeps concurrent uploads that share a name from
     # overwriting each other's temporary files.
     token = uuid.uuid4().hex
     pdf_temp_path = os.path.join(TEMP_DIR, f"temp_{token}.pdf")
     docx_temp_path = os.path.join(TEMP_DIR, f"temp_{token}.docx")

     try:
         with open(pdf_temp_path, "wb") as pdf_file:
             shutil.copyfileobj(file.file, pdf_file)

         # NOTE(review): the conversion runs synchronously inside this async
         # handler; consider offloading it to a worker thread if it is slow.
         cv = Converter(pdf_temp_path)
         try:
             cv.convert(docx_temp_path)
         finally:
             # Close the converter even when the conversion itself fails.
             cv.close()

         if not os.path.exists(docx_temp_path):
             raise FileNotFoundError(f"Converted file not found: {docx_temp_path}")

         background_tasks.add_task(remove_file, pdf_temp_path)
         background_tasks.add_task(remove_file, docx_temp_path)
         return FileResponse(
             docx_temp_path,
             media_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
             filename=download_name,
         )
     except Exception as e:
         # Clean up right away; the scheduled background tasks are not relied
         # on when the request fails.
         remove_file(pdf_temp_path)
         remove_file(docx_temp_path)
         raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e