Spaces:
Runtime error
Runtime error
Aryan Jain committed on
Commit ·
f8067b7
1
Parent(s): bfb0701
remove comments
Browse files- src/utils/_file_client.py +0 -52
src/utils/_file_client.py
CHANGED
|
@@ -4,12 +4,9 @@ import json
|
|
| 4 |
import os
|
| 5 |
import uuid
|
| 6 |
import fitz
|
| 7 |
-
# from docx2pdf import convert
|
| 8 |
from openpyxl import load_workbook
|
| 9 |
-
import aiofiles
|
| 10 |
import markdown2
|
| 11 |
import subprocess
|
| 12 |
-
import tempfile
|
| 13 |
from pathlib import Path
|
| 14 |
|
| 15 |
class FileClient:
|
|
@@ -110,55 +107,6 @@ class FileClient:
|
|
| 110 |
os.remove(file_path)
|
| 111 |
|
| 112 |
|
| 113 |
-
# async with aiofiles.tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_docx:
|
| 114 |
-
# tmp_docx_path = tmp_docx.name
|
| 115 |
-
# await tmp_docx.write(file_bytes.getvalue())
|
| 116 |
-
|
| 117 |
-
# env = os.environ.copy()
|
| 118 |
-
# env.update({
|
| 119 |
-
# "HOME": temp_home,
|
| 120 |
-
# "TMPDIR": output_dir,
|
| 121 |
-
# "SAL_USE_VCLPLUGIN": "svp",
|
| 122 |
-
# "DISPLAY": ":99",
|
| 123 |
-
# })
|
| 124 |
-
|
| 125 |
-
# profile_dir = os.path.join(temp_home, ".config", "libreoffice", "4", "user")
|
| 126 |
-
# os.makedirs(profile_dir, exist_ok=True)
|
| 127 |
-
|
| 128 |
-
# cmd = [
|
| 129 |
-
# "libreoffice",
|
| 130 |
-
# "--headless",
|
| 131 |
-
# "--invisible",
|
| 132 |
-
# "--nodefault",
|
| 133 |
-
# "--nolockcheck",
|
| 134 |
-
# "--nologo",
|
| 135 |
-
# "--norestore",
|
| 136 |
-
# "--convert-to", "pdf",
|
| 137 |
-
# "--outdir", output_dir,
|
| 138 |
-
# tmp_docx_path
|
| 139 |
-
# ]
|
| 140 |
-
|
| 141 |
-
# process = await asyncio.to_thread(
|
| 142 |
-
# subprocess.run,
|
| 143 |
-
# cmd,
|
| 144 |
-
# check=False,
|
| 145 |
-
# capture_output=True,
|
| 146 |
-
# text=True,
|
| 147 |
-
# env=env,
|
| 148 |
-
# timeout=60
|
| 149 |
-
# )
|
| 150 |
-
|
| 151 |
-
# pdf_filename = os.path.splitext(os.path.basename(tmp_docx_path))[0] + ".pdf"
|
| 152 |
-
# tmp_pdf_path = os.path.join(output_dir, pdf_filename)
|
| 153 |
-
|
| 154 |
-
# async with aiofiles.open(tmp_pdf_path, "rb") as pdf_file:
|
| 155 |
-
# pdf_bytes = await pdf_file.read()
|
| 156 |
-
|
| 157 |
-
# final_pdf_bytes = io.BytesIO(pdf_bytes)
|
| 158 |
-
# data["data"] = await self.extract_from_pdf(file_bytes=final_pdf_bytes)
|
| 159 |
-
|
| 160 |
-
# return data
|
| 161 |
-
|
| 162 |
async def extract_from_excel(self, file_bytes: io.BytesIO):
|
| 163 |
wb = load_workbook(file_bytes, data_only=True)
|
| 164 |
|
|
|
|
| 4 |
import os
|
| 5 |
import uuid
|
| 6 |
import fitz
|
|
|
|
| 7 |
from openpyxl import load_workbook
|
|
|
|
| 8 |
import markdown2
|
| 9 |
import subprocess
|
|
|
|
| 10 |
from pathlib import Path
|
| 11 |
|
| 12 |
class FileClient:
|
|
|
|
| 107 |
os.remove(file_path)
|
| 108 |
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
async def extract_from_excel(self, file_bytes: io.BytesIO):
|
| 111 |
wb = load_workbook(file_bytes, data_only=True)
|
| 112 |
|