Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from docx import Document
|
|
| 8 |
import io
|
| 9 |
import tempfile
|
| 10 |
import logging
|
|
|
|
| 11 |
|
| 12 |
# Konfiguracja logowania
|
| 13 |
logging.basicConfig(
|
|
@@ -16,89 +17,13 @@ logging.basicConfig(
|
|
| 16 |
)
|
| 17 |
|
| 18 |
class MultiConverter:
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
output = io.StringIO()
|
| 22 |
-
workbook = openpyxl.load_workbook(excel_file)
|
| 23 |
-
for idx, sheet_name in enumerate(workbook.sheetnames):
|
| 24 |
-
if idx > 0:
|
| 25 |
-
output.write("\n" + "-" * 70 + "\n\n")
|
| 26 |
-
output.write(f"### {sheet_name}:\n")
|
| 27 |
-
sheet = workbook[sheet_name]
|
| 28 |
-
if sheet.max_row <= 1 and sheet.max_column <= 1:
|
| 29 |
-
output.write("# No data in sheet\n\n")
|
| 30 |
-
continue
|
| 31 |
-
data = []
|
| 32 |
-
max_col_widths = []
|
| 33 |
-
non_empty_rows = []
|
| 34 |
-
non_empty_cols = []
|
| 35 |
-
for row_idx in range(1, sheet.max_row + 1):
|
| 36 |
-
for col_idx in range(1, sheet.max_column + 1):
|
| 37 |
-
cell_value = sheet.cell(row=row_idx, column=col_idx).value
|
| 38 |
-
if cell_value:
|
| 39 |
-
non_empty_rows.append(row_idx)
|
| 40 |
-
non_empty_cols.append(col_idx)
|
| 41 |
-
if not non_empty_rows or not non_empty_cols:
|
| 42 |
-
output.write("# No data in sheet\n\n")
|
| 43 |
-
continue
|
| 44 |
-
min_row, max_row = min(non_empty_rows), max(non_empty_rows)
|
| 45 |
-
min_col, max_col = min(non_empty_cols), max(non_empty_cols)
|
| 46 |
-
max_col_widths = [0] * (max_col - min_col + 1)
|
| 47 |
-
for row_idx in range(min_row, max_row + 1):
|
| 48 |
-
row_data = []
|
| 49 |
-
for col_idx in range(min_col, max_col + 1):
|
| 50 |
-
value = str(sheet.cell(row=row_idx, column=col_idx).value or "")
|
| 51 |
-
row_data.append(value)
|
| 52 |
-
col_pos = col_idx - min_col
|
| 53 |
-
max_col_widths[col_pos] = max(max_col_widths[col_pos], len(value))
|
| 54 |
-
data.append(row_data)
|
| 55 |
-
for row_idx, row in enumerate(data):
|
| 56 |
-
if row_idx == 0:
|
| 57 |
-
header_line = "| " + " | ".join(cell + " " * (max_col_widths[i] - len(cell)) for i, cell in enumerate(row)) + " |"
|
| 58 |
-
output.write(header_line + "\n")
|
| 59 |
-
separator_line = "|" + "|".join("-" * (width + 2) for width in max_col_widths) + "|"
|
| 60 |
-
output.write(separator_line + "\n")
|
| 61 |
-
data_line = "| " + " | ".join(cell + " " * (max_col_widths[i] - len(cell)) for i, cell in enumerate(row)) + " |"
|
| 62 |
-
output.write(data_line + "\n")
|
| 63 |
-
output.write("\n")
|
| 64 |
-
return output.getvalue()
|
| 65 |
-
|
| 66 |
-
def convert_pptx_to_text(self, pptx_file, filename):
|
| 67 |
-
"""Convert PowerPoint to plain text."""
|
| 68 |
-
output = io.StringIO()
|
| 69 |
-
prs = Presentation(pptx_file)
|
| 70 |
-
output.write(f"# PowerPoint Presentation: {filename}\n\n")
|
| 71 |
-
for slide_num, slide in enumerate(prs.slides, 1):
|
| 72 |
-
output.write(f"## Slide {slide_num}\n")
|
| 73 |
-
for shape in slide.shapes:
|
| 74 |
-
if hasattr(shape, "text"):
|
| 75 |
-
output.write(f"{shape.text}\n\n")
|
| 76 |
-
return output.getvalue()
|
| 77 |
-
|
| 78 |
-
def convert_pdf_to_text(self, pdf_file, filename):
|
| 79 |
-
"""Convert PDF to plain text."""
|
| 80 |
-
output = io.StringIO()
|
| 81 |
-
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
| 82 |
-
output.write(f"# PDF Document: {filename}\n\n")
|
| 83 |
-
for page_num, page in enumerate(pdf_reader.pages, 1):
|
| 84 |
-
output.write(f"## Page {page_num}\n")
|
| 85 |
-
output.write(page.extract_text() + "\n\n")
|
| 86 |
-
return output.getvalue()
|
| 87 |
-
|
| 88 |
-
def convert_docx_to_text(self, docx_file, filename):
|
| 89 |
-
"""Convert Word to plain text."""
|
| 90 |
-
output = io.StringIO()
|
| 91 |
-
doc = Document(docx_file)
|
| 92 |
-
output.write(f"# Word Document: {filename}\n\n")
|
| 93 |
-
for para in doc.paragraphs:
|
| 94 |
-
output.write(para.text + "\n\n")
|
| 95 |
-
return output.getvalue()
|
| 96 |
-
|
| 97 |
|
| 98 |
def convert_file(file):
|
| 99 |
-
"""
|
| 100 |
if file is None:
|
| 101 |
-
return "No file uploaded. Please select a file first."
|
| 102 |
|
| 103 |
try:
|
| 104 |
logging.info(f"Starting conversion for file: {file.name if hasattr(file, 'name') else 'unknown'}")
|
|
@@ -134,9 +59,9 @@ def convert_file(file):
|
|
| 134 |
with open(str(file), 'rb') as src, open(temp_file_path, 'wb') as dst:
|
| 135 |
dst.write(src.read())
|
| 136 |
except:
|
| 137 |
-
return f"Could not read file. Type: {type(file)}"
|
| 138 |
except Exception as e:
|
| 139 |
-
return f"Error reading file: {str(e)}"
|
| 140 |
|
| 141 |
# Określ rozszerzenie pliku
|
| 142 |
_, file_ext = os.path.splitext(file_name)
|
|
@@ -156,46 +81,82 @@ def convert_file(file):
|
|
| 156 |
else:
|
| 157 |
result = f"Unsupported file format: {file_ext}"
|
| 158 |
|
| 159 |
-
#
|
| 160 |
-
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
| 162 |
f.write(result)
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
except Exception as e:
|
| 166 |
logging.exception(f"Error converting file: {str(e)}")
|
| 167 |
-
return f"Error converting file: {str(e)}"
|
| 168 |
finally:
|
| 169 |
# Usuń pliki tymczasowe
|
| 170 |
try:
|
| 171 |
if os.path.exists(temp_file_path):
|
| 172 |
os.unlink(temp_file_path)
|
|
|
|
|
|
|
| 173 |
os.rmdir(temp_dir)
|
| 174 |
-
except:
|
| 175 |
-
|
| 176 |
|
| 177 |
except Exception as e:
|
| 178 |
logging.exception(f"Unexpected error: {str(e)}")
|
| 179 |
-
return f"Unexpected error: {str(e)}"
|
| 180 |
|
| 181 |
|
| 182 |
-
# Utwórz interfejs Gradio
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
# Uruchom aplikację
|
| 195 |
if __name__ == "__main__":
|
| 196 |
try:
|
| 197 |
logging.info("Starting the application")
|
| 198 |
-
|
| 199 |
logging.info("Application stopped")
|
| 200 |
except Exception as e:
|
| 201 |
logging.exception(f"Error launching application: {str(e)}")
|
|
|
|
| 8 |
import io
|
| 9 |
import tempfile
|
| 10 |
import logging
|
| 11 |
+
import base64
|
| 12 |
|
| 13 |
# Konfiguracja logowania
|
| 14 |
logging.basicConfig(
|
|
|
|
| 17 |
)
|
| 18 |
|
| 19 |
class MultiConverter:
|
| 20 |
+
# [Cała klasa MultiConverter pozostaje bez zmian]
|
| 21 |
+
# ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def convert_file(file):
|
| 24 |
+
"""Process uploaded file and convert it to text"""
|
| 25 |
if file is None:
|
| 26 |
+
return "No file uploaded. Please select a file first.", None
|
| 27 |
|
| 28 |
try:
|
| 29 |
logging.info(f"Starting conversion for file: {file.name if hasattr(file, 'name') else 'unknown'}")
|
|
|
|
| 59 |
with open(str(file), 'rb') as src, open(temp_file_path, 'wb') as dst:
|
| 60 |
dst.write(src.read())
|
| 61 |
except:
|
| 62 |
+
return f"Could not read file. Type: {type(file)}", None
|
| 63 |
except Exception as e:
|
| 64 |
+
return f"Error reading file: {str(e)}", None
|
| 65 |
|
| 66 |
# Określ rozszerzenie pliku
|
| 67 |
_, file_ext = os.path.splitext(file_name)
|
|
|
|
| 81 |
else:
|
| 82 |
result = f"Unsupported file format: {file_ext}"
|
| 83 |
|
| 84 |
+
# Utwórz nazwę pliku wyjściowego
|
| 85 |
+
output_filename = os.path.splitext(file_name)[0] + ".txt"
|
| 86 |
+
|
| 87 |
+
# Przygotuj plik do pobrania
|
| 88 |
+
output_file_path = os.path.join(temp_dir, output_filename)
|
| 89 |
+
with open(output_file_path, 'w', encoding='utf-8') as f:
|
| 90 |
f.write(result)
|
| 91 |
+
|
| 92 |
+
# Przygotuj plik do zwrócenia przez Gradio
|
| 93 |
+
with open(output_file_path, 'rb') as f:
|
| 94 |
+
output_content = f.read()
|
| 95 |
+
|
| 96 |
+
return result, (output_filename, output_content)
|
| 97 |
except Exception as e:
|
| 98 |
logging.exception(f"Error converting file: {str(e)}")
|
| 99 |
+
return f"Error converting file: {str(e)}", None
|
| 100 |
finally:
|
| 101 |
# Usuń pliki tymczasowe
|
| 102 |
try:
|
| 103 |
if os.path.exists(temp_file_path):
|
| 104 |
os.unlink(temp_file_path)
|
| 105 |
+
if os.path.exists(output_file_path):
|
| 106 |
+
os.unlink(output_file_path)
|
| 107 |
os.rmdir(temp_dir)
|
| 108 |
+
except Exception as e:
|
| 109 |
+
logging.warning(f"Could not clean up temporary files: {str(e)}")
|
| 110 |
|
| 111 |
except Exception as e:
|
| 112 |
logging.exception(f"Unexpected error: {str(e)}")
|
| 113 |
+
return f"Unexpected error: {str(e)}", None
|
| 114 |
|
| 115 |
|
| 116 |
+
# Utwórz interfejs Gradio
|
| 117 |
+
with gr.Blocks(title="Multi-Format to TXT Converter") as app:
|
| 118 |
+
gr.Markdown("# Multi-Format to TXT Converter by Heuristica.pl")
|
| 119 |
+
gr.Markdown("Convert Excel, PowerPoint, PDF, and Word files to text format.")
|
| 120 |
+
|
| 121 |
+
with gr.Row():
|
| 122 |
+
file_input = gr.File(label="Upload a file (Excel, PowerPoint, PDF, or Word)")
|
| 123 |
+
|
| 124 |
+
with gr.Row():
|
| 125 |
+
convert_button = gr.Button("Convert to TXT", variant="primary")
|
| 126 |
+
|
| 127 |
+
with gr.Row():
|
| 128 |
+
text_output = gr.Textbox(label="Converted Text", lines=15)
|
| 129 |
+
|
| 130 |
+
with gr.Row():
|
| 131 |
+
file_download = gr.File(label="Download Converted File")
|
| 132 |
+
|
| 133 |
+
# Info about supported formats
|
| 134 |
+
gr.Markdown("""
|
| 135 |
+
## Supported file formats:
|
| 136 |
+
- **Excel**: .xlsx, .xls
|
| 137 |
+
- **PowerPoint**: .pptx, .ppt
|
| 138 |
+
- **PDF**: .pdf
|
| 139 |
+
- **Word**: .docx, .doc
|
| 140 |
+
|
| 141 |
+
## How to use:
|
| 142 |
+
1. Upload a file using the file upload button
|
| 143 |
+
2. Click "Convert to TXT"
|
| 144 |
+
3. View the converted text
|
| 145 |
+
4. Download the converted text file
|
| 146 |
+
""")
|
| 147 |
+
|
| 148 |
+
# Obsługa konwersji
|
| 149 |
+
convert_button.click(
|
| 150 |
+
fn=convert_file,
|
| 151 |
+
inputs=[file_input],
|
| 152 |
+
outputs=[text_output, file_download]
|
| 153 |
+
)
|
| 154 |
|
| 155 |
# Uruchom aplikację
|
| 156 |
if __name__ == "__main__":
|
| 157 |
try:
|
| 158 |
logging.info("Starting the application")
|
| 159 |
+
app.launch(debug=True)
|
| 160 |
logging.info("Application stopped")
|
| 161 |
except Exception as e:
|
| 162 |
logging.exception(f"Error launching application: {str(e)}")
|