Marek4321 committed on
Commit
918120f
verified
1 Parent(s): 2fa2126

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -103
app.py CHANGED
@@ -8,6 +8,7 @@ from docx import Document
8
  import io
9
  import tempfile
10
  import logging
 
11
 
12
  # Konfiguracja logowania
13
  logging.basicConfig(
@@ -16,89 +17,13 @@ logging.basicConfig(
16
  )
17
 
18
  class MultiConverter:
19
- def convert_excel_to_formatted_text(self, excel_file):
20
- """Convert Excel to formatted Markdown-style text."""
21
- output = io.StringIO()
22
- workbook = openpyxl.load_workbook(excel_file)
23
- for idx, sheet_name in enumerate(workbook.sheetnames):
24
- if idx > 0:
25
- output.write("\n" + "-" * 70 + "\n\n")
26
- output.write(f"### {sheet_name}:\n")
27
- sheet = workbook[sheet_name]
28
- if sheet.max_row <= 1 and sheet.max_column <= 1:
29
- output.write("# No data in sheet\n\n")
30
- continue
31
- data = []
32
- max_col_widths = []
33
- non_empty_rows = []
34
- non_empty_cols = []
35
- for row_idx in range(1, sheet.max_row + 1):
36
- for col_idx in range(1, sheet.max_column + 1):
37
- cell_value = sheet.cell(row=row_idx, column=col_idx).value
38
- if cell_value:
39
- non_empty_rows.append(row_idx)
40
- non_empty_cols.append(col_idx)
41
- if not non_empty_rows or not non_empty_cols:
42
- output.write("# No data in sheet\n\n")
43
- continue
44
- min_row, max_row = min(non_empty_rows), max(non_empty_rows)
45
- min_col, max_col = min(non_empty_cols), max(non_empty_cols)
46
- max_col_widths = [0] * (max_col - min_col + 1)
47
- for row_idx in range(min_row, max_row + 1):
48
- row_data = []
49
- for col_idx in range(min_col, max_col + 1):
50
- value = str(sheet.cell(row=row_idx, column=col_idx).value or "")
51
- row_data.append(value)
52
- col_pos = col_idx - min_col
53
- max_col_widths[col_pos] = max(max_col_widths[col_pos], len(value))
54
- data.append(row_data)
55
- for row_idx, row in enumerate(data):
56
- if row_idx == 0:
57
- header_line = "| " + " | ".join(cell + " " * (max_col_widths[i] - len(cell)) for i, cell in enumerate(row)) + " |"
58
- output.write(header_line + "\n")
59
- separator_line = "|" + "|".join("-" * (width + 2) for width in max_col_widths) + "|"
60
- output.write(separator_line + "\n")
61
- data_line = "| " + " | ".join(cell + " " * (max_col_widths[i] - len(cell)) for i, cell in enumerate(row)) + " |"
62
- output.write(data_line + "\n")
63
- output.write("\n")
64
- return output.getvalue()
65
-
66
- def convert_pptx_to_text(self, pptx_file, filename):
67
- """Convert PowerPoint to plain text."""
68
- output = io.StringIO()
69
- prs = Presentation(pptx_file)
70
- output.write(f"# PowerPoint Presentation: {filename}\n\n")
71
- for slide_num, slide in enumerate(prs.slides, 1):
72
- output.write(f"## Slide {slide_num}\n")
73
- for shape in slide.shapes:
74
- if hasattr(shape, "text"):
75
- output.write(f"{shape.text}\n\n")
76
- return output.getvalue()
77
-
78
- def convert_pdf_to_text(self, pdf_file, filename):
79
- """Convert PDF to plain text."""
80
- output = io.StringIO()
81
- pdf_reader = PyPDF2.PdfReader(pdf_file)
82
- output.write(f"# PDF Document: {filename}\n\n")
83
- for page_num, page in enumerate(pdf_reader.pages, 1):
84
- output.write(f"## Page {page_num}\n")
85
- output.write(page.extract_text() + "\n\n")
86
- return output.getvalue()
87
-
88
- def convert_docx_to_text(self, docx_file, filename):
89
- """Convert Word to plain text."""
90
- output = io.StringIO()
91
- doc = Document(docx_file)
92
- output.write(f"# Word Document: {filename}\n\n")
93
- for para in doc.paragraphs:
94
- output.write(para.text + "\n\n")
95
- return output.getvalue()
96
-
97
 
98
  def convert_file(file):
99
- """Simple file conversion function that returns only text"""
100
  if file is None:
101
- return "No file uploaded. Please select a file first."
102
 
103
  try:
104
  logging.info(f"Starting conversion for file: {file.name if hasattr(file, 'name') else 'unknown'}")
@@ -134,9 +59,9 @@ def convert_file(file):
134
  with open(str(file), 'rb') as src, open(temp_file_path, 'wb') as dst:
135
  dst.write(src.read())
136
  except:
137
- return f"Could not read file. Type: {type(file)}"
138
  except Exception as e:
139
- return f"Error reading file: {str(e)}"
140
 
141
  # Określ rozszerzenie pliku
142
  _, file_ext = os.path.splitext(file_name)
@@ -156,46 +81,82 @@ def convert_file(file):
156
  else:
157
  result = f"Unsupported file format: {file_ext}"
158
 
159
- # Zapisz wynik do pliku tymczasowego dla użytkownika
160
- output_file_path = os.path.splitext(file_name)[0] + ".txt"
161
- with open(os.path.join(temp_dir, output_file_path), 'w', encoding='utf-8') as f:
 
 
 
162
  f.write(result)
163
-
164
- return result
 
 
 
 
165
  except Exception as e:
166
  logging.exception(f"Error converting file: {str(e)}")
167
- return f"Error converting file: {str(e)}"
168
  finally:
169
  # Usuń pliki tymczasowe
170
  try:
171
  if os.path.exists(temp_file_path):
172
  os.unlink(temp_file_path)
 
 
173
  os.rmdir(temp_dir)
174
- except:
175
- pass
176
 
177
  except Exception as e:
178
  logging.exception(f"Unexpected error: {str(e)}")
179
- return f"Unexpected error: {str(e)}"
180
 
181
 
182
- # Utwórz interfejs Gradio - prostszy wariant
183
- interface = gr.Interface(
184
- fn=convert_file,
185
- inputs=gr.File(label="Upload a file (Excel, PowerPoint, PDF, or Word)"),
186
- outputs=gr.Textbox(label="Converted Text", lines=15),
187
- title="Multi-Format to TXT Converter by Heuristica.pl",
188
- description="Convert Excel, PowerPoint, PDF, and Word files to text format. Simply upload a file and click submit to convert it to text.",
189
- allow_flagging="never",
190
- examples=None,
191
- cache_examples=False
192
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  # Uruchom aplikację
195
  if __name__ == "__main__":
196
  try:
197
  logging.info("Starting the application")
198
- interface.launch(debug=True)
199
  logging.info("Application stopped")
200
  except Exception as e:
201
  logging.exception(f"Error launching application: {str(e)}")
 
8
  import io
9
  import tempfile
10
  import logging
11
+ import base64
12
 
13
  # Konfiguracja logowania
14
  logging.basicConfig(
 
17
  )
18
 
19
  class MultiConverter:
20
+ # [Cała klasa MultiConverter pozostaje bez zmian]
21
+ # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def convert_file(file):
24
+ """Process uploaded file and convert it to text"""
25
  if file is None:
26
+ return "No file uploaded. Please select a file first.", None
27
 
28
  try:
29
  logging.info(f"Starting conversion for file: {file.name if hasattr(file, 'name') else 'unknown'}")
 
59
  with open(str(file), 'rb') as src, open(temp_file_path, 'wb') as dst:
60
  dst.write(src.read())
61
  except:
62
+ return f"Could not read file. Type: {type(file)}", None
63
  except Exception as e:
64
+ return f"Error reading file: {str(e)}", None
65
 
66
  # Określ rozszerzenie pliku
67
  _, file_ext = os.path.splitext(file_name)
 
81
  else:
82
  result = f"Unsupported file format: {file_ext}"
83
 
84
+ # Utwórz nazwę pliku wyjściowego
85
+ output_filename = os.path.splitext(file_name)[0] + ".txt"
86
+
87
+ # Przygotuj plik do pobrania
88
+ output_file_path = os.path.join(temp_dir, output_filename)
89
+ with open(output_file_path, 'w', encoding='utf-8') as f:
90
  f.write(result)
91
+
92
+ # Przygotuj plik do zwrócenia przez Gradio
93
+ with open(output_file_path, 'rb') as f:
94
+ output_content = f.read()
95
+
96
+ return result, (output_filename, output_content)
97
  except Exception as e:
98
  logging.exception(f"Error converting file: {str(e)}")
99
+ return f"Error converting file: {str(e)}", None
100
  finally:
101
  # Usuń pliki tymczasowe
102
  try:
103
  if os.path.exists(temp_file_path):
104
  os.unlink(temp_file_path)
105
+ if os.path.exists(output_file_path):
106
+ os.unlink(output_file_path)
107
  os.rmdir(temp_dir)
108
+ except Exception as e:
109
+ logging.warning(f"Could not clean up temporary files: {str(e)}")
110
 
111
  except Exception as e:
112
  logging.exception(f"Unexpected error: {str(e)}")
113
+ return f"Unexpected error: {str(e)}", None
114
 
115
 
116
+ # Utwórz interfejs Gradio
117
+ with gr.Blocks(title="Multi-Format to TXT Converter") as app:
118
+ gr.Markdown("# Multi-Format to TXT Converter by Heuristica.pl")
119
+ gr.Markdown("Convert Excel, PowerPoint, PDF, and Word files to text format.")
120
+
121
+ with gr.Row():
122
+ file_input = gr.File(label="Upload a file (Excel, PowerPoint, PDF, or Word)")
123
+
124
+ with gr.Row():
125
+ convert_button = gr.Button("Convert to TXT", variant="primary")
126
+
127
+ with gr.Row():
128
+ text_output = gr.Textbox(label="Converted Text", lines=15)
129
+
130
+ with gr.Row():
131
+ file_download = gr.File(label="Download Converted File")
132
+
133
+ # Info about supported formats
134
+ gr.Markdown("""
135
+ ## Supported file formats:
136
+ - **Excel**: .xlsx, .xls
137
+ - **PowerPoint**: .pptx, .ppt
138
+ - **PDF**: .pdf
139
+ - **Word**: .docx, .doc
140
+
141
+ ## How to use:
142
+ 1. Upload a file using the file upload button
143
+ 2. Click "Convert to TXT"
144
+ 3. View the converted text
145
+ 4. Download the converted text file
146
+ """)
147
+
148
+ # Obsługa konwersji
149
+ convert_button.click(
150
+ fn=convert_file,
151
+ inputs=[file_input],
152
+ outputs=[text_output, file_download]
153
+ )
154
 
155
  # Uruchom aplikację
156
  if __name__ == "__main__":
157
  try:
158
  logging.info("Starting the application")
159
+ app.launch(debug=True)
160
  logging.info("Application stopped")
161
  except Exception as e:
162
  logging.exception(f"Error launching application: {str(e)}")