Marek4321 committed on
Commit
fe14ce8
verified
1 Parent(s): 4467901

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -73
app.py CHANGED
@@ -99,110 +99,107 @@ def convert_file(file):
99
  """Process uploaded file and convert it to text"""
100
  if file is None:
101
  logging.warning("No file provided")
102
- return "No file uploaded. Please select a file first.", None, None
103
 
104
  try:
105
  # Log file info
106
  logging.info(f"File type: {type(file)}")
107
- logging.info(f"File attributes: {dir(file) if hasattr(file, '__dir__') else 'No dir method'}")
108
 
109
- # Wczesna obsługa obiektu NamedString
110
- if str(type(file)).find('NamedString') > -1:
111
- logging.info("Detected NamedString object")
112
- # Traktuj plik jako ścieżkę, ale zachowaj informację o nazwie
113
- filename = file.name if hasattr(file, 'name') else "unknown.file"
114
- try:
115
- with open(str(file), 'rb') as f:
116
- file_content = f.read()
117
- logging.info(f"Successfully read file content from NamedString as path")
118
- except Exception as e:
119
- error_msg = f"Could not read NamedString as path: {str(e)}"
120
- logging.error(error_msg)
121
- return error_msg, None, error_msg
122
- else:
123
- # Handle different types of file objects
124
  if hasattr(file, 'name'):
 
125
  filename = file.name
126
- elif isinstance(file, tuple) and len(file) > 1:
127
- filename = file[0] # In some versions, Gradio returns (filename, temppath)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  else:
129
- filename = "unknown_file"
130
- logging.warning(f"Unknown file object type: {type(file)}")
131
-
132
- logging.info(f"Converting file: {filename}")
133
-
134
- # Handle different file object types
135
- if hasattr(file, 'read'):
136
- # File-like object with read method
137
- file_content = file.read()
138
- elif isinstance(file, tuple) and len(file) > 1 and os.path.exists(file[1]):
139
- # Tuple with (name, path)
140
- with open(file[1], 'rb') as f:
141
- file_content = f.read()
142
- elif isinstance(file, str) and os.path.exists(file):
143
- # Direct path to file
144
- with open(file, 'rb') as f:
145
- file_content = f.read()
146
- else:
147
- error_msg = f"Could not read file content from object type: {type(file)}"
148
- logging.error(error_msg)
149
- return error_msg, None, error_msg
150
 
151
- converter = MultiConverter()
152
  _, file_ext = os.path.splitext(filename)
153
  file_ext = file_ext.lower()
154
- logging.info(f"File extension: {file_ext}")
155
- logging.info(f"Read {len(file_content)} bytes from input file")
156
 
157
- # Create a temporary file
158
- with tempfile.NamedTemporaryFile(delete=False) as temp_file:
159
- temp_file.write(file_content)
160
- temp_file_path = temp_file.name
161
- logging.info(f"Created temporary file at: {temp_file_path}")
 
162
 
 
163
  try:
164
- result = None
 
165
  if file_ext in [".xlsx", ".xls"]:
166
  logging.info("Processing Excel file")
167
- result = converter.convert_excel_to_formatted_text(temp_file_path)
168
  elif file_ext in [".pptx", ".ppt"]:
169
  logging.info("Processing PowerPoint file")
170
- result = converter.convert_pptx_to_text(temp_file_path, filename)
171
  elif file_ext == ".pdf":
172
  logging.info("Processing PDF file")
173
- result = converter.convert_pdf_to_text(temp_file_path, filename)
174
  elif file_ext in [".docx", ".doc"]:
175
  logging.info("Processing Word file")
176
- result = converter.convert_docx_to_text(temp_file_path, filename)
177
  else:
178
  error_msg = f"Unsupported file format: {file_ext}"
179
  logging.error(error_msg)
180
- result = error_msg
181
 
182
- logging.info(f"Conversion successful, text length: {len(result) if result else 0}")
183
-
184
- # Create and return file for download
185
  output_filename = os.path.splitext(filename)[0] + ".txt"
 
 
 
 
 
 
186
 
187
- # Convert result to bytes for file download
188
- if result:
189
- file_content = result.encode('utf-8')
190
- logging.info(f"Created output file: {output_filename}, size: {len(file_content)} bytes")
191
- return result, (output_filename, file_content), result
192
- else:
193
- logging.warning("No result generated")
194
- return "No result generated", None, None
195
  finally:
196
- # Clean up the temporary file
197
- if os.path.exists(temp_file_path):
198
- logging.info(f"Removing temporary file: {temp_file_path}")
199
- os.unlink(temp_file_path)
200
- else:
201
- logging.warning(f"Temporary file not found for deletion: {temp_file_path}")
 
 
202
  except Exception as e:
203
- error_msg = f"Error converting file: {str(e)}"
204
  logging.exception(error_msg)
205
- return error_msg, None, None
206
 
207
 
208
  # Create Gradio interface
 
99
  """Process uploaded file and convert it to text"""
100
  if file is None:
101
  logging.warning("No file provided")
102
+ return "No file uploaded. Please select a file first.", None, "No file uploaded"
103
 
104
  try:
105
  # Log file info
106
  logging.info(f"File type: {type(file)}")
 
107
 
108
+ # Uzyskaj nazwę i ścieżkę pliku
109
+ try:
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  if hasattr(file, 'name'):
111
+ # Standardowy przypadek - obiekt ma atrybut name
112
  filename = file.name
113
+ # Próbujemy różne sposoby dostępu do zawartości pliku
114
+ if hasattr(file, 'read'):
115
+ # Obiekt ma metodę read()
116
+ content = file.read()
117
+ temp_path = None
118
+ elif hasattr(file, '__fspath__') or isinstance(file, str):
119
+ # Obiekt jest ścieżką lub można go konwertować na ścieżkę
120
+ filepath = str(file)
121
+ if os.path.exists(filepath):
122
+ with open(filepath, 'rb') as f:
123
+ content = f.read()
124
+ temp_path = filepath
125
+ else:
126
+ return f"File not found at path: {filepath}", None, f"Error: File not found"
127
+ else:
128
+ # Nie możemy bezpośrednio odczytać pliku
129
+ return f"Cannot read file content from {type(file)}", None, "Error: Cannot read file"
130
+ elif isinstance(file, tuple) and len(file) >= 2:
131
+ # Przypadek gdzie file to tuple (filename, filepath)
132
+ filename = file[0]
133
+ filepath = file[1]
134
+ if os.path.exists(filepath):
135
+ with open(filepath, 'rb') as f:
136
+ content = f.read()
137
+ temp_path = filepath
138
+ else:
139
+ return f"File not found at path: {filepath}", None, f"Error: File not found"
140
  else:
141
+ # Nie mamy nazwy pliku ani ścieżki
142
+ return f"Cannot determine file name or path from {type(file)}", None, "Error: Cannot determine file"
143
+ except Exception as e:
144
+ error_msg = f"Error accessing file: {str(e)}"
145
+ logging.exception(error_msg)
146
+ return error_msg, None, "Error accessing file"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
+ # Określ rozszerzenie pliku
149
  _, file_ext = os.path.splitext(filename)
150
  file_ext = file_ext.lower()
151
+ logging.info(f"File name: {filename}, extension: {file_ext}")
 
152
 
153
+ # Utwórz tymczasowy plik, jeśli go nie mamy
154
+ if temp_path is None:
155
+ with tempfile.NamedTemporaryFile(delete=False) as temp_file:
156
+ temp_file.write(content)
157
+ temp_path = temp_file.name
158
+ logging.info(f"Created temporary file at: {temp_path}")
159
 
160
+ # Konwertuj plik na tekst
161
  try:
162
+ converter = MultiConverter()
163
+
164
  if file_ext in [".xlsx", ".xls"]:
165
  logging.info("Processing Excel file")
166
+ result = converter.convert_excel_to_formatted_text(temp_path)
167
  elif file_ext in [".pptx", ".ppt"]:
168
  logging.info("Processing PowerPoint file")
169
+ result = converter.convert_pptx_to_text(temp_path, filename)
170
  elif file_ext == ".pdf":
171
  logging.info("Processing PDF file")
172
+ result = converter.convert_pdf_to_text(temp_path, filename)
173
  elif file_ext in [".docx", ".doc"]:
174
  logging.info("Processing Word file")
175
+ result = converter.convert_docx_to_text(temp_path, filename)
176
  else:
177
  error_msg = f"Unsupported file format: {file_ext}"
178
  logging.error(error_msg)
179
+ return error_msg, None, f"Error: {error_msg}"
180
 
181
+ # Generuj plik wynikowy
 
 
182
  output_filename = os.path.splitext(filename)[0] + ".txt"
183
+ output_content = result.encode('utf-8')
184
+
185
+ logging.info(f"Conversion successful, created output file: {output_filename}")
186
+
187
+ # Zwróć wynik jako string, plik do pobrania i status
188
+ return result, (output_filename, output_content), "Conversion completed successfully"
189
 
 
 
 
 
 
 
 
 
190
  finally:
191
+ # Zawsze usuwaj tymczasowy plik, jeśli go utworzyliśmy
192
+ if temp_path and temp_path != str(file) and os.path.exists(temp_path):
193
+ try:
194
+ os.unlink(temp_path)
195
+ logging.info(f"Removed temporary file: {temp_path}")
196
+ except Exception as e:
197
+ logging.warning(f"Could not remove temporary file {temp_path}: {str(e)}")
198
+
199
  except Exception as e:
200
+ error_msg = f"Error during conversion: {str(e)}"
201
  logging.exception(error_msg)
202
+ return error_msg, None, f"Error: {error_msg}"
203
 
204
 
205
  # Create Gradio interface