Spaces:

MrSimple01
/

AIEXP_1

Sleeping

App Files Files Community

MrSimple01 committed on Oct 18, 2025

Commit

48c1e22

verified ·

1 Parent(s): eae2fa3

Update converters/converter.py

Browse files

Files changed (1) hide show

converters/converter.py +125 -116

converters/converter.py CHANGED Viewed

@@ -1,116 +1,125 @@
-from config import *
-from utils import log_message
-import json
-import pandas as pd
-import os
-def process_uploaded_file(file, file_type):
-    """Обработка загруженного файла и добавление в систему"""
-    try:
-        if file is None:
-            return "❌ Файл не выбран"
-        from huggingface_hub import HfApi
-        import tempfile
-        import shutil
-        # Создаем временную директорию
-        with tempfile.TemporaryDirectory() as temp_dir:
-            # Сохраняем загруженный файл
-            file_path = os.path.join(temp_dir, file.name)
-            shutil.copy(file.name, file_path)
-            # Определяем целевую директорию на HuggingFace
-            if file_type == "Таблица":
-                target_dir = TABLE_DATA_DIR
-                # Конвертируем Excel в JSON
-                if file.name.endswith(('.xlsx', '.xls')):
-                    json_path = convert_single_excel_to_json(file_path, temp_dir)
-                    upload_file = json_path
-                else:
-                    upload_file = file_path
-            elif file_type == "Изображение (метаданные)":
-                target_dir = IMAGE_DATA_DIR
-                # Конвертируем Excel в CSV
-                if file.name.endswith(('.xlsx', '.xls')):
-                    csv_path = convert_single_excel_to_csv(file_path, temp_dir)
-                    upload_file = csv_path
-                else:
-                    upload_file = file_path
-            else:  # JSON документ
-                target_dir = JSON_FILES_DIR
-                upload_file = file_path
-            # Загружаем на HuggingFace
-            api = HfApi()
-            api.upload_file(
-                path_or_fileobj=upload_file,
-                path_in_repo=f"{target_dir}/{os.path.basename(upload_file)}",
-                repo_id=HF_REPO_ID,
-                token=HF_TOKEN,
-                repo_type="dataset"
-            )
-            log_message(f"Файл {file.name} успешно загружен в {target_dir}")
-            return f"✅ Файл успешно загружен и обработан: {os.path.basename(upload_file)}\n⚠️ Перезапустите систему для применения изменений"
-    except Exception as e:
-        error_msg = f"Ошибка обработки файла: {str(e)}"
-        log_message(error_msg)
-        return f"❌ {error_msg}"
-def convert_single_excel_to_json(excel_path, output_dir):
-    """Конвертация одного Excel файла в JSON для таблиц"""
-    df_dict = pd.read_excel(excel_path, sheet_name=None)
-    result = {
-        "document": os.path.basename(excel_path),
-        "total_sheets": len(df_dict),
-        "sheets": []
-    }
-    for sheet_name, df in df_dict.items():
-        if df.empty or "Номер таблицы" not in df.columns:
-            continue
-        df = df.dropna(how='all').fillna("")
-        grouped = df.groupby("Номер таблицы")
-        for table_number, group in grouped:
-            group = group.reset_index(drop=True)
-            sheet_data = {
-                "sheet_name": sheet_name,
-                "document_id": str(group.iloc[0].get("Обозначение документа", "")),
-                "section": str(group.iloc[0].get("Раздел документа", "")),
-                "table_number": str(table_number),
-                "table_title": str(group.iloc[0].get("Название таблицы", "")),
-                "table_description": str(group.iloc[0].get("Примечание", "")),
-                "headers": [col for col in df.columns if col not in
-                           ["Обозначение документа", "Раздел документа", "Номер таблицы",
-                            "Название таблицы", "Примечание"]],
-                "data": []
-            }
-            for _, row in group.iterrows():
-                row_dict = {col: str(row[col]) if pd.notna(row[col]) else ""
-                           for col in sheet_data["headers"]}
-                sheet_data["data"].append(row_dict)
-            result["sheets"].append(sheet_data)
-    json_filename = os.path.basename(excel_path).replace('.xlsx', '.json').replace('.xls', '.json')
-    json_path = os.path.join(output_dir, json_filename)
-    with open(json_path, 'w', encoding='utf-8') as f:
-        json.dump(result, f, ensure_ascii=False, indent=2)
-    return json_path
-def convert_single_excel_to_csv(excel_path, output_dir):
-    """Конвертация одного Excel файла в CSV для изображений"""
-    df = pd.read_excel(excel_path)
-    csv_filename = os.path.basename(excel_path).replace('.xlsx', '.csv').replace('.xls', '.csv')
-    csv_path = os.path.join(output_dir, csv_filename)
-    df.to_csv(csv_path, index=False, encoding='utf-8')
-    return csv_path

+from config import *
+from utils import log_message
+import json
+import pandas as pd
+import os
+def process_uploaded_file(file, file_type):
+    """Обработка загруженного файла и добавление в систему"""
+    try:
+        if file is None:
+            return "❌ Файл не выбран"
+        from huggingface_hub import HfApi
+        import tempfile
+        import shutil
+        # Создаем временную директорию
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Получаем путь к файлу (file может быть путем или объектом)
+            source_path = file if isinstance(file, str) else file.name
+            filename = os.path.basename(source_path)
+            # Создаем путь в временной директории
+            file_path = os.path.join(temp_dir, filename)
+            # Копируем только если источник и назначение разные
+            if os.path.abspath(source_path) != os.path.abspath(file_path):
+                shutil.copy(source_path, file_path)
+            else:
+                file_path = source_path
+            # Определяем целевую директорию на HuggingFace
+            if file_type == "Таблица":
+                target_dir = TABLE_DATA_DIR
+                # Конвертируем Excel в JSON
+                if filename.endswith(('.xlsx', '.xls')):
+                    json_path = convert_single_excel_to_json(file_path, temp_dir)
+                    upload_file = json_path
+                else:
+                    upload_file = file_path
+            elif file_type == "Изображение (метаданные)":
+                target_dir = IMAGE_DATA_DIR
+                # Конвертируем Excel в CSV
+                if filename.endswith(('.xlsx', '.xls')):
+                    csv_path = convert_single_excel_to_csv(file_path, temp_dir)
+                    upload_file = csv_path
+                else:
+                    upload_file = file_path
+            else:  # JSON документ
+                target_dir = JSON_FILES_DIR
+                upload_file = file_path
+            # Загружаем на HuggingFace
+            api = HfApi()
+            api.upload_file(
+                path_or_fileobj=upload_file,
+                path_in_repo=f"{target_dir}/{os.path.basename(upload_file)}",
+                repo_id=HF_REPO_ID,
+                token=HF_TOKEN,
+                repo_type="dataset"
+            )
+            log_message(f"Файл {filename} успешно загружен в {target_dir}")
+            return f"✅ Файл успешно загружен и обработан: {os.path.basename(upload_file)}\n⚠️ Перезапустите систему для применения изменений"
+    except Exception as e:
+        error_msg = f"Ошибка обработки файла: {str(e)}"
+        log_message(error_msg)
+        return f"❌ {error_msg}"
+def convert_single_excel_to_json(excel_path, output_dir):
+    """Конвертация одного Excel файла в JSON для таблиц"""
+    df_dict = pd.read_excel(excel_path, sheet_name=None)
+    result = {
+        "document": os.path.basename(excel_path),
+        "total_sheets": len(df_dict),
+        "sheets": []
+    }
+    for sheet_name, df in df_dict.items():
+        if df.empty or "Номер таблицы" not in df.columns:
+            continue
+        df = df.dropna(how='all').fillna("")
+        grouped = df.groupby("Номер таблицы")
+        for table_number, group in grouped:
+            group = group.reset_index(drop=True)
+            sheet_data = {
+                "sheet_name": sheet_name,
+                "document_id": str(group.iloc[0].get("Обозначение документа", "")),
+                "section": str(group.iloc[0].get("Раздел документа", "")),
+                "table_number": str(table_number),
+                "table_title": str(group.iloc[0].get("Название таблицы", "")),
+                "table_description": str(group.iloc[0].get("Примечание", "")),
+                "headers": [col for col in df.columns if col not in
+                           ["Обозначение документа", "Раздел документа", "Номер таблицы",
+                            "Название таблицы", "Примечание"]],
+                "data": []
+            }
+            for _, row in group.iterrows():
+                row_dict = {col: str(row[col]) if pd.notna(row[col]) else ""
+                           for col in sheet_data["headers"]}
+                sheet_data["data"].append(row_dict)
+            result["sheets"].append(sheet_data)
+    json_filename = os.path.basename(excel_path).replace('.xlsx', '.json').replace('.xls', '.json')
+    json_path = os.path.join(output_dir, json_filename)
+    with open(json_path, 'w', encoding='utf-8') as f:
+        json.dump(result, f, ensure_ascii=False, indent=2)
+    return json_path
+def convert_single_excel_to_csv(excel_path, output_dir):
+    """Конвертация одного Excel файла в CSV для изображений"""
+    df = pd.read_excel(excel_path)
+    csv_filename = os.path.basename(excel_path).replace('.xlsx', '.csv').replace('.xls', '.csv')
+    csv_path = os.path.join(output_dir, csv_filename)
+    df.to_csv(csv_path, index=False, encoding='utf-8')
+    return csv_path