Spaces:

DinaFatikh
/

inbuild3

Build error

App Files Files Community

Create app.py

by DinaFatikh - opened Nov 7, 2024

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+143

-0

Files changed (1) hide show

app.py +143 -0

app.py ADDED Viewed

	@@ -0,0 +1,143 @@

+import gradio as gr
+import pdfplumber
+import pandas as pd
+import re
+import tempfile
+import os
+# Функция для извлечения разделов и стоимости из PDF
+def extract_sections_only(pdf_file_path):
+    sections = []
+    smeta_number = None
+    smeta_name = None
+    collecting_estimate = False
+    current_estimate_lines = []
+    found_section = False
+    with pdfplumber.open(pdf_file_path) as pdf:
+        for page in pdf.pages:
+            text = page.extract_text()
+            if text is None:
+                continue
+            lines = text.split('\n')
+            for line in lines:
+                if "ЛОКАЛЬНАЯ СМЕТА №" in line:
+                    if smeta_number is not None:
+                        if not found_section:
+                            total_line = next((l for l in current_estimate_lines if "ВСЕГО ПО СМЕТЕ:" in l), None)
+                            if total_line:
+                                try:
+                                    total_cost_str = total_line.split("ВСЕГО ПО СМЕТЕ:")[1].strip()
+                                    total_cost = float(total_cost_str.replace(" ", "").replace(",", "."))
+                                    section_data = [smeta_number, smeta_name, "нет разделов", total_cost]
+                                    if section_data not in sections:
+                                        sections.append(section_data)
+                                except (ValueError, IndexError):
+                                    pass
+                        smeta_number = None
+                        smeta_name = None
+                        current_estimate_lines = []
+                        found_section = False
+                    smeta_number = line.split("ЛОКАЛЬНАЯ СМЕТА №")[1].strip()
+                    collecting_estimate = True
+                    current_estimate_lines.append(line)
+                elif collecting_estimate and "на" in line:
+                    try:
+                        smeta_name = line.split("на", 1)[1].strip()
+                    except IndexError:
+                        smeta_name = "Не найдено наименование"
+                    collecting_estimate = False
+                    current_estimate_lines.append(line)
+                elif smeta_number:
+                    current_estimate_lines.append(line)
+                    if re.search(r'\bРаздел\b', line):
+                        found_section = True
+                        try:
+                            parts = line.split()
+                            section_index = parts.index("Раздел")
+                            section_name = " ".join(parts[section_index + 1:-1]).strip()
+                            section_cost_str = parts[-1].replace(" ", "").replace(",", ".")
+                            section_cost = float(section_cost_str)
+                            section_data = [smeta_number, smeta_name, section_name, section_cost]
+                            if section_data not in sections:
+                                sections.append(section_data)
+                        except (ValueError, IndexError):
+                            continue
+        if smeta_number:
+            if not found_section:
+                total_line = next((l for l in current_estimate_lines if "ВСЕГО ПО СМЕТЕ:" in l), None)
+                if total_line:
+                    try:
+                        total_cost_str = total_line.split("ВСЕГО ПО СМЕТЕ:")[1].strip()
+                        total_cost = float(total_cost_str.replace(" ", "").replace(",", "."))
+                        section_data = [smeta_number, smeta_name, "нет разделов", total_cost]
+                        if section_data not in sections:
+                            sections.append(section_data)
+                    except (ValueError, IndexError):
+                        pass
+    df = pd.DataFrame(sections, columns=[
+        "Номер сметы", "Наименование сметы", "Раздел", "Стоимость"
+    ])
+    df = df.drop_duplicates()
+    df = df.sort_values(by=["Номер сметы", "Раздел"])
+    # Форматирование столбца "Стоимость" с разделителями тысяч
+    df["Стоимость"] = df["Стоимость"].apply(lambda x: f"{x:,.0f}".replace(",", " "))
+    return df
+# Функция для отображения результата и скачивания в Gradio
+def display_estimates_with_sections(pdf_file_path):
+    # Извлекаем данные из PDF
+    df = extract_sections_only(pdf_file_path)
+    # Сохраняем данные в Excel временный файл
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
+        # Создаем ExcelWriter и применяем форматирование
+        with pd.ExcelWriter(tmp.name, engine='xlsxwriter') as writer:
+            df.to_excel(writer, index=False, sheet_name='Структура проекта')
+            # Получаем workbook и worksheet для применения форматирования
+            workbook = writer.book
+            worksheet = writer.sheets['Структура проекта']
+            # Определяем формат с разделителями тысяч
+            money_format = workbook.add_format({'num_format': '# ##0'})
+            # Применяем формат к столбцу "Стоимость"
+            стоимость_col_idx = df.columns.get_loc("Стоимость")
+            worksheet.set_column(стоимость_col_idx, стоимость_col_idx, None, money_format)
+    temp_file_path = tmp.name
+    return df, temp_file_path
+# Создаем интерфейс Gradio
+with gr.Blocks() as demo:
+    gr.Markdown("# СТРУКТУРА ПРОЕКТА")
+    with gr.Column():
+        # Окно загрузки файла
+        pdf_input = gr.File(label="Загрузите PDF файл с локальными сметами", type="filepath")
+        # Кнопка обработки
+        submit_button = gr.Button("Обработать")
+        # Таблица с результатами
+        output_df = gr.Dataframe(headers=["Номер сметы", "Наименование сметы", "Раздел", "Стоимость"])
+        # Кнопка скачивания Excel-файла
+        download_button = gr.File(label="Скачать Excel")
+    # Связываем кнопку с функцией обработки
+    submit_button.click(
+        fn=display_estimates_with_sections,
+        inputs=pdf_input,
+        outputs=[output_df, download_button]
+    )
+# Запуск приложения
+demo.launch(share=True)