Spaces:
Build error
Build error
Create app.py
#1
by DinaFatikh - opened
app.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pdfplumber
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import re
|
| 5 |
+
import tempfile
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
# Функция для извлечения разделов и стоимости из PDF
|
| 9 |
+
_LAST_GROUP_RE = re.compile(r"\d{3}(?:[.,]\d+)?")
_LEAD_GROUP_RE = re.compile(r"\d{1,3}")


def _parse_amount(text):
    """Convert an amount such as "1 234 567,89" to a float.

    Every whitespace character is removed (regular and non-breaking spaces
    are both used as thousands separators) and a decimal comma becomes a dot.

    Raises:
        ValueError: if the cleaned text is not a valid number.
    """
    return float(re.sub(r"\s+", "", text).replace(",", "."))


def _total_row(smeta_number, smeta_name, estimate_lines):
    """Build the "no sections" row of an estimate from its grand-total line.

    Returns a ``(number, name, "нет разделов", total)`` tuple, or ``None``
    when no line carries the total marker or the amount cannot be parsed.
    """
    marker = "ВСЕГО ПО СМЕТЕ:"
    total_line = next((text for text in estimate_lines if marker in text), None)
    if total_line is None:
        return None
    try:
        total_cost = _parse_amount(total_line.split(marker)[1])
    except ValueError:
        return None
    return (smeta_number, smeta_name, "нет разделов", total_cost)


def _parse_section_line(line):
    """Split a "Раздел <name> <cost>" line into ``(name, cost)``.

    Returns ``None`` when the line has no standalone "Раздел" token or does
    not end with a number.
    """
    parts = line.split()
    try:
        section_index = parts.index("Раздел")
    except ValueError:
        return None

    cost_start = len(parts) - 1
    if cost_start <= section_index:
        # "Раздел" is the last token: there is no cost on this line.
        return None

    # A cost printed with space thousands separators ("1 234,56") is split
    # into several tokens; walk left to collect its leading digit groups.
    # The first token after "Раздел" is never absorbed, so a numbered
    # section ("Раздел 5 123,45") keeps its name.
    if _LAST_GROUP_RE.fullmatch(parts[cost_start]):
        while (cost_start - 1 > section_index + 1
               and _LEAD_GROUP_RE.fullmatch(parts[cost_start - 1])):
            cost_start -= 1
            if len(parts[cost_start]) < 3:
                # A group shorter than three digits can only be the leading one.
                break

    try:
        section_cost = _parse_amount("".join(parts[cost_start:]))
    except ValueError:
        return None
    section_name = " ".join(parts[section_index + 1:cost_start])
    return section_name, section_cost


def extract_sections_only(pdf_file_path):
    """Extract estimate sections and their costs from a PDF of local estimates.

    The text of every page is scanned line by line:

    * a line containing "ЛОКАЛЬНАЯ СМЕТА №" starts a new estimate; the text
      after the marker is the estimate number;
    * the first following line containing "на" supplies the estimate name
      (the text after the first "на");
    * every later line containing the word "Раздел" is parsed as
      "<section name> <cost>";
    * an estimate in which no "Раздел" line was seen gets a single
      "нет разделов" row taken from its "ВСЕГО ПО СМЕТЕ:" line.

    Args:
        pdf_file_path: path (or file object) accepted by ``pdfplumber.open``.

    Returns:
        pandas.DataFrame with the columns "Номер сметы", "Наименование сметы",
        "Раздел" and "Стоимость", de-duplicated and sorted by estimate number
        and section. "Стоимость" holds strings rounded to whole units with a
        space as thousands separator.
    """
    header_marker = "ЛОКАЛЬНАЯ СМЕТА №"

    rows = []
    smeta_number = None
    smeta_name = None
    awaiting_name = False
    estimate_lines = []
    found_section = False

    with pdfplumber.open(pdf_file_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if text is None:
                continue

            for line in text.split("\n"):
                if header_marker in line:
                    if smeta_number is not None:
                        # Close the previous estimate before starting a new one.
                        if not found_section:
                            row = _total_row(smeta_number, smeta_name, estimate_lines)
                            if row is not None:
                                rows.append(row)
                        smeta_name = None
                        estimate_lines = []
                        found_section = False

                    smeta_number = line.split(header_marker)[1].strip()
                    awaiting_name = True
                    estimate_lines.append(line)

                elif awaiting_name and "на" in line:
                    # NOTE(review): this matches "на" anywhere in the line,
                    # including inside longer words - confirm against real PDFs.
                    smeta_name = line.split("на", 1)[1].strip()
                    awaiting_name = False
                    estimate_lines.append(line)

                elif smeta_number:
                    estimate_lines.append(line)
                    if re.search(r"\bРаздел\b", line):
                        # Seeing the word is enough to suppress the total row,
                        # even if the line itself cannot be parsed.
                        found_section = True
                        parsed = _parse_section_line(line)
                        if parsed is not None:
                            rows.append((smeta_number, smeta_name, *parsed))

    # Close the last estimate of the document.
    if smeta_number and not found_section:
        row = _total_row(smeta_number, smeta_name, estimate_lines)
        if row is not None:
            rows.append(row)

    df = pd.DataFrame(rows, columns=[
        "Номер сметы", "Наименование сметы", "Раздел", "Стоимость"
    ])
    # Page headers repeated in the PDF can yield the same row more than once.
    df = df.drop_duplicates()
    df = df.sort_values(by=["Номер сметы", "Раздел"])

    # Render the cost with spaces as thousands separators.
    df["Стоимость"] = df["Стоимость"].apply(lambda x: f"{x:,.0f}".replace(",", " "))

    return df
|
| 94 |
+
|
| 95 |
+
# Функция для отображения результата и скачивания в Gradio
|
| 96 |
+
def display_estimates_with_sections(pdf_file_path):
    """Parse the uploaded PDF and prepare both outputs of the Gradio UI.

    Args:
        pdf_file_path: path of the uploaded PDF, or ``None`` when the button
            is pressed before a file was chosen.

    Returns:
        A ``(df, excel_path)`` tuple: the table produced by
        ``extract_sections_only`` and the path of a temporary ``.xlsx`` file
        holding the same rows. The file is created with ``delete=False`` and
        is not removed here.

    Raises:
        gr.Error: if no file was uploaded.
    """
    if pdf_file_path is None:
        raise gr.Error("Сначала загрузите PDF файл")

    # Extract the data from the PDF.
    df = extract_sections_only(pdf_file_path)

    cost_column = "Стоимость"
    sheet_name = 'Структура проекта'

    # The table carries costs as pre-formatted strings ("1 234 567"); an Excel
    # number format has no effect on text cells, so the exported copy gets
    # real numbers back. The frame returned to the UI is left untouched.
    export_df = df.copy()
    as_numbers = pd.to_numeric(
        export_df[cost_column].astype(str).str.replace(r"\s+", "", regex=True),
        errors="coerce",
    )
    if as_numbers.notna().all():
        export_df[cost_column] = as_numbers

    # Reserve a temporary file name and close the handle before writing:
    # a still-open NamedTemporaryFile cannot be reopened by name on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
        temp_file_path = tmp.name

    with pd.ExcelWriter(temp_file_path, engine='xlsxwriter') as writer:
        export_df.to_excel(writer, index=False, sheet_name=sheet_name)

        # Fetch the workbook and worksheet objects to apply formatting.
        workbook = writer.book
        worksheet = writer.sheets[sheet_name]

        # Number format for the cost column.
        # NOTE(review): '# ##0' inserts one literal space; confirm it renders
        # as intended for amounts of a million and above.
        money_format = workbook.add_format({'num_format': '# ##0'})

        cost_col_idx = export_df.columns.get_loc(cost_column)
        worksheet.set_column(cost_col_idx, cost_col_idx, None, money_format)

    return df, temp_file_path
|
| 120 |
+
|
| 121 |
+
# Создаем интерфейс Gradio
|
| 122 |
+
with gr.Blocks() as demo:
    gr.Markdown("# СТРУКТУРА ПРОЕКТА")

    with gr.Column():
        # Upload widget for the source PDF.
        pdf_input = gr.File(
            label="Загрузите PDF файл с локальными сметами",
            type="filepath",
        )
        # Button that starts processing.
        submit_button = gr.Button("Обработать")
        # Table showing the extracted rows.
        output_df = gr.Dataframe(
            headers=["Номер сметы", "Наименование сметы", "Раздел", "Стоимость"],
        )
        # File widget offering the generated Excel workbook for download.
        download_button = gr.File(label="Скачать Excel")

    # Wire the button to the processing function.
    submit_button.click(
        fn=display_estimates_with_sections,
        inputs=pdf_input,
        outputs=[output_df, download_button],
    )

# Start the application.
demo.launch(share=True)
|