Spaces:

DinaFatikh
/

inbuild3

Build error

App Files Files Community

inbuild3 / app.py

DinaFatikh

Update app.py

50c615a verified 11 months ago

raw

history blame contribute delete

7.21 kB

	# app.py ────────────────
	import gradio as gr
	import pdfplumber
	import pandas as pd
	import re
	import tempfile
	import os


	# ────────────────────────────────────────────────────────────────────────────
	# 1. Извлекаем разделы и их стоимость из PDF
	# ────────────────────────────────────────────────────────────────────────────
	# Whole-word patterns: a plain substring test would also hit words that merely
	# contain these letters (for example "на" inside "наименование").
	_SECTION_WORD_RE = re.compile(r"\bРаздел\b")
	_NAME_PREPOSITION_RE = re.compile(r"\bна\b")
	# One whitespace-separated group of a digit-grouped amount ("123" in "123 456,78").
	_DIGIT_GROUP_RE = re.compile(r"[0-9]{1,3}")
	# Final group of a digit-grouped amount, with optional decimals ("456,78").
	_LAST_GROUP_RE = re.compile(r"[0-9]{3}(?:[.,][0-9]+)?")


	def _parse_section_line(line):
	    """Split a "Раздел <name> <cost>" line into ``(name, cost)``.

	    The cost is the trailing number of the line.  It may be written with
	    whitespace between thousands groups ("123 456,78"); such groups are
	    re-joined before conversion.  The first token after "Раздел" is always
	    kept for the name when more than one token follows the keyword.

	    NOTE(review): a section name that itself ends in a bare number directly
	    before a grouped cost ("... Корпус 2 123 456,78") is inherently ambiguous
	    and is read as one amount.

	    Returns ``None`` when "Раздел" is not a standalone token, nothing follows
	    it, or the trailing text is not a number.
	    """
	    tokens = line.split()
	    try:
	        keyword_idx = tokens.index("Раздел")
	    except ValueError:
	        return None

	    tail = tokens[keyword_idx + 1:]
	    if not tail:
	        return None

	    cost_start = len(tail) - 1
	    if _LAST_GROUP_RE.fullmatch(tail[cost_start]):
	        # Walk left over preceding digit groups; index 0 stays with the name.
	        while cost_start > 1 and _DIGIT_GROUP_RE.fullmatch(tail[cost_start - 1]):
	            cost_start -= 1
	            if len(tail[cost_start]) < 3:
	                break  # a group shorter than three digits can only be the leading one

	    try:
	        cost = float("".join(tail[cost_start:]).replace(",", "."))
	    except ValueError:
	        return None
	    return " ".join(tail[:cost_start]), cost


	def extract_sections_only(pdf_file_path: str) -> pd.DataFrame:
	    """Extract the per-section costs of every local estimate found in a PDF.

	    The text of each page is scanned line by line:

	    * a line containing "ЛОКАЛЬНАЯ СМЕТА №" starts a new estimate; the text
	      after the marker is its number;
	    * the first following line containing the word "на" supplies the estimate
	      name (the text after that word, or "Без названия" if nothing follows);
	    * every later line containing the word "Раздел" is parsed into a section
	      name and a cost.

	    An estimate without any "Раздел" line is handed to
	    ``_append_total_if_any``, which may add a single "нет разделов" row.

	    Args:
	        pdf_file_path: Path of the PDF file, passed to ``pdfplumber.open``.

	    Returns:
	        A DataFrame with the columns "Номер сметы", "Наименование сметы",
	        "Раздел" and "Стоимость", de-duplicated and sorted by estimate number
	        and section.  "Стоимость" holds text rounded to whole units with
	        spaces between thousands groups, e.g. "1 234 567".
	    """
	    sections = []
	    smeta_number = smeta_name = None
	    awaiting_name = False
	    current_lines, found_section = [], False

	    with pdfplumber.open(pdf_file_path) as pdf:
	        for page in pdf.pages:
	            # Fall back to "" when extract_text() returns a falsy value.
	            text = page.extract_text() or ""
	            for line in text.split("\n"):

	                # Start of a new local estimate.
	                if "ЛОКАЛЬНАЯ СМЕТА №" in line:
	                    # The previous estimate ended without sections: try its total.
	                    if smeta_number and not found_section:
	                        _append_total_if_any(current_lines, smeta_number, smeta_name, sections)

	                    # Reset the state and register the new estimate.
	                    smeta_number = line.split("ЛОКАЛЬНАЯ СМЕТА №")[1].strip()
	                    smeta_name = None
	                    awaiting_name, found_section = True, False
	                    current_lines = [line]
	                    continue

	                # Estimate name line ("… на …"); "на" must be a whole word.
	                if awaiting_name and _NAME_PREPOSITION_RE.search(line):
	                    after_preposition = _NAME_PREPOSITION_RE.split(line, maxsplit=1)[1]
	                    smeta_name = after_preposition.strip() or "Без названия"
	                    awaiting_name = False
	                    current_lines.append(line)
	                    continue

	                # Lines before the first estimate header are ignored.
	                if not smeta_number:
	                    continue
	                current_lines.append(line)

	                if not _SECTION_WORD_RE.search(line):
	                    continue
	                # Even an unparsable section line marks the estimate as having
	                # sections, which suppresses the "нет разделов" fallback.
	                found_section = True
	                parsed = _parse_section_line(line)
	                if parsed is not None:
	                    section_name, section_cost = parsed
	                    sections.append([smeta_number, smeta_name, section_name, section_cost])

	    # Last estimate in the file, if it had no sections.
	    if smeta_number and not found_section:
	        _append_total_if_any(current_lines, smeta_number, smeta_name, sections)

	    df = (
	        pd.DataFrame(
	            sections,
	            columns=["Номер сметы", "Наименование сметы", "Раздел", "Стоимость"],
	        )
	        .drop_duplicates()
	        .sort_values(["Номер сметы", "Раздел"])
	    )

	    # Render the cost as "1 234 567": format with "," groups, then swap in spaces.
	    df["Стоимость"] = df["Стоимость"].apply(lambda x: f"{x:,.0f}".replace(",", " "))
	    return df


	def _append_total_if_any(lines, smeta_number, smeta_name, sections):
	    """Append a "нет разделов" row carrying the estimate's grand total.

	    Looks for the first entry of *lines* that contains "ВСЕГО ПО СМЕТЕ:",
	    converts the text after that marker to a number and appends
	    ``[smeta_number, smeta_name, "нет разделов", total]`` to *sections*.

	    Args:
	        lines: Text lines collected for the current estimate.
	        smeta_number: Estimate number stored in the new row.
	        smeta_name: Estimate name stored in the new row (may be ``None``).
	        sections: List of rows; mutated in place.

	    Returns:
	        ``None``.  Nothing is appended when no line has the marker or the
	        text after it is not a single number (best-effort parsing).
	    """
	    marker = "ВСЕГО ПО СМЕТЕ:"
	    total_line = next((text for text in lines if marker in text), None)
	    if not total_line:
	        return

	    raw_amount = total_line.split(marker)[1]
	    # str.split() without arguments splits on every Unicode whitespace
	    # character, so non-breaking and narrow spaces used as thousands
	    # separators are removed along with ordinary spaces.
	    normalized = "".join(raw_amount.split()).replace(",", ".")
	    try:
	        total_cost = float(normalized)
	    except ValueError:
	        # Deliberately quiet: an unreadable total just yields no row.
	        return
	    sections.append([smeta_number, smeta_name, "нет разделов", total_cost])


	# ────────────────────────────────────────────────────────────────────────────
	# 2. Готовим Excel и отдаём таблицу + файл
	# ────────────────────────────────────────────────────────────────────────────
	def display_estimates_with_sections(pdf_file_path: str):
	    """Parse the PDF and return the section table plus an Excel copy of it.

	    Args:
	        pdf_file_path: Path of the PDF file, forwarded to
	            ``extract_sections_only``.

	    Returns:
	        A ``(df, excel_path)`` tuple: the DataFrame produced by
	        ``extract_sections_only`` and the path of a temporary ``.xlsx`` file
	        holding the same rows on the sheet "Структура проекта".  The file is
	        created with ``delete=False``, so it is not removed by this function.
	    """
	    df = extract_sections_only(pdf_file_path)

	    # "Стоимость" arrives as display text ("1 234 567").  A number format only
	    # affects numeric cells, so the export copy gets real numbers back; the
	    # returned table keeps the text form.
	    excel_df = df.copy()
	    excel_df["Стоимость"] = pd.to_numeric(
	        excel_df["Стоимость"].astype(str).str.replace(" ", "", regex=False),
	        errors="coerce",
	    )

	    # Reserve a unique path, then close the handle before writing: reopening a
	    # still-open NamedTemporaryFile by name is not possible on every platform.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
	        excel_path = tmp.name

	    with pd.ExcelWriter(excel_path, engine="xlsxwriter") as writer:
	        excel_df.to_excel(writer, index=False, sheet_name="Структура проекта")
	        workbook = writer.book
	        worksheet = writer.sheets["Структура проекта"]
	        # NOTE(review): XlsxWriter format codes are US-locale style; confirm
	        # whether "#,##0" is what is wanted here instead of "# ##0".
	        money_fmt = workbook.add_format({"num_format": "# ##0"})
	        col_idx = excel_df.columns.get_loc("Стоимость")
	        worksheet.set_column(col_idx, col_idx, None, money_fmt)

	    return df, excel_path


	# ────────────────────────────────────────────────────────────────────────────
	# 3. Интерфейс Gradio
	# ────────────────────────────────────────────────────────────────────────────
	with gr.Blocks() as demo:
	    # Page heading.
	    gr.Markdown("# СТРУКТУРА ПРОЕКТА")

	    with gr.Column():
	        # Upload widget; type="filepath" matches the path argument the handler takes.
	        pdf_input = gr.File(
	            label="Загрузите PDF файл с локальными сметами",
	            type="filepath",
	        )
	        submit_button = gr.Button("Обработать")
	        # Result table; headers mirror the DataFrame columns built by the parser.
	        output_df = gr.Dataframe(
	            headers=[
	                "Номер сметы",
	                "Наименование сметы",
	                "Раздел",
	                "Стоимость",
	            ],
	        )
	        # Download slot for the generated Excel file.
	        download_button = gr.File(label="Скачать Excel")

	    # Button click -> handler; its two return values fill the table and the file slot.
	    submit_button.click(
	        fn=display_estimates_with_sections,
	        inputs=pdf_input,
	        outputs=[output_df, download_button],
	    )


	# ────────────────────────────────────────────────────────────────────────────
	if __name__ == "__main__":
	    # On Hugging Face Spaces, do not pass share=True.
	    demo.launch()