Spaces:

Ed5
/

Checklist-Generator

Sleeping

App Files Files Community

Checklist-Generator / app.py

Ed5

Update app.py

5b39d0c verified 19 days ago

raw

history blame contribute delete

18.2 kB

	import gradio as gr
	import pandas as pd
	import pdfplumber
	import os
	import tempfile
	import re
	from datetime import datetime
	from reportlab.pdfgen import canvas
	from reportlab.lib.pagesizes import A4
	from reportlab.pdfbase import pdfmetrics
	from reportlab.pdfbase.ttfonts import TTFont
	from reportlab.lib import colors

	# --- ЛОГИКА ---

	class KDChecker:
	def __init__(self):
	self.excel_db = pd.DataFrame()
	self.known_docs = ["Э3", "В4", "ПЭ3", "ВО", "ТЭ5", "СБ", "С5", "ОЛ", "Э1", "Э4", "Э7", "Д3", "Э6"]

	def load_excel_db(self, excel_path):
	    """Load the remarks workbook and rebuild ``self.excel_db``.

	    Every sheet is scanned for a header row naming both a cabinet column
	    ("шкаф"/"cabinet") and a remark column ("примечание"/"remark").
	    Sheets with such a row are reduced to the columns ``Cabinet``,
	    ``Remark``, ``Author`` (the sheet name) and ``Cabinet_Clean`` (the
	    cabinet text with spaces and line breaks removed) and concatenated.

	    Args:
	        excel_path: Path of the uploaded workbook, or ``None``.

	    Returns:
	        A ``(status, author_update, cabinet_update)`` tuple: a status
	        message and ``gr.update`` payloads for the two dropdowns.
	    """
	    print(f"--- Загрузка Excel: {excel_path} ---")
	    # Forget the previous base first: a failed load empties the dropdowns,
	    # so it must not leave stale remarks behind for later checks.
	    self.excel_db = pd.DataFrame()
	    if excel_path is None:
	        return "Файл не выбран", gr.update(choices=[], value=None), gr.update(choices=[], value=None)

	    all_data = []
	    sheets_log = []

	    try:
	        xls = pd.read_excel(excel_path, sheet_name=None, header=None)

	        for sheet_name, df_raw in xls.items():
	            header = self._find_header(df_raw)
	            if header is None:
	                continue
	            header_row_index, cab_col_idx, rem_col_idx = header

	            # Re-read the sheet with the detected row as its header.
	            df = pd.read_excel(excel_path, sheet_name=sheet_name, header=header_row_index)
	            df_subset = df.iloc[:, [cab_col_idx, rem_col_idx]].copy()
	            df_subset.columns = ["Cabinet", "Remark"]
	            df_subset["Author"] = sheet_name

	            # Rows with an empty cabinet cell inherit the nearest cabinet above.
	            df_subset["Cabinet"] = df_subset["Cabinet"].ffill()
	            df_subset = df_subset.dropna(subset=["Remark"]).astype(str)
	            df_subset["Cabinet_Clean"] = df_subset["Cabinet"].apply(
	                lambda x: x.strip().replace(" ", "").replace("\n", "").replace("\r", "")
	            )
	            all_data.append(df_subset)
	            sheets_log.append(f"'{sheet_name}': {len(df_subset)}")

	        if not all_data:
	            return (
	                "❌ Ошибка: Не найдены заголовки 'Шкаф' и 'Примечание'.",
	                gr.update(choices=[], value=None),
	                gr.update(choices=[], value=None),
	            )

	        self.excel_db = pd.concat(all_data, ignore_index=True)
	        authors_list = sorted(self.excel_db["Author"].unique().tolist())

	        print(f"Excel загружен. Всего строк: {len(self.excel_db)}")
	        msg = f"✅ База загружена!\nЗаписей: {len(self.excel_db)}\nАвторы: {', '.join(sheets_log)}"

	        return msg, gr.update(choices=authors_list, value=None, interactive=True), gr.update(choices=[], value=None)

	    except Exception as e:
	        # UI boundary: report any failure in the status box instead of raising.
	        self.excel_db = pd.DataFrame()
	        print(f"Ошибка Excel: {e}")
	        return f"❌ Ошибка: {e}", gr.update(choices=[], value=None), gr.update(choices=[], value=None)

	@staticmethod
	def _find_header(df_raw, max_rows=20):
	    """Return ``(row, cabinet_col, remark_col)`` of the header row, or ``None``.

	    Only the first ``max_rows`` rows are inspected. When several cells of a
	    row match the same keyword, the right-most one is used.
	    """
	    for row_idx in range(min(max_rows, len(df_raw))):
	        cells = [str(x).lower().strip() for x in df_raw.iloc[row_idx].values]
	        cab_col = -1
	        rem_col = -1
	        for col_idx, cell in enumerate(cells):
	            if "шкаф" in cell or "cabinet" in cell:
	                cab_col = col_idx
	            if "примечание" in cell or "remark" in cell:
	                rem_col = col_idx
	        if cab_col != -1 and rem_col != -1:
	            return row_idx, cab_col, rem_col
	    return None

	def get_cabinets_by_author(self, author_name):
	    """Build the cabinet-dropdown update for the selected author.

	    Returns a ``gr.update`` carrying the author's distinct cabinets in
	    sorted order, or an empty choice list when no base is loaded or no
	    author is selected.
	    """
	    if not author_name or self.excel_db.empty:
	        return gr.update(choices=[], value=None)

	    belongs_to_author = self.excel_db["Author"] == author_name
	    cabinet_choices = self.excel_db[belongs_to_author]["Cabinet"].unique().tolist()
	    cabinet_choices.sort()
	    return gr.update(choices=cabinet_choices, value=None, interactive=True)

	def extract_text(self, pdf_path):
	    """Return the text of the first five pages of a PDF.

	    Each page's text is followed by a newline; a page without text
	    contributes just the newline. A read error is printed and whatever was
	    collected before it is returned (possibly an empty string).
	    """
	    page_texts = []
	    try:
	        with pdfplumber.open(pdf_path) as document:
	            for page in document.pages[:5]:
	                page_texts.append((page.extract_text() or "") + "\n")
	    except Exception as e:
	        print(f"Ошибка чтения PDF {pdf_path}: {e}")
	    return "".join(page_texts)

	def find_all_decimal_numbers(self, text):
	    """Collect the distinct decimal designations found in ``text``.

	    Two layouts are recognised. Both start with one of the prefixes РЛТ,
	    ЛДАР, ВНАР or ШТМ and accept a dot or a whitespace character as the
	    separator between groups:

	    * project-specific: prefix, 1 digit, 3 digits, 1-4 capital letters,
	      3 digits and an optional ``-N`` suffix (e.g. ``РЛТ.1.006.ША.030``);
	    * GOST-style: prefix, 6 digits, 3 digits (e.g. ``ЛДАР.421246.337``).

	    Args:
	        text: Text to scan; an empty or ``None`` value yields ``[]``.

	    Returns:
	        The matches with all whitespace removed and duplicates dropped:
	        project-specific ones first, then GOST-style ones, each group in
	        order of appearance.
	    """
	    if not text:
	        return []

	    # A plain "|" is required here: an escaped "\|" would only match a
	    # literal pipe character and the alternation would never fire.
	    prefix = r"(?:РЛТ|ЛДАР|ВНАР|ШТМ)"
	    patterns = (
	        prefix + r"[\s\.]\d{1}[\s\.]\d{3}[\s\.][А-ЯA-Z]{1,4}[\s\.]\d{3}(?:-[\d]+)?",
	        prefix + r"[\s\.]\d{6}[\s\.]\d{3}",
	    )

	    # dict keys keep first-seen order and give O(1) duplicate checks.
	    found = {}
	    for pattern in patterns:
	        for match in re.finditer(pattern, text):
	            # Separators may be any whitespace, so strip all of it.
	            found.setdefault(re.sub(r"\s", "", match.group(0)), None)
	    return list(found)

	def determine_doc_type(self, filename):
	    """Classify a file by the document code or keyword in its name.

	    The name is upper-cased and tested against an ordered list of rules;
	    the first rule with any marker present in the name wins. Returns
	    "UNKNOWN" when no rule applies.
	    """
	    upper_name = filename.upper()

	    # Order matters: e.g. "ПЭ3" has to be tested before its substring "Э3".
	    rules = (
	        ("С2", ("С2",)),
	        ("ПЭ3", ("ПЭ3", "ПЕРЕЧЕНЬ")),
	        ("Э3", ("Э3", "СХЕМА ЭЛЕКТРИЧЕСКАЯ")),
	        ("Э4", ("Э4",)),
	        ("В4", ("В4", "СПЕЦИФИКАЦИЯ")),
	        ("ВО", ("ВО", "Э7", "ГАБАРИТ")),
	        ("ТЭ5", ("ТЭ5", "ТАБЛИЦА")),
	        ("СБ", ("СБ",)),
	        ("С5", ("С5",)),
	        ("ОЛ", ("ОЛ",)),
	        ("Э1", ("Э1",)),
	        ("Э6", ("Э6", "ЗАЗЕМЛЕНИЯ")),
	        ("Д3", ("Д3", "МОНТАЖ")),
	    )
	    for doc_type, markers in rules:
	        if any(marker in upper_name for marker in markers):
	            return doc_type
	    return "UNKNOWN"

	def get_remarks(self, cabinet_key, is_clean_key=True):
	if self.excel_db.empty: return {}
	if is_clean_key:
	target = cabinet_key.replace(" ", "")
	mask = self.excel_db['Cabinet_Clean'].str.contains(re.escape(target), case=False, na=False)
	else:
	mask = self.excel_db['Cabinet'] == cabinet_key

	rows = self.excel_db[mask]
	if rows.empty: return {}
	parsed = {}
	for remark_cell in rows['Remark']:
	cell_text = str(remark_cell)
	cell_text = re.sub(r'(\d+)\.([А-ЯA-Z])', r'\1. \2', cell_text)
	items = re.split(r'(?:^\|\n)\s*(?=\d+[\.\)])', cell_text)
	for item in items:
	if len(item) < 3: continue
	clean_item = item.strip()
	clean_item_no_num = re.sub(r'^\d+[\.\)]\s*', '', clean_item)
	doc_pattern = r'^(?:Документ\s+\|В\s+)?([А-ЯA-Z0-9\s,\(\)\-]+?)(?:[\.\:\-]\|\s+)(.*)'
	match = re.match(doc_pattern, clean_item_no_num, re.IGNORECASE \| re.DOTALL)
	detected_docs = []
	final_text = clean_item
	if match:
	potential_docs_str = match.group(1).upper()
	cleaned_codes = potential_docs_str.replace("(", " ").replace(")", " ").replace(",", " ")
	parts = cleaned_codes.split()
	valid_parts = [p for p in parts if p in self.known_docs]
	if valid_parts:
	detected_docs = valid_parts
	final_text = match.group(2).strip()
	if not detected_docs: detected_docs = ["ALL"]
	for doc in detected_docs:
	if doc not in parsed: parsed[doc] = []
	parsed[doc].append(final_text)
	return parsed

	def check_files(self, files, manual_cabinet, progress=gr.Progress()):
	    """Build the checklist PDF for the uploaded documents.

	    The cabinet is taken from ``manual_cabinet`` when it is non-blank;
	    otherwise it is detected from the PDF texts. The cabinet's remarks are
	    then assigned to each file according to the document type derived from
	    the file name, and the result is rendered with ``create_pdf``.

	    Args:
	        files: Paths of the uploaded PDF files.
	        manual_cabinet: Cabinet chosen in the UI, or empty/``None``.
	        progress: Gradio progress tracker used while scanning the files.

	    Returns:
	        ``(message, pdf_path)``; ``pdf_path`` is ``None`` whenever no PDF
	        was produced.
	    """
	    print("\n--- Начало проверки ---")
	    if not files:
	        return "Файлы не загружены", None
	    if self.excel_db.empty:
	        return "Сначала загрузите Excel базу!", None

	    is_manual = bool(manual_cabinet and manual_cabinet.strip())
	    if is_manual:
	        detected_cabinet, found_by_method = manual_cabinet, "manual"
	    else:
	        detected_cabinet, found_by_method = self._detect_cabinet(files, progress)

	    print(f"Определен шкаф: {detected_cabinet}")

	    if detected_cabinet == "Не определен":
	        return "⚠️ Шкаф не опознан автоматически.\nВыберите Автора и Шкаф вручную.", None

	    # A designation found in a PDF is matched against the cleaned keys;
	    # a manually chosen or name-matched cabinet is matched exactly.
	    is_clean_search = (found_by_method == "number")
	    remarks = self.get_remarks(detected_cabinet, is_clean_key=is_clean_search)

	    if not remarks:
	        return f"⚠️ Для шкафа '{detected_cabinet}' нет замечаний в базе.", None

	    checklist = {}
	    processed_count = 0
	    for file_path in files:
	        fname = os.path.basename(file_path)
	        dtype = self.determine_doc_type(fname)
	        tasks = list(remarks.get(dtype, []))
	        # Remarks without a document code go to every file except "С2" ones.
	        if dtype != "С2":
	            tasks.extend(remarks.get("ALL", []))
	        if tasks:
	            # Drop duplicates while keeping the original order.
	            checklist[fname] = list(dict.fromkeys(tasks))
	            processed_count += 1

	    pdf_title = detected_cabinet
	    if is_manual:
	        pdf_title += " (Выбор вручную)"

	    print("Генерация PDF...")
	    try:
	        pdf = self.create_pdf(pdf_title, checklist)
	    except Exception as e:
	        print(f"ОШИБКА PDF: {e}")
	        return f"Ошибка создания PDF: {e}", None

	    total = sum(len(v) for v in checklist.values())
	    method_str = "Ручной выбор" if is_manual else ("По номеру" if is_clean_search else "По имени")

	    return f"✅ Готово!\n📂 Шкаф: {detected_cabinet}\n🔍 Метод: {method_str}\n📄 Файлов: {processed_count}\n🚩 Замечаний: {total}", pdf

	def _detect_cabinet(self, files, progress):
	    """Detect the cabinet from the PDF texts; return ``(cabinet, method)``.

	    Files are examined in order. For each file the decimal designations in
	    its text are compared with the cleaned cabinet keys (method "number");
	    failing that, cabinet names from the base are searched in the text
	    (method "name"). Returns ``("Не определен", "")`` when nothing matches.
	    """
	    db_clean_keys = set(self.excel_db["Cabinet_Clean"].tolist())

	    # The name candidates are the same for every file, so build them once.
	    # Whitespace runs are collapsed on both sides of the comparison so that
	    # a name broken across lines or padded with extra spaces still matches.
	    name_candidates = []
	    for cab_name in self.excel_db["Cabinet"].unique():
	        # Entries that look like a designation are left to the number search.
	        if "ЛДАР" in cab_name or "РЛТ" in cab_name:
	            continue
	        normalized = " ".join(cab_name.split()).lower()
	        if len(normalized) > 5:
	            name_candidates.append((cab_name, normalized))

	    for file_path in progress.tqdm(files, desc="Поиск номера шкафа"):
	        raw_text = self.extract_text(file_path)

	        for cand in self.find_all_decimal_numbers(raw_text):
	            if cand in db_clean_keys:
	                print(f"✅ Шкаф найден по номеру: {cand}")
	                return cand, "number"

	        flat_text = " ".join(raw_text.split()).lower()
	        for cab_name, normalized in name_candidates:
	            if normalized in flat_text:
	                print(f"✅ Шкаф найден по имени: {cab_name}")
	                return cab_name, "name"

	    return "Не определен", ""

	def create_pdf(self, cabinet, data):
	    """Render the checklist as an A4 PDF with one checkbox per remark.

	    Args:
	        cabinet: Cabinet title for the header (shortened to 60 characters).
	        data: Mapping of file name to its list of remark strings.

	    Returns:
	        Path of the written file, ``CheckList_Result.pdf`` in the system
	        temporary directory.
	    """
	    path = os.path.join(tempfile.gettempdir(), "CheckList_Result.pdf")
	    c = canvas.Canvas(path, pagesize=A4)
	    form = c.acroForm
	    width, height = A4

	    # Use the first TrueType font that is present and loads. The Linux
	    # paths are fallbacks for hosts without arial.ttf; Helvetica remains
	    # the last resort.
	    font_name = 'Helvetica'
	    font_candidates = (
	        ('Arial', "arial.ttf"),
	        ('Arial', "C:\\Windows\\Fonts\\arial.ttf"),
	        ('DejaVuSans', "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"),
	        ('LiberationSans', "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf"),
	    )
	    for candidate_name, candidate_path in font_candidates:
	        if not os.path.exists(candidate_path):
	            continue
	        try:
	            pdfmetrics.registerFont(TTFont(candidate_name, candidate_path))
	        except Exception as e:
	            # Report the failure and try the next candidate instead of hiding it.
	            print(f"Ошибка загрузки шрифта {candidate_path}: {e}")
	            continue
	        font_name = candidate_name
	        break

	    # Header: title, cabinet, date and a rule below them.
	    y = height - 50
	    c.setFont(font_name, 16)
	    c.drawString(50, y, "ЧЕК-ЛИСТ ПРОВЕРКИ КД")
	    y -= 25
	    c.setFont(font_name, 12)
	    disp_cab = cabinet[:60] + "..." if len(cabinet) > 60 else cabinet
	    c.drawString(50, y, f"Шкаф: {disp_cab}")
	    c.drawString(400, y, f"Дата: {datetime.now().strftime('%d.%m.%Y')}")
	    y -= 20
	    c.line(50, y, width - 50, y)
	    y -= 30

	    if not data:
	        c.drawString(50, y, "Нет замечаний.")
	        c.save()
	        return path

	    cb_id = 0
	    for filename, tasks in data.items():
	        if y < 100:
	            c.showPage()
	            y = height - 50
	            c.setFont(font_name, 12)
	        c.setFillColor(colors.darkblue)
	        c.setFont(font_name, 11)
	        c.drawString(50, y, f"Файл: {filename}")
	        c.setFillColor(colors.black)
	        y -= 15
	        c.setFont(font_name, 10)

	        for task in tasks:
	            if y < 80:
	                c.showPage()
	                y = height - 50
	                c.setFont(font_name, 10)

	            form.checkbox(name=f"cb_{cb_id}", x=50, y=y - 10, size=10, buttonStyle='check', forceBorder=True, fillColor=colors.white)
	            cb_id += 1

	            text_start_y = y - 2
	            for paragraph in task.split('\n'):
	                for line in self._wrap_words(paragraph):
	                    if text_start_y < 40:
	                        c.showPage()
	                        text_start_y = height - 50
	                        c.setFont(font_name, 10)
	                    c.drawString(65, text_start_y, line.strip())
	                    text_start_y -= 12
	            y = text_start_y - 8

	        # Light separator after the last remark of a file.
	        y -= 10
	        c.setStrokeColor(colors.lightgrey)
	        c.line(50, y, width - 50, y)
	        c.setStrokeColor(colors.black)
	        y -= 20

	    c.save()
	    return path

	@staticmethod
	def _wrap_words(paragraph, max_len=95):
	    """Split ``paragraph`` on spaces into lines of at most about ``max_len`` characters.

	    Words are never broken, so a single longer word gets a line of its own.
	    """
	    lines = []
	    cur_line = ""
	    for word in paragraph.split(' '):
	        if len(cur_line) + len(word) + 1 <= max_len:
	            cur_line += word + " "
	        else:
	            # Skip the empty line an over-long first word would otherwise produce.
	            if cur_line:
	                lines.append(cur_line)
	            cur_line = word + " "
	    lines.append(cur_line)
	    return lines


	# --- ИНТЕРФЕЙС ---

	# Custom stylesheet for the Gradio UI: wider page container, fixed-height
	# boxes for elements with the "compact_file" class, the orange action
	# button ("orange_btn") with its hover colour, and a hidden footer.
	css = """
	.gradio-container { max-width: 95% !important; }
	.compact_file { height: 150px !important; min-height: 150px !important; max-height: 150px !important; overflow: hidden !important; }
	.orange_btn { background: #FF7F27 !important; border: none !important; color: white !important; font-weight: bold; }
	.orange_btn:hover { background: #E06010 !important; }
	footer { display: none !important; }
	"""

	def create_app():
	    """Assemble the Gradio Blocks UI and connect it to a fresh ``KDChecker``.

	    The page has a column for the remarks workbook (upload, manual author
	    and cabinet selection, status box), a column for the PDF documents with
	    the action button, and a result row with the log and the generated PDF.

	    Returns:
	        The ``gr.Blocks`` application, not yet launched.
	    """
	    checker = KDChecker()

	    # css and theme are not set on Blocks here; they are passed to launch().
	    with gr.Blocks(title="Генератор чек-листов КД") as app:
	        gr.Markdown("## ✅ Генератор чек-листов КД")

	        with gr.Row():
	            with gr.Column(scale=1):
	                gr.Markdown("### 1. База технических замечаний")
	                excel_upload = gr.File(label="Excel (.xlsx)", type="filepath", elem_classes="compact_file")
	                with gr.Group():
	                    gr.Markdown("#### Ручной выбор:")
	                    author_select = gr.Dropdown(label="1. Разработчик КД", choices=[], interactive=True)
	                    cabinet_select = gr.Dropdown(label="2. Шкаф", choices=[], interactive=True)
	                db_status = gr.Textbox(label="Статус базы", lines=2, max_lines=3, interactive=False)

	            with gr.Column(scale=1):
	                gr.Markdown("### 2. Документация")
	                pdf_upload = gr.File(
	                    label="Конструкторская документация (PDF)",
	                    file_count="multiple",
	                    type="filepath",
	                    elem_classes="compact_file",
	                )
	                gr.Markdown("")
	                run_button = gr.Button("Сформировать чек-лист", variant="primary", elem_classes="orange_btn")

	        gr.Markdown("### 3. Результат")
	        with gr.Row():
	            with gr.Column(scale=1):
	                result_log = gr.Textbox(label="Лог проверки", lines=5)
	            with gr.Column(scale=1):
	                result_pdf = gr.File(label="Скачать готовый PDF")

	        # Event wiring: uploading a workbook fills the author list, choosing
	        # an author fills the cabinet list, the button runs the check.
	        excel_upload.upload(
	            checker.load_excel_db,
	            inputs=[excel_upload],
	            outputs=[db_status, author_select, cabinet_select],
	        )
	        author_select.change(
	            checker.get_cabinets_by_author,
	            inputs=[author_select],
	            outputs=[cabinet_select],
	        )
	        run_button.click(
	            checker.check_files,
	            inputs=[pdf_upload, cabinet_select],
	            outputs=[result_log, result_pdf],
	        )

	    return app

	if __name__ == "__main__":
	    app = create_app()
	    # Listen on all interfaces on port 7860; stylesheet and theme are
	    # supplied at launch time.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "css": css,
	        "theme": gr.themes.Soft(),
	        "ssr_mode": False,
	    }
	    app.launch(**launch_options)