"""Gradio app that builds a fillable PDF checklist for design documentation.

Workflow implemented below:

1. An Excel workbook with review remarks is loaded; every sheet that has a
   "cabinet" and a "remark" column contributes rows (the sheet name is
   stored as the author).
2. The user uploads PDF documents; the cabinet is taken from the manual
   drop-down or detected from the PDF text.
3. The remarks for that cabinet are grouped per document type and written
   to a PDF with one checkbox per remark.
"""
import os
import re
import tempfile
from datetime import datetime

import gradio as gr
import pandas as pd
import pdfplumber
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas


# --- LOGIC ---
class KDChecker:
    """Holds the remarks database and produces checklists from it."""

    # Sentinel returned when no cabinet could be detected automatically.
    _NOT_DETECTED = "Не определен"

    # (registered font name, path) pairs tried in order by create_pdf.
    # The first two entries are the original lookups; the rest extend the
    # search to common Linux/macOS locations so Cyrillic text can still be
    # rendered there.
    # NOTE(review): the non-Windows paths are typical install locations,
    # not guaranteed ones - confirm for the deployment host.
    _FONT_CANDIDATES = (
        ("Arial", "arial.ttf"),
        ("Arial", "C:\\Windows\\Fonts\\arial.ttf"),
        ("DejaVuSans", "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"),
        ("LiberationSans",
         "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf"),
        ("Arial", "/Library/Fonts/Arial.ttf"),
        ("Arial", "/System/Library/Fonts/Supplemental/Arial.ttf"),
    )

    def __init__(self):
        # Rows of the loaded workbook: Cabinet, Remark, Author, Cabinet_Clean.
        self.excel_db = pd.DataFrame()
        # Document-type codes recognised at the start of a remark.
        self.known_docs = ["Э3", "В4", "ПЭ3", "ВО", "ТЭ5", "СБ", "С5", "ОЛ",
                           "Э1", "Э4", "Э7", "Д3", "Э6"]

    @staticmethod
    def _find_header(df_raw):
        """Locate the header row within the first 20 rows of a raw sheet.

        A row qualifies when it has a cell containing "шкаф"/"cabinet" and
        a cell containing "примечание"/"remark" (case-insensitive). When
        several cells of the row match, the last one wins.

        Returns:
            ``(row_index, cabinet_col, remark_col)``, or ``(-1, -1, -1)``
            when no such row exists.
        """
        for i in range(min(20, len(df_raw))):
            row_values = [str(x).lower().strip() for x in df_raw.iloc[i].values]
            c_idx = -1
            r_idx = -1
            for idx, val in enumerate(row_values):
                if "шкаф" in val or "cabinet" in val:
                    c_idx = idx
                if "примечание" in val or "remark" in val:
                    r_idx = idx
            if c_idx != -1 and r_idx != -1:
                return i, c_idx, r_idx
        return -1, -1, -1

    def load_excel_db(self, excel_path):
        """Load the remarks workbook into ``self.excel_db``.

        Args:
            excel_path: Path of the uploaded workbook, or ``None``.

        Returns:
            A 3-tuple for the Gradio outputs: status message, update for
            the author drop-down, update for the cabinet drop-down.
        """
        print(f"--- Загрузка Excel: {excel_path} ---")
        if excel_path is None:
            return ("Файл не выбран",
                    gr.update(choices=[], value=None),
                    gr.update(choices=[], value=None))

        all_data = []
        sheets_log = []
        try:
            # Open the workbook once instead of re-opening it per sheet.
            with pd.ExcelFile(excel_path) as book:
                for sheet_name in book.sheet_names:
                    df_raw = book.parse(sheet_name, header=None)
                    header_row_index, cab_col_idx, rem_col_idx = \
                        self._find_header(df_raw)
                    if header_row_index == -1:
                        continue

                    # Re-parse with the detected header row so the data
                    # starts right below it.
                    df = book.parse(sheet_name, header=header_row_index)
                    df_subset = df.iloc[:, [cab_col_idx, rem_col_idx]].copy()
                    df_subset.columns = ["Cabinet", "Remark"]
                    df_subset["Author"] = sheet_name
                    # Rows without a cabinet value inherit the previous
                    # one (forward fill).
                    df_subset["Cabinet"] = df_subset["Cabinet"].ffill()
                    df_subset = df_subset.dropna(subset=["Remark"]).astype(str)
                    df_subset["Cabinet_Clean"] = df_subset["Cabinet"].apply(
                        lambda x: x.strip().replace(" ", "")
                        .replace("\n", "").replace("\r", "")
                    )
                    all_data.append(df_subset)
                    sheets_log.append(f"'{sheet_name}': {len(df_subset)}")

            if not all_data:
                return ("❌ Ошибка: Не найдены заголовки 'Шкаф' и 'Примечание'.",
                        gr.update(choices=[]), gr.update(choices=[]))

            self.excel_db = pd.concat(all_data, ignore_index=True)
            authors_list = sorted(self.excel_db["Author"].unique().tolist())
            print(f"Excel загружен. Всего строк: {len(self.excel_db)}")
            msg = (f"✅ База загружена!\nЗаписей: {len(self.excel_db)}\n"
                   f"Авторы: {', '.join(sheets_log)}")
            return (msg,
                    gr.update(choices=authors_list, value=None, interactive=True),
                    gr.update(choices=[], value=None))
        except Exception as e:
            # UI boundary: report the failure in the status box, do not crash.
            print(f"Ошибка Excel: {e}")
            return f"❌ Ошибка: {e}", gr.update(choices=[]), gr.update(choices=[])

    def get_cabinets_by_author(self, author_name):
        """Return a drop-down update listing the cabinets of ``author_name``."""
        if self.excel_db.empty or not author_name:
            return gr.update(choices=[], value=None)
        by_author = self.excel_db[self.excel_db["Author"] == author_name]
        filtered_cabs = by_author["Cabinet"].unique().tolist()
        return gr.update(choices=sorted(filtered_cabs), value=None,
                         interactive=True)

    def extract_text(self, pdf_path):
        """Return the text of the first five pages of ``pdf_path``.

        Read errors are printed; whatever was extracted before the error
        is still returned.
        """
        chunks = []
        try:
            with pdfplumber.open(pdf_path) as pdf:
                for page in pdf.pages[:5]:
                    chunks.append((page.extract_text() or "") + "\n")
        except Exception as e:
            print(f"Ошибка чтения PDF {pdf_path}: {e}")
        return "".join(chunks)

    def find_all_decimal_numbers(self, text):
        """Return the unique document numbers found in ``text``.

        Two formats are searched, in this order:

        * custom, e.g. ``РЛТ.1.006.ША.030``: prefix, 1 digit, 3 digits,
          1-4 letters, 3 digits, optional ``-N`` suffix;
        * GOST-style, e.g. ``ЛДАР.421246.337``: prefix, 6 digits, 3 digits.

        Separators may be dots and/or whitespace. All whitespace is
        stripped from each result (the patterns accept any ``\\s``, so
        tabs and carriage returns are removed too, not only spaces and
        newlines). First-seen order is preserved.
        """
        pattern_custom = r"(РЛТ|ЛДАР|ВНАР|ШТМ)[\s\.]*\d{1}[\s\.]*\d{3}[\s\.]*[А-ЯA-Z]{1,4}[\s\.]*\d{3}(-[\d]+)?"
        pattern_gost = r"(РЛТ|ЛДАР|ВНАР|ШТМ)[\s\.]*\d{6}[\s\.]*\d{3}"

        # A dict keeps insertion order and de-duplicates in O(1) per item.
        found = {}
        for pattern in (pattern_custom, pattern_gost):
            for match in re.finditer(pattern, text):
                found[re.sub(r"\s+", "", match.group(0))] = None
        return list(found)

    def determine_doc_type(self, filename):
        """Guess the document-type code from a file name.

        The upper-cased name is tested against marker substrings in a
        fixed order; the first hit wins. Order matters: "ПЭ3" is tested
        before "Э3", which it contains.

        Returns:
            The type code, or ``"UNKNOWN"`` when nothing matches.
        """
        fname = filename.upper()
        rules = (
            (("С2",), "С2"),
            (("ПЭ3", "ПЕРЕЧЕНЬ"), "ПЭ3"),
            (("Э3", "СХЕМА ЭЛЕКТРИЧЕСКАЯ"), "Э3"),
            (("Э4",), "Э4"),
            (("В4", "СПЕЦИФИКАЦИЯ"), "В4"),
            (("ВО", "Э7", "ГАБАРИТ"), "ВО"),
            (("ТЭ5", "ТАБЛИЦА"), "ТЭ5"),
            (("СБ",), "СБ"),
            (("С5",), "С5"),
            (("ОЛ",), "ОЛ"),
            (("Э1",), "Э1"),
            (("Э6", "ЗАЗЕМЛЕНИЯ"), "Э6"),
            (("Д3", "МОНТАЖ"), "Д3"),
        )
        for markers, doc_type in rules:
            if any(marker in fname for marker in markers):
                return doc_type
        return "UNKNOWN"

    def get_remarks(self, cabinet_key, is_clean_key=True):
        """Collect the remarks of one cabinet, grouped by document type.

        Args:
            cabinet_key: Cabinet identifier to look up.
            is_clean_key: When true, ``cabinet_key`` (spaces removed) is
                matched as a case-insensitive substring of
                ``Cabinet_Clean``; otherwise ``Cabinet`` must equal it.

        Returns:
            ``{doc_code: [remark_text, ...]}``. Remarks that do not start
            with a known document code are stored under ``"ALL"``. Empty
            dict when the database is empty or nothing matches.
        """
        if self.excel_db.empty:
            return {}

        if is_clean_key:
            target = cabinet_key.replace(" ", "")
            mask = self.excel_db['Cabinet_Clean'].str.contains(
                re.escape(target), case=False, na=False)
        else:
            mask = self.excel_db['Cabinet'] == cabinet_key

        rows = self.excel_db[mask]
        if rows.empty:
            return {}

        # Optional "Документ"/"В" lead-in, then a run of candidate codes,
        # then a separator, then the remark text.
        doc_pattern = r'^(?:Документ\s+|В\s+)?([А-ЯA-Z0-9\s,\(\)\-]+?)(?:[\.\:\-]|\s+)(.*)'

        parsed = {}
        for remark_cell in rows['Remark']:
            cell_text = str(remark_cell)
            # Insert a space after "N." when a capital letter follows directly.
            cell_text = re.sub(r'(\d+)\.([А-ЯA-Z])', r'\1. \2', cell_text)
            # Split the cell in front of every "N." / "N)" that starts a line.
            items = re.split(r'(?:^|\n)\s*(?=\d+[\.\)])', cell_text)
            for item in items:
                if len(item) < 3:
                    continue
                clean_item = item.strip()
                clean_item_no_num = re.sub(r'^\d+[\.\)]\s*', '', clean_item)
                match = re.match(doc_pattern, clean_item_no_num,
                                 re.IGNORECASE | re.DOTALL)

                detected_docs = []
                final_text = clean_item
                if match:
                    potential_docs_str = match.group(1).upper()
                    cleaned_codes = (potential_docs_str.replace("(", " ")
                                     .replace(")", " ").replace(",", " "))
                    valid_parts = [p for p in cleaned_codes.split()
                                   if p in self.known_docs]
                    if valid_parts:
                        detected_docs = valid_parts
                        final_text = match.group(2).strip()

                if not detected_docs:
                    detected_docs = ["ALL"]
                for doc in detected_docs:
                    parsed.setdefault(doc, []).append(final_text)
        return parsed

    def _detect_cabinet(self, files, progress):
        """Find the cabinet that the uploaded PDFs belong to.

        For each file, in order: first look for a document number that
        equals a ``Cabinet_Clean`` value, then for a cabinet name that
        occurs in the page text. The first hit ends the search.

        Returns:
            ``(cabinet, method)`` with ``method`` in ``{"number", "name"}``,
            or ``(self._NOT_DETECTED, "")`` when nothing was found.
        """
        db_clean_keys = set(self.excel_db["Cabinet_Clean"].tolist())
        # Loop-invariant, so computed once rather than once per file.
        unique_cabinets = self.excel_db["Cabinet"].unique()

        for file_path in progress.tqdm(files, desc="Поиск номера шкафа"):
            raw_text = self.extract_text(file_path)

            # --- search by number (two patterns) ---
            for cand in self.find_all_decimal_numbers(raw_text):
                if cand in db_clean_keys:
                    print(f"✅ Шкаф найден по номеру: {cand}")
                    return cand, "number"

            # --- search by name ---
            # Collapse every whitespace run (newlines, tabs, repeated
            # spaces) to one space so a name split over lines still matches.
            flat_text = " ".join(raw_text.split()).lower()
            for cab_name in unique_cabinets:
                # Entries that look like a code rather than a name are skipped.
                if "ЛДАР" in cab_name or "РЛТ" in cab_name:
                    continue
                # Same whitespace normalisation as applied to the PDF text.
                clean_name = " ".join(cab_name.split()).lower()
                if len(clean_name) > 5 and clean_name in flat_text:
                    print(f"✅ Шкаф найден по имени: {cab_name}")
                    return cab_name, "name"

        return self._NOT_DETECTED, ""

    def check_files(self, files, manual_cabinet, progress=gr.Progress()):
        """Build the checklist PDF for the uploaded files.

        Args:
            files: Paths of the uploaded PDF documents.
            manual_cabinet: Cabinet chosen in the UI; when blank the
                cabinet is detected from the PDFs.
            progress: Gradio progress tracker.

        Returns:
            ``(log_message, pdf_path)``; ``pdf_path`` is ``None`` whenever
            no PDF was produced.
        """
        print("\n--- Начало проверки ---")
        if not files:
            return "Файлы не загружены", None
        if self.excel_db.empty:
            return "Сначала загрузите Excel базу!", None

        is_manual = bool(manual_cabinet and manual_cabinet.strip())
        if is_manual:
            detected_cabinet = manual_cabinet
            found_by_method = "manual"
        else:
            detected_cabinet, found_by_method = self._detect_cabinet(
                files, progress)

        print(f"Определен шкаф: {detected_cabinet}")
        if detected_cabinet == self._NOT_DETECTED:
            return ("⚠️ Шкаф не опознан автоматически.\n"
                    "Выберите Автора и Шкаф вручную."), None

        # Only a number hit is looked up through Cabinet_Clean; manual and
        # name hits carry the exact Cabinet value.
        is_clean_search = (found_by_method == "number")
        remarks = self.get_remarks(detected_cabinet, is_clean_key=is_clean_search)
        if not remarks:
            return f"⚠️ Для шкафа '{detected_cabinet}' нет замечаний в базе.", None

        checklist = {}
        processed_count = 0
        for file_path in files:
            fname = os.path.basename(file_path)
            dtype = self.determine_doc_type(fname)
            tasks = []
            if dtype in remarks:
                tasks.extend(remarks[dtype])
            # General remarks apply to every document type except "С2".
            if "ALL" in remarks and dtype != "С2":
                tasks.extend(remarks["ALL"])
            if tasks:
                # De-duplicate while keeping order.
                checklist[fname] = list(dict.fromkeys(tasks))
            # NOTE(review): the original indentation was lost; counting
            # every file (not only those with remarks) is assumed here.
            processed_count += 1

        pdf_title = detected_cabinet
        if is_manual:
            pdf_title += " (Выбор вручную)"

        print("Генерация PDF...")
        try:
            pdf = self.create_pdf(pdf_title, checklist)
        except Exception as e:
            print(f"ОШИБКА PDF: {e}")
            return f"Ошибка создания PDF: {e}", None

        total = sum(len(v) for v in checklist.values())
        if is_manual:
            method_str = "Ручной выбор"
        elif is_clean_search:
            method_str = "По номеру"
        else:
            method_str = "По имени"
        return (f"✅ Готово!\n📂 Шкаф: {detected_cabinet}\n🔍 Метод: {method_str}\n"
                f"📄 Файлов: {processed_count}\n🚩 Замечаний: {total}"), pdf

    def _pick_font(self):
        """Register the first usable TrueType font and return its name.

        Falls back to the built-in ``'Helvetica'`` when no candidate can
        be registered. Failures are printed instead of being silently
        swallowed, and a failing candidate no longer stops the search.
        """
        for font_name, font_path in self._FONT_CANDIDATES:
            if not os.path.exists(font_path):
                continue
            try:
                pdfmetrics.registerFont(TTFont(font_name, font_path))
            except Exception as e:
                # Best effort: an unreadable font must not abort the PDF.
                print(f"Не удалось загрузить шрифт {font_path}: {e}")
                continue
            return font_name
        print("Шрифт с кириллицей не найден, используется Helvetica.")
        return 'Helvetica'

    @staticmethod
    def _wrap_words(paragraph, max_len=95):
        """Greedily wrap ``paragraph`` on spaces to about ``max_len`` chars.

        Each returned line keeps a trailing space (callers strip it). A
        word longer than ``max_len`` is not split, and an empty first
        line is emitted before it, as the original inline loop did.
        """
        lines = []
        cur_line = ""
        for w in paragraph.split(' '):
            if len(cur_line) + len(w) + 1 <= max_len:
                cur_line += w + " "
            else:
                lines.append(cur_line)
                cur_line = w + " "
        lines.append(cur_line)
        return lines

    def create_pdf(self, cabinet, data):
        """Render the checklist to a PDF and return its path.

        Args:
            cabinet: Title shown in the "Шкаф:" header line (truncated to
                60 characters plus "...").
            data: ``{file_name: [remark, ...]}``.

        Returns:
            Path of the written PDF. Every call uses its own temporary
            directory, so concurrent requests cannot overwrite each
            other's result; the file name itself is unchanged.
        """
        fname = "CheckList_Result.pdf"
        path = os.path.join(tempfile.mkdtemp(prefix="kd_checklist_"), fname)
        c = canvas.Canvas(path, pagesize=A4)
        form = c.acroForm
        width, height = A4
        font_name = self._pick_font()

        # --- header ---
        y = height - 50
        c.setFont(font_name, 16)
        c.drawString(50, y, "ЧЕК-ЛИСТ ПРОВЕРКИ КД")
        y -= 25
        c.setFont(font_name, 12)
        disp_cab = cabinet[:60] + "..." if len(cabinet) > 60 else cabinet
        c.drawString(50, y, f"Шкаф: {disp_cab}")
        c.drawString(400, y, f"Дата: {datetime.now().strftime('%d.%m.%Y')}")
        y -= 20
        c.line(50, y, width - 50, y)
        y -= 30

        if not data:
            c.drawString(50, y, "Нет замечаний.")
            c.save()
            return path

        cb_id = 0  # every form field needs a unique name
        for filename, tasks in data.items():
            if y < 100:
                c.showPage()
                y = height - 50
                c.setFont(font_name, 12)
            c.setFillColor(colors.darkblue)
            c.setFont(font_name, 11)
            c.drawString(50, y, f"Файл: {filename}")
            c.setFillColor(colors.black)
            y -= 15
            c.setFont(font_name, 10)

            for task in tasks:
                paragraphs = task.split('\n')
                if y < 80:
                    c.showPage()
                    y = height - 50
                    c.setFont(font_name, 10)
                form.checkbox(name=f"cb_{cb_id}", x=50, y=y - 10, size=10,
                              buttonStyle='check', forceBorder=True,
                              fillColor=colors.white)
                cb_id += 1

                text_start_y = y - 2
                for paragraph in paragraphs:
                    for line in self._wrap_words(paragraph):
                        if text_start_y < 40:
                            c.showPage()
                            text_start_y = height - 50
                            c.setFont(font_name, 10)
                        c.drawString(65, text_start_y, line.strip())
                        text_start_y -= 12
                y = text_start_y - 8

            # Light separator between files.
            y -= 10
            c.setStrokeColor(colors.lightgrey)
            c.line(50, y, width - 50, y)
            c.setStrokeColor(colors.black)
            y -= 20

        c.save()
        return path


# --- INTERFACE ---
css = """
.gradio-container { max-width: 95% !important; }
.compact_file {
    height: 150px !important;
    min-height: 150px !important;
    max-height: 150px !important;
    overflow: hidden !important;
}
.orange_btn {
    background: #FF7F27 !important;
    border: none !important;
    color: white !important;
    font-weight: bold;
}
.orange_btn:hover { background: #E06010 !important; }
footer { display: none !important; }
"""


def create_app():
    """Build the Gradio Blocks UI wired to one shared ``KDChecker``.

    NOTE(review): the layout nesting below was reconstructed from
    whitespace-mangled source - confirm against the intended design.
    """
    checker = KDChecker()

    # css and theme are not passed to gr.Blocks here; they are supplied to
    # app.launch() in the __main__ section.
    with gr.Blocks(title="Генератор чек-листов КД") as app:
        gr.Markdown("## ✅ Генератор чек-листов КД")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 1. База технических замечаний")
                db_in = gr.File(label="Excel (.xlsx)", type="filepath",
                                elem_classes="compact_file")
                with gr.Group():
                    gr.Markdown("#### Ручной выбор:")
                    author_dd = gr.Dropdown(label="1. Разработчик КД",
                                            choices=[], interactive=True)
                    cabinet_dd = gr.Dropdown(label="2. Шкаф", choices=[],
                                             interactive=True)
                db_out = gr.Textbox(label="Статус базы", lines=2, max_lines=3,
                                    interactive=False)
                db_in.upload(checker.load_excel_db, inputs=[db_in],
                             outputs=[db_out, author_dd, cabinet_dd])
                author_dd.change(checker.get_cabinets_by_author,
                                 inputs=[author_dd], outputs=[cabinet_dd])

            with gr.Column(scale=1):
                gr.Markdown("### 2. Документация")
                files_in = gr.File(label="Конструкторская документация (PDF)",
                                   file_count="multiple", type="filepath",
                                   elem_classes="compact_file")
                gr.Markdown("")
                btn = gr.Button("Сформировать чек-лист", variant="primary",
                                elem_classes="orange_btn")

        gr.Markdown("### 3. Результат")
        with gr.Row():
            with gr.Column(scale=1):
                res_txt = gr.Textbox(label="Лог проверки", lines=5)
            with gr.Column(scale=1):
                res_pdf = gr.File(label="Скачать готовый PDF")

        btn.click(checker.check_files, inputs=[files_in, cabinet_dd],
                  outputs=[res_txt, res_pdf])

    return app


if __name__ == "__main__":
    app = create_app()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        css=css,
        theme=gr.themes.Soft(),
        ssr_mode=False
    )