# Ed5's picture
# Update app.py
# 5b39d0c verified
import gradio as gr
import pandas as pd
import pdfplumber
import os
import tempfile
import re
from datetime import datetime
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.lib import colors
# --- ЛОГИКА ---
class KDChecker:
    """Build review checklists for design documentation from an Excel remark base.

    The instance keeps the loaded remark base in ``self.excel_db`` (columns
    ``Cabinet``, ``Remark``, ``Author``, ``Cabinet_Clean``) and the list of
    recognised document codes in ``self.known_docs``.
    """

    # Cabinet designation patterns. Separators between the groups may be any
    # mix of dots and whitespace, because PDF text extraction may split them.
    # Custom form, e.g. "РЛТ.1.006.ША.030" with an optional "-NN" suffix.
    _PATTERN_CUSTOM = re.compile(
        r"(РЛТ|ЛДАР|ВНАР|ШТМ)[\s\.]*\d{1}[\s\.]*\d{3}[\s\.]*[А-ЯA-Z]{1,4}[\s\.]*\d{3}(-[\d]+)?"
    )
    # Standard form, e.g. "ЛДАР.421246.337".
    _PATTERN_GOST = re.compile(r"(РЛТ|ЛДАР|ВНАР|ШТМ)[\s\.]*\d{6}[\s\.]*\d{3}")

    _WHITESPACE = re.compile(r"\s+")

    # Remark-cell parsing patterns (see get_remarks).
    _NUM_DOT_CAPITAL = re.compile(r'(\d+)\.([А-ЯA-Z])')
    _ITEM_SPLIT = re.compile(r'(?:^|\n)\s*(?=\d+[\.\)])')
    _LEADING_NUMBER = re.compile(r'^\d+[\.\)]\s*')
    _DOC_PREFIX = re.compile(
        r'^(?:Документ\s+|В\s+)?([А-ЯA-Z0-9\s,\(\)\-]+?)(?:[\.\:\-]|\s+)(.*)',
        re.IGNORECASE | re.DOTALL,
    )

    # Ordered (markers, doc type) rules; the first rule with a marker found in
    # the upper-cased file name wins. Order matters: "ПЭ3" contains "Э3".
    _DOC_TYPE_RULES = (
        (("С2",), "С2"),
        (("ПЭ3", "ПЕРЕЧЕНЬ"), "ПЭ3"),
        (("Э3", "СХЕМА ЭЛЕКТРИЧЕСКАЯ"), "Э3"),
        (("Э4",), "Э4"),
        (("В4", "СПЕЦИФИКАЦИЯ"), "В4"),
        (("ВО", "Э7", "ГАБАРИТ"), "ВО"),
        (("ТЭ5", "ТАБЛИЦА"), "ТЭ5"),
        (("СБ",), "СБ"),
        (("С5",), "С5"),
        (("ОЛ",), "ОЛ"),
        (("Э1",), "Э1"),
        (("Э6", "ЗАЗЕМЛЕНИЯ"), "Э6"),
        (("Д3", "МОНТАЖ"), "Д3"),
    )

    # determine_doc_type() reports "Э7" files as "ВО", while remarks may be
    # keyed by "Э7" (it is in known_docs). Without this alias such remarks
    # would never be attached to any file.
    _DOC_TYPE_ALIASES = {"ВО": ("Э7",)}

    # (registered font name, path) candidates tried in order for Cyrillic text.
    _FONT_CANDIDATES = (
        ("Arial", "arial.ttf"),
        ("Arial", "C:\\Windows\\Fonts\\arial.ttf"),
        ("DejaVuSans", "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"),
        ("LiberationSans", "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf"),
    )

    def __init__(self):
        self.excel_db = pd.DataFrame()
        self.known_docs = ["Э3", "В4", "ПЭ3", "ВО", "ТЭ5", "СБ", "С5", "ОЛ", "Э1", "Э4", "Э7", "Д3", "Э6"]

    @classmethod
    def _strip_ws(cls, text):
        """Return *text* with every whitespace character removed."""
        return cls._WHITESPACE.sub("", text)

    @staticmethod
    def _locate_header(df_raw, max_rows=20):
        """Find the header row of a raw (header-less) sheet.

        Scans the first *max_rows* rows for one that has both a cabinet column
        ("шкаф"/"cabinet") and a remark column ("примечание"/"remark").
        If several cells in the row match, the last one wins.

        Returns:
            ``(row_index, cabinet_col, remark_col)`` or ``None`` if not found.
        """
        for row_idx in range(min(max_rows, len(df_raw))):
            cab_col = -1
            rem_col = -1
            for col_idx, cell in enumerate(df_raw.iloc[row_idx].values):
                val = str(cell).lower().strip()
                if "шкаф" in val or "cabinet" in val:
                    cab_col = col_idx
                if "примечание" in val or "remark" in val:
                    rem_col = col_idx
            if cab_col != -1 and rem_col != -1:
                return row_idx, cab_col, rem_col
        return None

    def load_excel_db(self, excel_path):
        """Load the remark base from every sheet of an Excel workbook.

        Each sheet name is stored as the ``Author`` of its rows. Sheets without
        a recognisable header row are skipped. On any failure the previously
        loaded base is discarded, so the state matches the cleared dropdowns.

        Args:
            excel_path: Path to the workbook, or ``None`` when nothing is selected.

        Returns:
            ``(status message, author dropdown update, cabinet dropdown update)``.
        """
        print(f"--- Загрузка Excel: {excel_path} ---")
        if excel_path is None:
            self.excel_db = pd.DataFrame()
            return "Файл не выбран", gr.update(choices=[], value=None), gr.update(choices=[], value=None)

        all_data = []
        sheets_log = []
        try:
            xls = pd.read_excel(excel_path, sheet_name=None, header=None)
            for sheet_name, df_raw in xls.items():
                located = self._locate_header(df_raw)
                if located is None:
                    continue
                header_row, cab_col, rem_col = located

                df = pd.read_excel(excel_path, sheet_name=sheet_name, header=header_row)
                df_subset = df.iloc[:, [cab_col, rem_col]].copy()
                df_subset.columns = ["Cabinet", "Remark"]
                df_subset["Author"] = sheet_name
                # A cabinet name applies to the rows below it until the next one.
                df_subset["Cabinet"] = df_subset["Cabinet"].ffill()
                # Rows above the first cabinet have no cabinet at all; dropping
                # them avoids a bogus "nan" cabinet after the str conversion.
                df_subset = df_subset.dropna(subset=["Cabinet", "Remark"]).astype(str)
                df_subset["Cabinet_Clean"] = df_subset["Cabinet"].apply(self._strip_ws)
                all_data.append(df_subset)
                sheets_log.append(f"'{sheet_name}': {len(df_subset)}")

            if not all_data:
                self.excel_db = pd.DataFrame()
                return "❌ Ошибка: Не найдены заголовки 'Шкаф' и 'Примечание'.", gr.update(choices=[]), gr.update(choices=[])

            self.excel_db = pd.concat(all_data, ignore_index=True)
            authors_list = sorted(self.excel_db["Author"].unique().tolist())
            print(f"Excel загружен. Всего строк: {len(self.excel_db)}")
            msg = f"✅ База загружена!\nЗаписей: {len(self.excel_db)}\nАвторы: {', '.join(sheets_log)}"
            return msg, gr.update(choices=authors_list, value=None, interactive=True), gr.update(choices=[], value=None)
        except Exception as e:
            # UI boundary: report the problem instead of crashing the handler.
            self.excel_db = pd.DataFrame()
            print(f"Ошибка Excel: {e}")
            return f"❌ Ошибка: {e}", gr.update(choices=[]), gr.update(choices=[])

    def get_cabinets_by_author(self, author_name):
        """Return a dropdown update listing the cabinets of *author_name*."""
        if self.excel_db.empty or not author_name:
            return gr.update(choices=[], value=None)
        by_author = self.excel_db[self.excel_db["Author"] == author_name]
        cabinets = by_author["Cabinet"].unique().tolist()
        return gr.update(choices=sorted(cabinets), value=None, interactive=True)

    def extract_text(self, pdf_path):
        """Return the text of the first five pages of a PDF.

        Read errors are logged and whatever was extracted so far is returned
        (an empty string if nothing was).
        """
        chunks = []
        try:
            with pdfplumber.open(pdf_path) as pdf:
                for page in pdf.pages[:5]:
                    chunks.append((page.extract_text() or "") + "\n")
        except Exception as e:
            print(f"Ошибка чтения PDF {pdf_path}: {e}")
        return "".join(chunks)

    def find_all_decimal_numbers(self, text):
        """Return unique cabinet designations found in *text*.

        Matches of the custom pattern come first, then matches of the standard
        pattern, each in order of appearance. All whitespace is removed from a
        match so it can be compared with ``Cabinet_Clean`` keys.
        """
        found = {}
        for pattern in (self._PATTERN_CUSTOM, self._PATTERN_GOST):
            for match in pattern.finditer(text):
                # The patterns accept any whitespace between groups, so strip
                # all of it, not only spaces and newlines.
                found.setdefault(self._strip_ws(match.group(0)), None)
        return list(found)

    def determine_doc_type(self, filename):
        """Classify a document by markers in its file name.

        Returns the document type code, or ``"UNKNOWN"`` when no marker matches.
        """
        fname = filename.upper()
        for markers, doc_type in self._DOC_TYPE_RULES:
            if any(marker in fname for marker in markers):
                return doc_type
        return "UNKNOWN"

    def get_remarks(self, cabinet_key, is_clean_key=True):
        """Collect the remarks of a cabinet, grouped by document type.

        Args:
            cabinet_key: Cabinet designation or name.
            is_clean_key: When true, *cabinet_key* is matched as a
                case-insensitive substring of ``Cabinet_Clean``; otherwise it
                must equal the ``Cabinet`` value exactly.

        Returns:
            Dict mapping a document code (or ``"ALL"`` for remarks without a
            recognised document prefix) to a list of remark texts. Empty if the
            base is empty or the cabinet has no rows.
        """
        if self.excel_db.empty:
            return {}

        if is_clean_key:
            target = self._strip_ws(cabinet_key)
            mask = self.excel_db['Cabinet_Clean'].str.contains(re.escape(target), case=False, na=False)
        else:
            mask = self.excel_db['Cabinet'] == cabinet_key
        rows = self.excel_db[mask]
        if rows.empty:
            return {}

        parsed = {}
        for remark_cell in rows['Remark']:
            # "1.Text" -> "1. Text" so the document prefix can be recognised.
            cell_text = self._NUM_DOT_CAPITAL.sub(r'\1. \2', str(remark_cell))
            # One cell may hold several numbered items ("1. ...\n2) ...").
            for item in self._ITEM_SPLIT.split(cell_text):
                if len(item) < 3:
                    continue
                clean_item = item.strip()
                without_number = self._LEADING_NUMBER.sub('', clean_item)

                detected_docs = []
                final_text = clean_item
                match = self._DOC_PREFIX.match(without_number)
                if match:
                    codes = match.group(1).upper()
                    for bracket in "(),":
                        codes = codes.replace(bracket, " ")
                    valid_parts = [p for p in codes.split() if p in self.known_docs]
                    if valid_parts:
                        detected_docs = valid_parts
                        final_text = match.group(2).strip()

                if not detected_docs:
                    detected_docs = ["ALL"]
                for doc in detected_docs:
                    parsed.setdefault(doc, []).append(final_text)
        return parsed

    def _detect_cabinet(self, files, progress):
        """Detect the cabinet from the PDF contents.

        For every file, designations are tried first (exact match against
        ``Cabinet_Clean``), then cabinet names (substring of the text).

        Returns:
            ``(cabinet, method)`` where method is ``"number"`` or ``"name"``,
            or ``(None, "")`` when nothing was recognised.
        """
        db_clean_keys = set(self.excel_db["Cabinet_Clean"].tolist())

        # Name candidates do not depend on the file, so prepare them once.
        name_candidates = []
        for cab_name in self.excel_db["Cabinet"].unique():
            # Search only for values that look like a name, not a designation.
            if "ЛДАР" in cab_name or "РЛТ" in cab_name:
                continue
            needle = self._WHITESPACE.sub(" ", cab_name).strip().lower()
            if len(needle) > 5:
                name_candidates.append((cab_name, needle))

        for file_path in progress.tqdm(files, desc="Поиск номера шкафа"):
            raw_text = self.extract_text(file_path)

            for cand in self.find_all_decimal_numbers(raw_text):
                if cand in db_clean_keys:
                    print(f"✅ Шкаф найден по номеру: {cand}")
                    return cand, "number"

            # Collapse line breaks and whitespace runs so that a name split
            # across lines ("Шкаф\nСАУ") still matches "Шкаф САУ".
            flat_text = self._WHITESPACE.sub(" ", raw_text).lower()
            for cab_name, needle in name_candidates:
                if needle in flat_text:
                    print(f"✅ Шкаф найден по имени: {cab_name}")
                    return cab_name, "name"

        return None, ""

    def check_files(self, files, manual_cabinet, progress=gr.Progress()):
        """Build the checklist PDF for the uploaded documents.

        Args:
            files: Paths of the uploaded PDF files.
            manual_cabinet: Cabinet chosen by the user; when empty the cabinet
                is detected from the PDF contents.
            progress: Gradio progress tracker.

        Returns:
            ``(log message, path to the PDF or None)``.
        """
        print("\n--- Начало проверки ---")
        if not files:
            return "Файлы не загружены", None
        if self.excel_db.empty:
            return "Сначала загрузите Excel базу!", None

        is_manual = bool(manual_cabinet and manual_cabinet.strip())
        if is_manual:
            detected_cabinet, found_by_method = manual_cabinet, "manual"
        else:
            detected_cabinet, found_by_method = self._detect_cabinet(files, progress)

        print(f"Определен шкаф: {detected_cabinet if detected_cabinet is not None else 'Не определен'}")
        if detected_cabinet is None:
            return "⚠️ Шкаф не опознан автоматически.\nВыберите Автора и Шкаф вручную.", None

        is_clean_search = (found_by_method == "number")
        remarks = self.get_remarks(detected_cabinet, is_clean_key=is_clean_search)
        if not remarks:
            return f"⚠️ Для шкафа '{detected_cabinet}' нет замечаний в базе.", None

        checklist = {}
        processed_count = 0
        for file_path in files:
            fname = os.path.basename(file_path)
            dtype = self.determine_doc_type(fname)

            tasks = []
            for key in (dtype,) + self._DOC_TYPE_ALIASES.get(dtype, ()):
                tasks.extend(remarks.get(key, []))
            # General remarks apply to every document except "С2".
            if dtype != "С2":
                tasks.extend(remarks.get("ALL", []))

            if tasks:
                # dict.fromkeys removes duplicates while keeping the order.
                checklist[fname] = list(dict.fromkeys(tasks))
                processed_count += 1

        pdf_title = detected_cabinet
        if is_manual:
            pdf_title += " (Выбор вручную)"

        print("Генерация PDF...")
        try:
            pdf = self.create_pdf(pdf_title, checklist)
        except Exception as e:
            print(f"ОШИБКА PDF: {e}")
            return f"Ошибка создания PDF: {e}", None

        total = sum(len(v) for v in checklist.values())
        method_str = "Ручной выбор" if is_manual else ("По номеру" if is_clean_search else "По имени")
        return f"✅ Готово!\n📂 Шкаф: {detected_cabinet}\n🔍 Метод: {method_str}\n📄 Файлов: {processed_count}\n🚩 Замечаний: {total}", pdf

    def _register_font(self):
        """Register the first available TrueType font and return its name.

        Falls back to the built-in ``Helvetica`` when no candidate can be
        loaded. NOTE(review): Helvetica presumably cannot render the Cyrillic
        text of the checklist - confirm and ship a font if needed.
        """
        for font_name, font_path in self._FONT_CANDIDATES:
            if not os.path.exists(font_path):
                continue
            try:
                pdfmetrics.registerFont(TTFont(font_name, font_path))
            except Exception as e:
                # Best effort: a broken font file must not abort the report.
                print(f"Font registration failed for {font_path}: {e}")
                continue
            return font_name
        return 'Helvetica'

    @staticmethod
    def _wrap_words(paragraph, max_len=95):
        """Split *paragraph* into lines of at most *max_len* characters.

        Words are never broken; a word longer than the limit gets its own line.
        An empty paragraph yields one blank line, which keeps vertical spacing.
        """
        lines = []
        cur_line = ""
        for word in paragraph.split(' '):
            if len(cur_line) + len(word) + 1 <= max_len:
                cur_line += word + " "
            else:
                # Skip the empty line that would otherwise precede a word
                # longer than the limit at the start of a paragraph.
                if cur_line:
                    lines.append(cur_line)
                cur_line = word + " "
        lines.append(cur_line)
        return lines

    def create_pdf(self, cabinet, data):
        """Render the checklist as a PDF with one checkbox per remark.

        Args:
            cabinet: Title shown in the header (truncated to 60 characters).
            data: Dict mapping a file name to its list of remark texts.

        Returns:
            Path of the generated PDF file.
        """
        # A fresh directory per report keeps the download name stable while
        # preventing concurrent requests from overwriting each other's file.
        # The directories are not removed here.
        out_dir = tempfile.mkdtemp(prefix="kd_checklist_")
        path = os.path.join(out_dir, "CheckList_Result.pdf")

        c = canvas.Canvas(path, pagesize=A4)
        form = c.acroForm
        width, height = A4
        font_name = self._register_font()
        top_y = height - 50

        def new_page(font_size):
            # showPage() starts a new page, so the font is set again.
            c.showPage()
            c.setFont(font_name, font_size)
            return top_y

        # Header.
        y = top_y
        c.setFont(font_name, 16)
        c.drawString(50, y, "ЧЕК-ЛИСТ ПРОВЕРКИ КД")
        y -= 25
        c.setFont(font_name, 12)
        disp_cab = cabinet[:60] + "..." if len(cabinet) > 60 else cabinet
        c.drawString(50, y, f"Шкаф: {disp_cab}")
        c.drawString(400, y, f"Дата: {datetime.now().strftime('%d.%m.%Y')}")
        y -= 20
        c.line(50, y, width - 50, y)
        y -= 30

        if not data:
            c.drawString(50, y, "Нет замечаний.")
            c.save()
            return path

        cb_id = 0
        for filename, tasks in data.items():
            if y < 100:
                y = new_page(12)
            c.setFillColor(colors.darkblue)
            c.setFont(font_name, 11)
            c.drawString(50, y, f"Файл: {filename}")
            c.setFillColor(colors.black)
            y -= 15
            c.setFont(font_name, 10)

            for task in tasks:
                if y < 80:
                    y = new_page(10)
                form.checkbox(name=f"cb_{cb_id}", x=50, y=y - 10, size=10, buttonStyle='check', forceBorder=True, fillColor=colors.white)
                cb_id += 1

                text_y = y - 2
                for paragraph in task.split('\n'):
                    for line in self._wrap_words(paragraph):
                        if text_y < 40:
                            text_y = new_page(10)
                        c.drawString(65, text_y, line.strip())
                        text_y -= 12
                y = text_y - 8

            # Light separator between files.
            y -= 10
            c.setStrokeColor(colors.lightgrey)
            c.line(50, y, width - 50, y)
            c.setStrokeColor(colors.black)
            y -= 20

        c.save()
        return path
# --- UI ---
# Custom CSS for the page: widens the Gradio container to 95%, fixes the height
# of elements marked "compact_file" at 150px, styles elements marked
# "orange_btn" (including hover), and hides the footer.
css = """
.gradio-container { max-width: 95% !important; }
.compact_file { height: 150px !important; min-height: 150px !important; max-height: 150px !important; overflow: hidden !important; }
.orange_btn { background: #FF7F27 !important; border: none !important; color: white !important; font-weight: bold; }
.orange_btn:hover { background: #E06010 !important; }
footer { display: none !important; }
"""
def create_app():
    """Assemble the Gradio Blocks UI and wire it to a single KDChecker.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    checker = KDChecker()

    # css and theme are not passed to gr.Blocks here; they are supplied
    # when the app is launched.
    with gr.Blocks(title="Генератор чек-листов КД") as app:
        gr.Markdown("## ✅ Генератор чек-листов КД")

        with gr.Row():
            # Left column: remark base upload and manual cabinet selection.
            with gr.Column(scale=1):
                gr.Markdown("### 1. База технических замечаний")
                excel_file = gr.File(label="Excel (.xlsx)", type="filepath", elem_classes="compact_file")
                with gr.Group():
                    gr.Markdown("#### Ручной выбор:")
                    author_choice = gr.Dropdown(label="1. Разработчик КД", choices=[], interactive=True)
                    cabinet_choice = gr.Dropdown(label="2. Шкаф", choices=[], interactive=True)
                db_status = gr.Textbox(label="Статус базы", lines=2, max_lines=3, interactive=False)

            # Right column: documents to check and the action button.
            with gr.Column(scale=1):
                gr.Markdown("### 2. Документация")
                pdf_files = gr.File(
                    label="Конструкторская документация (PDF)",
                    file_count="multiple",
                    type="filepath",
                    elem_classes="compact_file",
                )
                gr.Markdown("")
                run_button = gr.Button("Сформировать чек-лист", variant="primary", elem_classes="orange_btn")

        gr.Markdown("### 3. Результат")
        with gr.Row():
            with gr.Column(scale=1):
                log_box = gr.Textbox(label="Лог проверки", lines=5)
            with gr.Column(scale=1):
                result_pdf = gr.File(label="Скачать готовый PDF")

        # Event wiring, in the same registration order as before.
        excel_file.upload(
            checker.load_excel_db,
            inputs=[excel_file],
            outputs=[db_status, author_choice, cabinet_choice],
        )
        author_choice.change(
            checker.get_cabinets_by_author,
            inputs=[author_choice],
            outputs=[cabinet_choice],
        )
        run_button.click(
            checker.check_files,
            inputs=[pdf_files, cabinet_choice],
            outputs=[log_box, result_pdf],
        )

    return app
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    app = create_app()
    # Serve on 0.0.0.0:7860 with server-side rendering turned off.
    # NOTE(review): css and theme are passed to launch() rather than to
    # gr.Blocks(); presumably this targets a Gradio release that accepts
    # them here - confirm against the pinned Gradio version.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        css=css,
        theme=gr.themes.Soft(),
        ssr_mode=False
    )