# Ed5's picture
# Update app.py
# 418e0a8 verified
# raw
# history blame
# 15.7 kB
import gradio as gr
import pandas as pd
import pdfplumber
import os
import tempfile
import re
from datetime import datetime
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.lib import colors
# ========== AUTHORIZATION SETTINGS ==========
# Credentials are taken from the environment so that real secrets never have
# to live in the source file. The historic hard-coded values are kept only as
# a fallback so existing deployments keep working unchanged.
AUTH_USERNAME = os.environ.get("AUTH_USERNAME") or "admin"
AUTH_PASSWORD = os.environ.get("AUTH_PASSWORD") or "12345"
# ============================================
print("===== Application Startup =====")
print(f"Time: {datetime.now()}")
print(f"Gradio version: {gr.__version__}")
# The password itself is never echoed to the log.
print(f"Auth: {AUTH_USERNAME} / *****")
if AUTH_PASSWORD == "12345":
    print("WARNING: default password is in use; set the AUTH_PASSWORD environment variable")
print("================================")
class KDChecker:
    """Build review check-lists for design-documentation (KD) PDF packages.

    The checker keeps an in-memory knowledge base loaded from an Excel
    workbook (cabinet -> free-text remarks), works out which cabinet a set
    of uploaded PDF drawings belongs to, and renders the matching remarks
    as a printable PDF check-list.
    """

    # Decimal (drawing) numbers such as "РЛТ 1.234.АБ 567-01".
    _DECIMAL_NUMBER_RE = re.compile(
        r"(РЛТ|ЛДАР|ВНАР|ШТМ)[\s\.]*\d{1}[\s\.]*\d{3}[\s\.]*[А-ЯA-Z]{1,4}[\s\.]*\d{3}(-[\d]+)?"
    )
    # "1.Текст" -> "1. Текст": separates an item number glued to its text.
    _GLUED_NUMBER_RE = re.compile(r'(\d+)\.([А-ЯA-Z])')
    # Splits a remark cell into numbered items ("1. ...", "2) ...").
    _ITEM_SPLIT_RE = re.compile(r'(?:^|\n)\s*(?=\d+[\.\)])')
    # Leading item number of a single item.
    _ITEM_NUMBER_RE = re.compile(r'^\d+[\.\)]\s*')
    # Optional document-type prefix in front of the remark text.
    _DOC_PREFIX_RE = re.compile(
        r'^(?:Документ\s+|В\s+)?([А-ЯA-Z0-9\s,\(\)\-]+?)(?:[\.\:\-]|\s+)(.*)',
        re.IGNORECASE | re.DOTALL,
    )

    # (markers, document type) rules, evaluated top to bottom. Order matters:
    # "ПЭ3" must be tested before "Э3" because the latter is its substring.
    _DOC_TYPE_RULES = (
        (("С2",), "С2"),
        (("ПЭ3", "ПЕРЕЧЕНЬ"), "ПЭ3"),
        (("Э3", "СХЕМА ЭЛЕКТРИЧЕСКАЯ"), "Э3"),
        (("Э4",), "Э4"),
        (("В4", "СПЕЦИФИКАЦИЯ"), "В4"),
        (("ВО", "Э7", "ГАБАРИТ"), "ВО"),
        (("ТЭ5", "ТАБЛИЦА"), "ТЭ5"),
        (("СБ",), "СБ"),
        (("С5",), "С5"),
        (("ОЛ",), "ОЛ"),
        (("Э1",), "Э1"),
        (("Э6", "ЗАЗЕМЛЕНИЯ"), "Э6"),
        (("Д3", "МОНТАЖ"), "Д3"),
    )

    # Name under which the Unicode PDF font is registered with ReportLab.
    _PDF_FONT_NAME = "KDCheckerSans"
    # TrueType fonts with Cyrillic glyphs, tried in order. A bare file name
    # is resolved by ReportLab through its own font search path.
    _PDF_FONT_CANDIDATES = (
        "DejaVuSans.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/TTF/DejaVuSans.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
        "C:\\Windows\\Fonts\\arial.ttf",
    )

    def __init__(self):
        """Create an empty checker; call ``load_excel_db`` before checking."""
        # Columns once loaded: "Cabinet", "Remark", "Cabinet_Clean".
        self.excel_db = pd.DataFrame()
        # Sorted unique cabinet names offered in the manual dropdown.
        self.cabinet_list = []
        # Document-type codes recognised as a prefix of a remark.
        self.known_docs = ["Э3", "В4", "ПЭ3", "ВО", "ТЭ5", "СБ", "С5", "ОЛ", "Э1", "Э4", "Э7", "Д3", "Э6"]
        print("[KDChecker] Initialized")

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _as_path(file_obj):
        """Return a filesystem path for a Gradio upload (str or file-like)."""
        if isinstance(file_obj, str):
            return file_obj
        return file_obj.name if hasattr(file_obj, 'name') else str(file_obj)

    @staticmethod
    def _squash_whitespace(value):
        """Remove every whitespace character (spaces, tabs, newlines, NBSP)."""
        return re.sub(r"\s+", "", value)

    @staticmethod
    def _dropdown_update(choices):
        """Build the dropdown update payload with no preselected value.

        ``gr.update`` is used because ``gr.Dropdown.update`` no longer exists
        in Gradio 4, while ``gr.update`` is available in both 3.x and 4.x.
        """
        return gr.update(choices=choices, value=None)

    @staticmethod
    def _locate_header(df_raw, max_rows=20):
        """Find the header row and the cabinet/remark column indices.

        Scans the first ``max_rows`` rows of a sheet read without a header.
        Returns ``(header_row, cab_col, rem_col)``; ``header_row`` is -1 when
        no row completes the pair. Column indices intentionally persist from
        one scanned row to the next (as in the original logic), so the header
        row is the first row by which both columns have been seen.
        """
        cab_col = -1
        rem_col = -1
        for row_idx in range(min(max_rows, len(df_raw))):
            for col_idx, cell in enumerate(df_raw.iloc[row_idx].values):
                val = str(cell).lower().strip()
                if "шкаф" in val or "cabinet" in val:
                    cab_col = col_idx
                if "примечание" in val or "remark" in val:
                    rem_col = col_idx
            if cab_col != -1 and rem_col != -1:
                return row_idx, cab_col, rem_col
        return -1, cab_col, rem_col

    # ------------------------------------------------------------------
    # Knowledge base
    # ------------------------------------------------------------------
    def load_excel_db(self, excel_file):
        """Load the Excel knowledge base.

        Args:
            excel_file: path string or uploaded file object with ``.name``;
                ``None`` when the upload widget was cleared.

        Returns:
            ``(status_text, dropdown_update)`` for the status box and the
            cabinet dropdown. On failure the status text contains the
            diagnostic log and the dropdown is emptied; a previously loaded
            database is left untouched.
        """
        log = [f"[{datetime.now().strftime('%H:%M:%S')}] Начало загрузки"]
        if excel_file is None:
            return "❌ Файл не выбран", self._dropdown_update([])

        excel_path = self._as_path(excel_file)
        log.append(f"Путь: {excel_path}")
        log.append(f"Тип: {type(excel_file)}")
        if not os.path.exists(excel_path):
            log.append("❌ Файл не найден!")
            return "\n".join(log), self._dropdown_update([])
        try:
            log.append(f"Размер: {os.path.getsize(excel_path)} байт")
        except Exception as e:
            log.append(f"Ошибка размера: {e}")

        all_data = []
        sheets_info = []
        try:
            log.append("Читаю Excel...")
            # openpyxl cannot open legacy .xls workbooks, which the upload
            # widget also accepts; let pandas choose the engine for those.
            extension = os.path.splitext(excel_path)[1].lower()
            engine = None if extension == ".xls" else 'openpyxl'

            # Open the workbook once and parse sheets from the same handle
            # instead of re-reading the file for every sheet.
            with pd.ExcelFile(excel_path, engine=engine) as workbook:
                raw_sheets = workbook.parse(sheet_name=None, header=None)
                log.append(f"Листов найдено: {len(raw_sheets)}")
                for sheet_name, df_raw in raw_sheets.items():
                    header_row, cab_col, rem_col = self._locate_header(df_raw)
                    if header_row == -1:
                        continue
                    df = workbook.parse(sheet_name=sheet_name, header=header_row)
                    if cab_col >= len(df.columns) or rem_col >= len(df.columns):
                        continue
                    # .copy() so the edits below never touch a view of df.
                    df_sub = df.iloc[:, [cab_col, rem_col]].copy()
                    df_sub.columns = ["Cabinet", "Remark"]
                    # Merged cabinet cells arrive as NaN below the first row.
                    df_sub["Cabinet"] = df_sub["Cabinet"].ffill()
                    # Rows that still have no cabinet cannot be attributed
                    # and would otherwise become a bogus "nan" cabinet.
                    df_sub = df_sub.dropna(subset=["Cabinet", "Remark"]).astype(str)
                    df_sub["Cabinet_Clean"] = df_sub["Cabinet"].map(self._squash_whitespace)
                    all_data.append(df_sub)
                    sheets_info.append(f"'{sheet_name}': {len(df_sub)}")

            if not all_data:
                log.append("❌ Данные не найдены ни на одном листе")
                return "\n".join(log), self._dropdown_update([])

            self.excel_db = pd.concat(all_data, ignore_index=True)
            self.cabinet_list = sorted(self.excel_db["Cabinet"].unique().tolist())
            result = "✅ База загружена!\n\n"
            result += f"📊 Записей: {len(self.excel_db)}\n"
            result += f"🗄️ Шкафов: {len(self.cabinet_list)}\n"
            result += f"📋 Листы: {', '.join(sheets_info)}"
            return result, self._dropdown_update(self.cabinet_list)
        except Exception as e:
            # UI boundary: report the failure to the user instead of crashing.
            log.append(f"❌ ОШИБКА: {str(e)}")
            import traceback
            log.append(traceback.format_exc())
            return "\n".join(log), self._dropdown_update([])

    # ------------------------------------------------------------------
    # PDF text analysis
    # ------------------------------------------------------------------
    def extract_text(self, pdf_path):
        """Return the text of every page of a PDF, one page per line block.

        Best effort: an unreadable PDF yields an empty string so that one
        broken file does not abort the whole check; the failure is logged.
        """
        try:
            with pdfplumber.open(pdf_path) as pdf:
                return "".join((page.extract_text() or "") + "\n" for page in pdf.pages)
        except Exception as exc:
            print(f"[KDChecker] Failed to read PDF '{pdf_path}': {exc}")
            return ""

    def find_decimal_numbers(self, text):
        """Return unique decimal numbers found in ``text``, in first-seen order.

        All whitespace inside a match is removed (dots are kept), so the
        result is comparable with the ``Cabinet_Clean`` column.
        """
        unique = {}
        for match in self._DECIMAL_NUMBER_RE.finditer(text or ""):
            unique.setdefault(self._squash_whitespace(match.group(0)), None)
        return list(unique)

    def determine_doc_type(self, filename):
        """Classify a file by markers in its name; ``"UNKNOWN"`` if none match."""
        upper_name = filename.upper()
        for markers, doc_type in self._DOC_TYPE_RULES:
            if any(marker in upper_name for marker in markers):
                return doc_type
        return "UNKNOWN"

    # ------------------------------------------------------------------
    # Remarks lookup
    # ------------------------------------------------------------------
    def _parse_remark(self, text, known_docs):
        """Split one remark cell into ``(doc_types, remark_text)`` pairs.

        An item whose prefix names known document types is filed under those
        types with the prefix removed; any other item is filed under
        ``["ALL"]`` with its full text (item number included).
        """
        text = self._GLUED_NUMBER_RE.sub(r'\1. \2', text)
        pairs = []
        for item in self._ITEM_SPLIT_RE.split(text):
            if len(item) < 3:
                continue
            clean_item = item.strip()
            no_num = self._ITEM_NUMBER_RE.sub('', clean_item)
            docs = []
            final_text = clean_item
            match = self._DOC_PREFIX_RE.match(no_num)
            if match:
                prefix = match.group(1).upper()
                tokens = prefix.replace("(", " ").replace(")", " ").replace(",", " ").split()
                valid = [token for token in tokens if token in known_docs]
                if valid:
                    docs = valid
                    final_text = match.group(2).strip()
            pairs.append((docs or ["ALL"], final_text))
        return pairs

    def get_remarks(self, cabinet_key, is_clean=True):
        """Return remarks for a cabinet grouped by document type.

        Args:
            cabinet_key: cabinet identifier to look up.
            is_clean: when true, ``cabinet_key`` is matched as a
                case-insensitive substring of the whitespace-free
                ``Cabinet_Clean`` column; otherwise it must equal the
                ``Cabinet`` column exactly.

        Returns:
            ``{doc_type: [remark, ...]}``; remarks without a recognised
            document prefix are stored under ``"ALL"``. Empty dict when the
            database is empty or nothing matches.
        """
        if self.excel_db.empty:
            return {}
        if is_clean:
            target = self._squash_whitespace(cabinet_key)
            if not target:
                # An empty pattern would match every row.
                return {}
            mask = self.excel_db['Cabinet_Clean'].str.contains(re.escape(target), case=False, na=False)
        else:
            mask = self.excel_db['Cabinet'] == cabinet_key
        rows = self.excel_db[mask]
        if rows.empty:
            return {}

        known_docs = set(self.known_docs)
        parsed = {}
        for remark in rows['Remark']:
            for docs, remark_text in self._parse_remark(str(remark), known_docs):
                for doc in docs:
                    parsed.setdefault(doc, []).append(remark_text)
        return parsed

    # ------------------------------------------------------------------
    # Check-list generation
    # ------------------------------------------------------------------
    def _detect_cabinet(self, file_paths):
        """Guess the cabinet from PDF contents.

        Returns ``(cabinet, method)`` where ``method`` is ``"number"`` (a
        decimal number from the PDFs equals a ``Cabinet_Clean`` value) or
        ``"name"`` (a comma-separated part of a cabinet name, at least five
        characters long, occurs in the text). ``(None, "")`` if undetected.
        """
        all_text = "".join(self.extract_text(fp) + "\n" for fp in file_paths)

        db_keys = set(self.excel_db["Cabinet_Clean"].tolist())
        for number in self.find_decimal_numbers(all_text):
            if number in db_keys:
                return number, "number"

        lowered_text = all_text.lower()  # hoisted: invariant for the loops below
        for cabinet in self.excel_db["Cabinet"].unique():
            for part in cabinet.split(','):
                part = part.strip()
                if len(part) >= 5 and part.lower() in lowered_text:
                    return cabinet, "name"
        return None, ""

    def check_files(self, files, manual_cabinet):
        """Build the check-list PDF for the uploaded drawings.

        Args:
            files: uploaded PDFs (path strings or objects with ``.name``).
            manual_cabinet: cabinet chosen in the dropdown; when blank or
                ``None`` the cabinet is detected from the PDF text.

        Returns:
            ``(status_text, pdf_path)``; ``pdf_path`` is ``None`` whenever
            no check-list was produced.
        """
        if not files:
            return "❌ Загрузите PDF файлы", None
        if self.excel_db.empty:
            return "❌ Сначала загрузите Excel базу!", None

        file_paths = [self._as_path(f) for f in files]

        is_manual = bool(manual_cabinet and manual_cabinet.strip())
        if is_manual:
            detected_cabinet = manual_cabinet
            method = "manual"
        else:
            detected_cabinet, method = self._detect_cabinet(file_paths)
            if detected_cabinet is None:
                return "⚠️ Шкаф не определён. Выберите вручную из списка.", None

        # Only a number-based hit refers to the whitespace-free column.
        is_clean = (method == "number")
        remarks = self.get_remarks(detected_cabinet, is_clean=is_clean)
        if not remarks:
            return f"⚠️ Шкаф '{detected_cabinet}' найден, но замечаний нет.", None

        checklist = {}
        for fp in file_paths:
            fname = os.path.basename(fp)
            dtype = self.determine_doc_type(fname)
            tasks = list(remarks.get(dtype, []))
            # General remarks apply to every document type except "С2".
            if dtype != "С2":
                tasks.extend(remarks.get("ALL", []))
            if tasks:
                # De-duplicate while keeping the original order.
                checklist[fname] = list(dict.fromkeys(tasks))

        title = detected_cabinet
        if is_manual:
            title += " (ручной выбор)"
        try:
            pdf_path = self.create_pdf(title, checklist)
        except Exception as e:
            return f"❌ Ошибка PDF: {e}", None

        total = sum(len(v) for v in checklist.values())
        method_str = "Ручной" if is_manual else ("По номеру" if is_clean else "По названию")
        result = "✅ Готово!\n\n"
        result += f"📂 Шкаф: {detected_cabinet}\n"
        result += f"🔍 Метод: {method_str}\n"
        result += f"📄 Файлов: {len(file_paths)}\n"
        result += f"🚩 Замечаний: {total}"
        return result, pdf_path

    def _resolve_pdf_font(self):
        """Return a font name able to render Cyrillic, registering it if needed.

        The built-in Helvetica has no Cyrillic glyphs, so cabinet names and
        remarks would be unreadable. The ``KD_PDF_FONT`` environment variable
        (path to a .ttf file) takes precedence over the built-in candidates.
        Falls back to ``"Helvetica"`` when no TrueType font can be loaded.
        """
        if self._PDF_FONT_NAME in pdfmetrics.getRegisteredFontNames():
            return self._PDF_FONT_NAME
        candidates = list(self._PDF_FONT_CANDIDATES)
        override = os.environ.get("KD_PDF_FONT")
        if override:
            candidates.insert(0, override)
        for candidate in candidates:
            try:
                pdfmetrics.registerFont(TTFont(self._PDF_FONT_NAME, candidate))
                return self._PDF_FONT_NAME
            except Exception:
                # Font missing or unreadable on this system; try the next one.
                continue
        print("[KDChecker] No Unicode TTF font found; Cyrillic text may not render in the PDF")
        return 'Helvetica'

    @staticmethod
    def _wrap_text(text, limit=85):
        """Greedily wrap ``text`` into lines shorter than ``limit`` characters.

        Newlines are treated as spaces. A single word longer than the limit
        is kept whole on its own line (no empty line is emitted before it).
        """
        lines = []
        current = ""
        for word in text.replace('\n', ' ').split():
            if not current:
                current = word
            elif len(current) + 1 + len(word) < limit:
                current = current + " " + word
            else:
                lines.append(current)
                current = word
        if current:
            lines.append(current)
        return lines

    def create_pdf(self, cabinet, data):
        """Render the check-list to a PDF and return its path.

        Args:
            cabinet: title shown in the header (truncated to 50 characters).
            data: ``{filename: [remark, ...]}``; an empty mapping produces a
                page stating that there are no remarks.

        Returns:
            Path of a newly created file in the system temp directory. Each
            call gets its own file so concurrent requests cannot overwrite
            each other's check-list. The file is not deleted by this class.
        """
        fd, path = tempfile.mkstemp(prefix="CheckList_", suffix=".pdf")
        os.close(fd)  # ReportLab reopens the path itself on save()

        c = canvas.Canvas(path, pagesize=A4)
        width, height = A4
        font = self._resolve_pdf_font()
        top = height - 50

        # --- Header ---
        y = top
        c.setFont(font, 16)
        c.drawString(50, y, "CHECK-LIST KD")
        y -= 25
        c.setFont(font, 12)
        cab_display = cabinet[:50] + "..." if len(cabinet) > 50 else cabinet
        c.drawString(50, y, f"Cabinet: {cab_display}")
        c.drawString(400, y, f"Date: {datetime.now().strftime('%d.%m.%Y')}")
        y -= 20
        c.line(50, y, width - 50, y)
        y -= 30

        if not data:
            c.drawString(50, y, "No remarks for loaded files.")
            c.save()
            return path

        # --- One section per file ---
        for filename, tasks in data.items():
            if y < 100:
                c.showPage()
                y = top
            c.setFillColor(colors.darkblue)
            c.setFont(font, 11)
            c.drawString(50, y, f"File: {filename}")
            c.setFillColor(colors.black)
            y -= 18
            c.setFont(font, 10)

            for task in tasks:
                if y < 60:
                    c.showPage()
                    y = top
                    c.setFont(font, 10)  # showPage resets the font state
                # Empty checkbox in front of the remark.
                c.rect(50, y - 2, 8, 8, stroke=1, fill=0)
                for text_line in self._wrap_text(task):
                    if y < 40:
                        c.showPage()
                        y = top
                        c.setFont(font, 10)
                    c.drawString(65, y, text_line)
                    y -= 12
                y -= 5

            # Light separator between file sections.
            y -= 15
            c.setStrokeColor(colors.lightgrey)
            c.line(50, y, width - 50, y)
            c.setStrokeColor(colors.black)
            y -= 10

        c.save()
        return path
# ========== UI CONSTRUCTION ==========
# A single checker instance backs the interface; its loaded knowledge base
# lives for as long as the process does.
checker = KDChecker()

with gr.Blocks(title="КД Checker") as app:
    # Page heading and a short usage hint.
    gr.Markdown("# ✅ Генератор чек-листов КД")
    gr.Markdown("Загрузите Excel базу и PDF чертежи для формирования чек-листа.")

    with gr.Row():
        # Left column: knowledge base upload, manual cabinet choice, status.
        with gr.Column():
            gr.Markdown("### 📁 1. База знаний (Excel)")
            db_input = gr.File(
                label="Загрузить .xlsx файл",
                file_types=[".xlsx", ".xls"],
            )
            cabinet_dropdown = gr.Dropdown(
                label="Или выберите шкаф вручную",
                choices=[],
                interactive=True,
            )
            db_status = gr.Textbox(label="Статус", lines=6)

        # Right column: drawings to check and the action button.
        with gr.Column():
            gr.Markdown("### 📄 2. Чертежи (PDF)")
            pdf_input = gr.File(
                label="Загрузить PDF",
                file_count="multiple",
                file_types=[".pdf"],
            )
            run_btn = gr.Button("🔍 Сформировать чек-лист", variant="primary")

    with gr.Row():
        # Textual summary next to the downloadable check-list.
        result_text = gr.Textbox(label="Результат", lines=6)
        result_pdf = gr.File(label="📥 Скачать PDF")

    # Events
    # (Re)load the knowledge base whenever the Excel upload changes.
    db_input.change(
        fn=checker.load_excel_db,
        inputs=[db_input],
        outputs=[db_status, cabinet_dropdown],
    )
    # Produce the check-list from the PDFs and the optional manual cabinet.
    run_btn.click(
        fn=checker.check_files,
        inputs=[pdf_input, cabinet_dropdown],
        outputs=[result_text, result_pdf],
    )
# ========== LAUNCH ==========
if __name__ == "__main__":
    # Listen on every interface on port 7860 and require a login
    # before the interface is shown.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "auth": (AUTH_USERNAME, AUTH_PASSWORD),
        "auth_message": "Введите логин и пароль",
    }
    app.launch(**launch_options)