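# NOTE: the following lines are residue from the Hugging Face file viewer page;
# they are kept as comments so that the module remains valid Python.
# Ed5's picture
# Update app.py
# 97a4e0f verified
# raw
# history blame
# 21.2 kB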
import gradio as gr
import pandas as pd
import pdfplumber
import os
import tempfile
import re
from datetime import datetime
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.lib import colors
# ========== AUTHENTICATION SETTINGS ==========
# Credentials are read from the environment when set, so real secrets do not
# have to live in the source file. The literals are the original defaults and
# are used when the variable is missing or empty.
AUTH_USERNAME = os.environ.get("AUTH_USERNAME") or "admin"
AUTH_PASSWORD = os.environ.get("AUTH_PASSWORD") or "12345"
# =============================================
# Startup banner; the password is intentionally never printed.
print("===== Application Startup =====")
print(f"Time: {datetime.now()}")
print(f"Auth enabled: Username={AUTH_USERNAME}")
print("================================")
class KDChecker:
    """Check-list generator for design-documentation (KD) review.

    Holds a table of per-cabinet remarks loaded from an Excel workbook and
    turns a set of PDF files into a PDF check-list of the remarks that apply
    to each file.

    Attributes:
        excel_db: DataFrame with string columns ``Cabinet``, ``Remark`` and
            ``Cabinet_Clean`` (cabinet text with spaces and line breaks
            removed). Empty until :meth:`load_excel_db` succeeds.
        cabinet_list: Sorted unique ``Cabinet`` values (dropdown choices).
        known_docs: Document-type codes recognised at the start of a remark.
    """

    # Decimal number: a fixed prefix followed by digit/letter groups, each
    # optionally separated by whitespace and/or dots.
    _DECIMAL_NUMBER_RE = re.compile(
        r"(РЛТ|ЛДАР|ВНАР|ШТМ)[\s\.]*\d{1}[\s\.]*\d{3}[\s\.]*[А-ЯA-Z]{1,4}[\s\.]*\d{3}(-[\d]+)?"
    )
    _WHITESPACE_RE = re.compile(r"\s+")

    # Remark-cell parsing patterns (see get_remarks).
    _GLUED_NUMBER_RE = re.compile(r'(\d+)\.([А-ЯA-Z])')
    _ITEM_SPLIT_RE = re.compile(r'(?:^|\n)\s*(?=\d+[\.\)])')
    _ITEM_NUMBER_RE = re.compile(r'^\d+[\.\)]\s*')
    _DOC_PREFIX_RE = re.compile(
        r'^(?:Документ\s+|В\s+)?([А-ЯA-Z0-9\s,\(\)\-]+?)(?:[\.\:\-]|\s+)(.*)',
        re.IGNORECASE | re.DOTALL,
    )

    # (result, substrings) pairs checked in order against the upper-cased
    # file name. Order matters: "ПЭ3" must be tested before "Э3".
    _DOC_TYPE_RULES = (
        ("С2", ("С2",)),
        ("ПЭ3", ("ПЭ3", "ПЕРЕЧЕНЬ")),
        ("Э3", ("Э3", "СХЕМА ЭЛЕКТРИЧЕСКАЯ")),
        ("Э4", ("Э4",)),
        ("В4", ("В4", "СПЕЦИФИКАЦИЯ")),
        ("ВО", ("ВО", "Э7", "ГАБАРИТ")),
        ("ТЭ5", ("ТЭ5", "ТАБЛИЦА")),
        ("СБ", ("СБ",)),
        ("С5", ("С5",)),
        ("ОЛ", ("ОЛ",)),
        ("Э1", ("Э1",)),
        ("Э6", ("Э6", "ЗАЗЕМЛЕНИЯ")),
        ("Д3", ("Д3", "МОНТАЖ")),
    )

    def __init__(self):
        self.excel_db = pd.DataFrame()
        self.cabinet_list = []
        self.known_docs = ["Э3", "В4", "ПЭ3", "ВО", "ТЭ5", "СБ", "С5", "ОЛ", "Э1", "Э4", "Э7", "Д3", "Э6"]
        print("[KDChecker] Инициализация завершена")

    @staticmethod
    def _to_path(file_obj):
        """Return a filesystem path for a str, an object with ``.name``, or anything else via ``str``."""
        if isinstance(file_obj, str):
            return file_obj
        if hasattr(file_obj, 'name'):
            return file_obj.name
        return str(file_obj)

    @staticmethod
    def _locate_header(df_raw, max_rows=20):
        """Find the header row among the first ``max_rows`` rows of a raw sheet.

        Returns ``(row_index, cabinet_col, remark_col)`` for the first row that
        has both a cabinet cell ("шкаф"/"cabinet") and a remark cell
        ("примечание"/"remark"), or ``None``. When several cells in the row
        match, the right-most one wins.
        """
        for row_no in range(min(max_rows, len(df_raw))):
            cells = [str(v).lower().strip() for v in df_raw.iloc[row_no].values]
            cab_idx = rem_idx = -1
            for idx, cell in enumerate(cells):
                if "шкаф" in cell or "cabinet" in cell:
                    cab_idx = idx
                if "примечание" in cell or "remark" in cell:
                    rem_idx = idx
            if cab_idx != -1 and rem_idx != -1:
                return row_no, cab_idx, rem_idx
        return None

    @staticmethod
    def _wrap_text(text, max_len=90):
        """Greedily wrap ``text`` into lines; always returns at least one line.

        An over-long word gets a line of its own instead of being preceded by
        an empty line.
        """
        lines = []
        current = ""
        for word in text.split():
            if not current:
                current = word
            # "+ 1" twice: the joining space plus the trailing space the
            # original accounting reserved, so break positions are unchanged.
            elif len(current) + 1 + len(word) + 1 <= max_len:
                current = f"{current} {word}"
            else:
                lines.append(current)
                current = word
        if current:
            lines.append(current)
        return lines or [""]

    def load_excel_db(self, excel_file):
        """Load the Excel knowledge base into ``self.excel_db``.

        Every sheet is scanned for a header row (see ``_locate_header``); the
        cabinet and remark columns of matching sheets are concatenated.

        Args:
            excel_file: Path string, object with a ``name`` attribute, or None.

        Returns:
            ``(message, dropdown_update)``. On success the message is a short
            summary and the update carries the cabinet choices; on failure the
            message is the accumulated log and the choices are emptied.
        """
        log = [f"[{datetime.now().strftime('%H:%M:%S')}] Старт загрузки Excel"]

        def failure(message=None):
            if message is not None:
                log.append(message)
            return "\n".join(log), gr.update(choices=[], value=None)

        if excel_file is None:
            return failure("ERROR: excel_file is None")

        # Gradio may hand over different object types depending on version.
        excel_path = self._to_path(excel_file)
        log.append(f"Тип входных данных: {type(excel_file)}")
        log.append(f"Путь к файлу: {excel_path}")

        if not os.path.exists(excel_path):
            return failure(f"ERROR: Файл не найден по пути: {excel_path}")
        try:
            size = os.path.getsize(excel_path)
            log.append(f"Размер файла: {size} байт")
        except Exception as e:
            return failure(f"ERROR при получении размера: {e}")

        # Fail early with a readable message when the Excel engine is missing.
        try:
            import openpyxl
            log.append(f"openpyxl version: {openpyxl.__version__}")
        except ImportError as e:
            return failure(f"ERROR: openpyxl не установлен: {e}")

        all_data = []
        sheets_log = []
        try:
            log.append("Чтение Excel файла...")
            xls = pd.read_excel(excel_path, sheet_name=None, header=None, engine='openpyxl')
            log.append(f"Найдено листов: {len(xls)}")
            log.append(f"Имена листов: {list(xls.keys())}")

            for sheet_name, df_raw in xls.items():
                log.append(f"--- Обработка листа: '{sheet_name}' ({len(df_raw)} строк) ---")
                header = self._locate_header(df_raw)
                if header is None:
                    sheets_log.append(f"'{sheet_name}': заголовки не найдены")
                    log.append(f" ✗ Заголовки 'шкаф'/'примечание' не найдены")
                    continue

                header_row_index, cab_col_idx, rem_col_idx = header
                log.append(f" Заголовки найдены в строке {header_row_index}: шкаф={cab_col_idx}, примечание={rem_col_idx}")

                df = pd.read_excel(excel_path, sheet_name=sheet_name, header=header_row_index, engine='openpyxl')
                log.append(f" Колонок в листе: {len(df.columns)}")
                if not (cab_col_idx < len(df.columns) and rem_col_idx < len(df.columns)):
                    sheets_log.append(f"'{sheet_name}': ошибка индексов")
                    log.append(f" ✗ Ошибка индексов колонок")
                    continue

                # .copy() so the assignments below modify an independent frame.
                df_subset = df.iloc[:, [cab_col_idx, rem_col_idx]].copy()
                df_subset.columns = ["Cabinet", "Remark"]
                # Forward-fill so that every remark row carries its cabinet.
                df_subset["Cabinet"] = df_subset["Cabinet"].ffill()
                # Rows with no cabinet above them are dropped as well;
                # otherwise astype(str) would create a cabinet named "nan".
                df_subset = df_subset.dropna(subset=["Cabinet", "Remark"]).astype(str)
                df_subset["Cabinet_Clean"] = df_subset["Cabinet"].apply(
                    lambda x: x.strip().replace(" ", "").replace("\n", "").replace("\r", "")
                )
                all_data.append(df_subset)
                sheets_log.append(f"'{sheet_name}': {len(df_subset)} записей")
                log.append(f" ✓ Добавлено {len(df_subset)} записей")

            if not all_data:
                return failure("ИТОГ: Не найдены данные ни на одном листе!")

            self.excel_db = pd.concat(all_data, ignore_index=True)
            self.cabinet_list = sorted(self.excel_db["Cabinet"].unique().tolist())
            log.append(f"ИТОГ: Загружено {len(self.excel_db)} записей")
            log.append(f"Уникальных шкафов: {len(self.cabinet_list)}")

            result_msg = f"✅ База загружена успешно!\n\n"
            result_msg += f"📊 Всего записей: {len(self.excel_db)}\n"
            result_msg += f"🗄️ Шкафов: {len(self.cabinet_list)}\n"
            result_msg += f"📋 Листы: {', '.join(sheets_log)}"
            return result_msg, gr.update(choices=self.cabinet_list, value=None)
        except Exception as e:
            # UI boundary: report the full traceback in the status box.
            import traceback
            log.append(f"КРИТИЧЕСКАЯ ОШИБКА: {str(e)}")
            log.append(traceback.format_exc())
            return failure()

    def extract_text(self, pdf_path):
        """Return the text of all pages of a PDF, one trailing newline per page.

        Any error is printed and an empty string is returned (best effort).
        """
        try:
            with pdfplumber.open(pdf_path) as pdf:
                return "".join((page.extract_text() or "") + "\n" for page in pdf.pages)
        except Exception as e:
            print(f"[extract_text] Error: {e}")
            return ""

    def find_all_decimal_numbers(self, text):
        """Return the unique decimal numbers found in ``text``, in order of appearance.

        All whitespace inside a match is removed (dots are kept). The pattern
        accepts any ``\\s`` between groups, so tabs, carriage returns and
        non-breaking spaces must be stripped too; otherwise the result could
        never equal a ``Cabinet_Clean`` key.
        """
        unique = {}
        for match in self._DECIMAL_NUMBER_RE.finditer(text or ""):
            unique.setdefault(self._WHITESPACE_RE.sub("", match.group(0)), None)
        return list(unique)

    def determine_doc_type(self, filename):
        """Map a file name to a document-type code, or ``"UNKNOWN"``.

        The upper-cased name is tested against ``_DOC_TYPE_RULES`` in order;
        the first rule with a matching substring wins.
        """
        fname = filename.upper()
        for doc_type, needles in self._DOC_TYPE_RULES:
            if any(needle in fname for needle in needles):
                return doc_type
        return "UNKNOWN"

    def get_remarks(self, cabinet_key, is_clean_key=True):
        """Collect the remarks of one cabinet, grouped by document type.

        Args:
            cabinet_key: Cabinet identifier.
            is_clean_key: When True, ``cabinet_key`` (spaces removed) is
                searched case-insensitively as a substring of
                ``Cabinet_Clean``; otherwise it must equal ``Cabinet``.

        Returns:
            Dict mapping a document code (or ``"ALL"`` when no known code
            leads the remark) to a list of remark texts. Empty dict when the
            database is empty or nothing matches.
        """
        if self.excel_db.empty:
            return {}

        if is_clean_key:
            target = cabinet_key.replace(" ", "")
            mask = self.excel_db['Cabinet_Clean'].str.contains(re.escape(target), case=False, na=False)
        else:
            mask = self.excel_db['Cabinet'] == cabinet_key
        rows = self.excel_db[mask]
        if rows.empty:
            return {}

        parsed = {}
        for remark_cell in rows['Remark']:
            # "1.Text" -> "1. Text" so that numbered items are uniform.
            cell_text = self._GLUED_NUMBER_RE.sub(r'\1. \2', str(remark_cell))
            # Split in front of every "N." / "N)" that starts the cell or a line.
            for item in self._ITEM_SPLIT_RE.split(cell_text):
                if len(item) < 3:
                    continue
                clean_item = item.strip()
                clean_item_no_num = self._ITEM_NUMBER_RE.sub('', clean_item)

                detected_docs = []
                final_text = clean_item
                # NOTE(review): the prefix group is lazy and stops at the first
                # separator, so "В4, Э3: ..." yields only "В4" - confirm intent.
                match = self._DOC_PREFIX_RE.match(clean_item_no_num)
                if match:
                    potential_docs_str = match.group(1).upper()
                    cleaned_codes = potential_docs_str.replace("(", " ").replace(")", " ").replace(",", " ")
                    valid_parts = [p for p in cleaned_codes.split() if p in self.known_docs]
                    if valid_parts:
                        detected_docs = valid_parts
                        final_text = match.group(2).strip()
                if not detected_docs:
                    detected_docs = ["ALL"]

                for doc in detected_docs:
                    parsed.setdefault(doc, []).append(final_text)
        return parsed

    def _detect_cabinet(self, file_paths):
        """Identify the cabinet from PDF contents.

        First tries decimal numbers that exactly equal a ``Cabinet_Clean``
        key, then comma-separated parts (>= 5 chars) of each ``Cabinet`` name
        as case-insensitive substrings of the text.

        Returns:
            ``(cabinet, method)`` with method ``"number"`` or ``"name"``, or
            ``(None, "")`` when nothing matched.
        """
        all_pdf_text = "".join(self.extract_text(p) + "\n" for p in file_paths)

        known_keys = set(self.excel_db["Cabinet_Clean"].tolist())
        for candidate in self.find_all_decimal_numbers(all_pdf_text):
            if candidate in known_keys:
                return candidate, "number"

        haystack = all_pdf_text.lower()  # lower-cased once, not per comparison
        for cab_name in self.excel_db["Cabinet"].unique():
            for sub_name in (part.strip() for part in cab_name.split(',')):
                # Very short fragments would match almost any text.
                if len(sub_name) >= 5 and sub_name.lower() in haystack:
                    return cab_name, "name"
        return None, ""

    def check_files(self, files, manual_cabinet):
        """Build the check-list PDF for the uploaded files.

        Args:
            files: Iterable of paths or objects with a ``name`` attribute.
            manual_cabinet: Cabinet chosen by the user; when non-blank it
                overrides automatic detection.

        Returns:
            ``(status_text, pdf_path)``; ``pdf_path`` is None whenever no PDF
            was produced.
        """
        if not files:
            return "❌ Файлы не загружены", None
        if self.excel_db.empty:
            return "❌ Сначала загрузите Excel базу!", None

        file_paths = [self._to_path(f) for f in files]

        is_manual = bool(manual_cabinet and manual_cabinet.strip())
        if is_manual:
            detected_cabinet, found_by_method = manual_cabinet, "manual"
        else:
            detected_cabinet, found_by_method = self._detect_cabinet(file_paths)
            if detected_cabinet is None:
                return "⚠️ Шкаф не опознан автоматически.\nВыберите шкаф из списка вручную.", None

        is_clean_search = (found_by_method == "number")
        remarks = self.get_remarks(detected_cabinet, is_clean_key=is_clean_search)
        if not remarks:
            return f"⚠️ Шкаф '{detected_cabinet}' найден, но замечаний в базе нет.", None

        checklist = {}
        for file_path in file_paths:
            fname = os.path.basename(file_path)
            dtype = self.determine_doc_type(fname)
            tasks = list(remarks.get(dtype, []))
            # General remarks apply to every document type except "С2".
            if "ALL" in remarks and dtype != "С2":
                tasks.extend(remarks["ALL"])
            if tasks:
                # dict.fromkeys removes duplicates while keeping order.
                checklist[fname] = list(dict.fromkeys(tasks))
        processed_count = len(file_paths)

        pdf_title = detected_cabinet
        if is_manual:
            pdf_title += " (Ручной выбор)"
        try:
            pdf_path = self.create_pdf(pdf_title, checklist)
        except Exception as e:
            return f"❌ Ошибка создания PDF: {e}", None

        total = sum(len(v) for v in checklist.values())
        if is_manual:
            method_str = "Ручной выбор"
        elif is_clean_search:
            method_str = "По децимальному номеру"
        else:
            method_str = "По наименованию"

        result = f"✅ Чек-лист сформирован!\n\n"
        result += f"📂 Шкаф: {detected_cabinet}\n"
        result += f"🔍 Метод определения: {method_str}\n"
        result += f"📄 Обработано файлов: {processed_count}\n"
        result += f"🚩 Всего замечаний: {total}"
        return result, pdf_path

    def create_pdf(self, cabinet, data):
        """Render the check-list to a PDF file and return its path.

        Args:
            cabinet: Title text for the cabinet line (truncated to 60 chars).
            data: Mapping ``file name -> list of remark strings``.

        Returns:
            Path of ``CheckList_Result.pdf`` inside a freshly created
            temporary directory, so concurrent requests never overwrite each
            other's output. The directory is not removed here.
        """
        out_dir = tempfile.mkdtemp(prefix="kd_checklist_")
        path = os.path.join(out_dir, "CheckList_Result.pdf")
        c = canvas.Canvas(path, pagesize=A4)
        width, height = A4

        # Use Arial from the working directory when present; else Helvetica.
        font_name = 'Helvetica'
        font_path = "arial.ttf"
        if os.path.exists(font_path):
            try:
                pdfmetrics.registerFont(TTFont('Arial', font_path))
                font_name = 'Arial'
            except Exception as e:
                print(f"[create_pdf] Font registration failed: {e}")

        y = height - 50
        c.setFont(font_name, 16)
        try:
            c.drawString(50, y, "ЧЕК-ЛИСТ ПРОВЕРКИ КД")
        except Exception:
            c.setFont("Helvetica", 16)
            c.drawString(50, y, "CHECK-LIST KD")
        y -= 25

        c.setFont(font_name, 12)
        disp_cab = cabinet[:60] + "..." if len(cabinet) > 60 else cabinet
        try:
            c.drawString(50, y, f"Шкаф: {disp_cab}")
        except Exception:
            c.setFont("Helvetica", 12)
            c.drawString(50, y, f"Cabinet: {disp_cab}")
        c.drawString(400, y, f"Дата: {datetime.now().strftime('%d.%m.%Y')}")
        y -= 20
        c.line(50, y, width - 50, y)
        y -= 30

        if not data:
            c.drawString(50, y, "Нет замечаний для загруженных файлов.")
            c.save()
            return path

        for filename, tasks in data.items():
            if y < 100:
                c.showPage()
                y = height - 50
                c.setFont(font_name, 12)

            # File heading.
            c.setFillColor(colors.darkblue)
            c.setFont(font_name, 11)
            try:
                c.drawString(50, y, f"Файл: {filename}")
            except Exception:
                c.setFont("Helvetica", 11)
                c.drawString(50, y, f"File: {filename}")
            c.setFillColor(colors.black)
            y -= 15

            c.setFont(font_name, 10)
            for task in tasks:
                if y < 80:
                    c.showPage()
                    y = height - 50
                    c.setFont(font_name, 10)
                # Empty check box in front of the remark.
                c.rect(50, y - 2, 8, 8, stroke=1, fill=0)
                for line in self._wrap_text(task, 90):
                    if y < 40:
                        c.showPage()
                        y = height - 50
                        c.setFont(font_name, 10)
                    try:
                        c.drawString(65, y, line)
                    except Exception as e:
                        # Best effort: skip the line but leave a trace.
                        print(f"[create_pdf] Could not draw line: {e}")
                    y -= 12
                y -= 5

            # Light separator between files.
            y -= 10
            c.setStrokeColor(colors.lightgrey)
            c.line(50, y, width - 50, y)
            c.setStrokeColor(colors.black)
            y -= 15

        c.save()
        return path
def create_app():
    """Build the Gradio UI and wire it to a fresh ``KDChecker`` instance.

    Returns:
        The ``gr.Blocks`` application (not launched).
    """
    checker = KDChecker()
    with gr.Blocks(title="Генератор чек-листов КД", theme=gr.themes.Soft()) as app:
        gr.Markdown("# ✅ Генератор чек-листов КД")
        gr.Markdown("Автоматическая проверка конструкторской документации по базе знаний Excel.")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📁 1. База знаний")
                db_in = gr.File(
                    label="Загрузить Excel (.xlsx)",
                    # Only .xlsx is offered: the workbook is read with
                    # engine='openpyxl', which cannot parse legacy .xls files.
                    file_types=[".xlsx"],
                    type="filepath"
                )
                manual_cab = gr.Dropdown(
                    label="Или выберите шкаф вручную",
                    choices=[],
                    interactive=True,
                    allow_custom_value=False
                )
                db_out = gr.Textbox(
                    label="Статус загрузки",
                    lines=8,
                    max_lines=15
                )
            with gr.Column(scale=1):
                gr.Markdown("### 📄 2. Документация (PDF)")
                files_in = gr.File(
                    label="Загрузить чертежи (PDF)",
                    file_count="multiple",
                    file_types=[".pdf"],
                    type="filepath"
                )
                # NOTE(review): the file's indentation was lost; the button is
                # assumed to belong to this column - confirm the layout.
                btn = gr.Button("🔍 Сформировать чек-лист", variant="primary", size="lg")

        with gr.Row():
            with gr.Column(scale=1):
                res_txt = gr.Textbox(label="Результат проверки", lines=8)
            with gr.Column(scale=1):
                res_pdf = gr.File(label="📥 Скачать PDF чек-лист")

        # Event wiring: loading a workbook refreshes the status box and the
        # cabinet dropdown; the button produces the status text and the PDF.
        db_in.change(
            fn=checker.load_excel_db,
            inputs=[db_in],
            outputs=[db_out, manual_cab]
        )
        btn.click(
            fn=checker.check_files,
            inputs=[files_in, manual_cab],
            outputs=[res_txt, res_pdf]
        )
    return app
# ========== APPLICATION LAUNCH ==========
# The UI is built at import time and bound to the module-level name `app`.
app = create_app()

if __name__ == "__main__":
    # Show only the length of the password, never its value.
    masked_password = '*' * len(AUTH_PASSWORD)
    print(f"Starting with auth: {AUTH_USERNAME} / {masked_password}")
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "auth": (AUTH_USERNAME, AUTH_PASSWORD),
        "auth_message": "Введите логин и пароль для доступа",
    }
    app.launch(**launch_options)