Spaces:
Sleeping
Sleeping
File size: 18,222 Bytes
4a9f35f 29867a2 4a9f35f b46bb37 2e977a9 4a9f35f 29867a2 97a4e0f 4a9f35f 2e977a9 6761181 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 e607d91 2e977a9 6761181 2e977a9 4a9f35f 6761181 4a9f35f 6761181 2e977a9 6761181 4a9f35f 2e977a9 6761181 4a9f35f 2e977a9 4a9f35f 5dac689 418e0a8 2e977a9 4a9f35f 2e977a9 4a9f35f e67e728 d1c3f7f e67e728 d1c3f7f e67e728 d1c3f7f 2e977a9 e67e728 d1c3f7f e67e728 d1c3f7f e67e728 d1c3f7f 4a9f35f 2e977a9 4a9f35f 2e977a9 865492a 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 865492a 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 418e0a8 2e977a9 418e0a8 2e977a9 418e0a8 97a4e0f 4a9f35f 2e977a9 4a9f35f 2e977a9 5dac689 d1c3f7f 5dac689 e67e728 d1c3f7f 5dac689 d1c3f7f 5dac689 d1c3f7f 6761181 2e977a9 e67e728 d1c3f7f e67e728 d1c3f7f e67e728 d1c3f7f e67e728 d1c3f7f e67e728 d1c3f7f 2e977a9 4a9f35f 6761181 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 9a5caff 2e977a9 9a5caff 2e977a9 4a9f35f 9a5caff 2e977a9 4a9f35f e67e728 2e977a9 4a9f35f 2e977a9 29867a2 4a9f35f 2e977a9 29867a2 2e977a9 5dac689 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 4a9f35f 2e977a9 b46bb37 2e977a9 ada24b4 2e977a9 4a9f35f 4512ed0 e67e728 2e977a9 e67e728 2e977a9 865492a b46bb37 2e977a9 ada24b4 e607d91 5b39d0c 2e977a9 2e608e4 2e977a9 d1c3f7f f74bdc2 d1c3f7f 2e977a9 ada24b4 6761181 2e977a9 33d0a8b 29867a2 2e977a9 6761181 2e977a9 ada24b4 4a9f35f 2e977a9 5b39d0c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 |
import gradio as gr
import pandas as pd
import pdfplumber
import os
import tempfile
import re
from datetime import datetime
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.lib import colors
# --- LOGIC ---
class KDChecker:
    """Build review checklists for design-documentation (КД) PDF files.

    Review remarks are loaded from an Excel workbook in which every sheet
    is treated as one author.  For a set of uploaded PDFs the checker works
    out which cabinet they describe (or takes a manually chosen one), picks
    the remarks recorded for that cabinet, distributes them over the files
    by document type and renders a PDF checklist with check boxes.

    Attributes:
        excel_db: DataFrame with the string columns ``Cabinet``, ``Remark``,
            ``Author`` and ``Cabinet_Clean``; empty until a workbook loads.
        known_docs: Document-type codes recognised at the start of a remark.
    """

    # Number of rows scanned from the top of each sheet to find the header.
    _HEADER_SCAN_ROWS = 20
    # Only the leading pages of a PDF are read when detecting the cabinet.
    _MAX_PDF_PAGES = 5
    # Maximum number of characters per wrapped checklist line.
    _WRAP_WIDTH = 95

    _WHITESPACE_RE = re.compile(r"\s+")

    # Project-specific number: prefix + 1 digit + 3 digits + letters +
    # 3 digits, optionally followed by "-<digits>" (e.g. РЛТ.1.006.ША.030).
    _CUSTOM_NUMBER_RE = re.compile(
        r"(?:РЛТ|ЛДАР|ВНАР|ШТМ)[\s\.]*\d{1}[\s\.]*\d{3}[\s\.]*[А-ЯA-Z]{1,4}[\s\.]*\d{3}(?:-[\d]+)?"
    )
    # Standard number: prefix + 6 digits + 3 digits (e.g. ЛДАР.421246.337);
    # whitespace is tolerated in place of the dots.
    _GOST_NUMBER_RE = re.compile(r"(?:РЛТ|ЛДАР|ВНАР|ШТМ)[\s\.]*\d{6}[\s\.]*\d{3}")

    # Pieces of the "document list" prefix that may open a remark.
    _DOC_LEAD_IN_RE = re.compile(r"(?:Документ\s+|В\s+)?[\s(]*", re.IGNORECASE)
    _DOC_CODE_RE = re.compile(r"[А-ЯA-Z0-9]+", re.IGNORECASE)
    _DOC_LIST_SEP_RE = re.compile(r"\s*[,;/]\s*|\s+и\s+", re.IGNORECASE)
    _DOC_TAIL_SEP_RE = re.compile(r"^[\s)]*[.:,;\-]?\s*")

    # (document type, substrings that identify it in an upper-cased file
    # name).  Order matters: the first rule with a matching substring wins,
    # so "ПЭ3" has to be tested before "Э3".
    _DOC_TYPE_RULES = (
        ("С2", ("С2",)),
        ("ПЭ3", ("ПЭ3", "ПЕРЕЧЕНЬ")),
        ("Э3", ("Э3", "СХЕМА ЭЛЕКТРИЧЕСКАЯ")),
        ("Э4", ("Э4",)),
        ("В4", ("В4", "СПЕЦИФИКАЦИЯ")),
        ("ВО", ("ВО", "Э7", "ГАБАРИТ")),
        ("ТЭ5", ("ТЭ5", "ТАБЛИЦА")),
        ("СБ", ("СБ",)),
        ("С5", ("С5",)),
        ("ОЛ", ("ОЛ",)),
        ("Э1", ("Э1",)),
        ("Э6", ("Э6", "ЗАЗЕМЛЕНИЯ")),
        ("Д3", ("Д3", "МОНТАЖ")),
    )

    # (registered font name, path) tried in order; the first usable file
    # wins.  The built-in Helvetica fallback has no Cyrillic glyphs.
    _FONT_CANDIDATES = (
        ("Arial", "arial.ttf"),
        ("Arial", "C:\\Windows\\Fonts\\arial.ttf"),
        ("DejaVuSans", "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"),
        ("LiberationSans", "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf"),
    )

    def __init__(self):
        self.excel_db = pd.DataFrame()
        self.known_docs = ["Э3", "В4", "ПЭ3", "ВО", "ТЭ5", "СБ", "С5", "ОЛ", "Э1", "Э4", "Э7", "Д3", "Э6"]

    @classmethod
    def _strip_whitespace(cls, value):
        """Return *value* with every whitespace character removed."""
        return cls._WHITESPACE_RE.sub("", value)

    @classmethod
    def _locate_header(cls, df_raw):
        """Find the header row of a raw (header-less) sheet.

        Returns:
            ``(row_index, cabinet_column, remark_column)`` for the first of
            the leading rows that names both columns, or ``None``.
        """
        for row_idx in range(min(cls._HEADER_SCAN_ROWS, len(df_raw))):
            cab_col = -1
            rem_col = -1
            for col_idx, cell in enumerate(df_raw.iloc[row_idx].values):
                label = str(cell).lower().strip()
                # When several cells match, the right-most one is used.
                if "шкаф" in label or "cabinet" in label:
                    cab_col = col_idx
                if "примечание" in label or "remark" in label:
                    rem_col = col_idx
            if cab_col != -1 and rem_col != -1:
                return row_idx, cab_col, rem_col
        return None

    def load_excel_db(self, excel_path):
        """Load the remark database from every sheet of an Excel workbook.

        A sheet is used when one of its leading rows contains both a cabinet
        column ("шкаф"/"cabinet") and a remark column
        ("примечание"/"remark").  Blank cabinet cells inherit the value
        above them; rows without a remark or without any cabinet are
        dropped.

        Args:
            excel_path: Path of the uploaded workbook, or ``None``.

        Returns:
            A ``(status message, author dropdown update, cabinet dropdown
            update)`` tuple for the Gradio outputs.
        """
        print(f"--- Загрузка Excel: {excel_path} ---")
        if excel_path is None:
            return "Файл не выбран", gr.update(choices=[], value=None), gr.update(choices=[], value=None)

        all_data = []
        sheets_log = []
        try:
            xls = pd.read_excel(excel_path, sheet_name=None, header=None)
            for sheet_name, df_raw in xls.items():
                located = self._locate_header(df_raw)
                if located is None:
                    continue
                header_row, cab_col, rem_col = located

                # Re-read the sheet with the detected header row so that
                # pandas treats everything below it as data.
                df = pd.read_excel(excel_path, sheet_name=sheet_name, header=header_row)
                df_subset = df.iloc[:, [cab_col, rem_col]].copy()
                df_subset.columns = ["Cabinet", "Remark"]
                df_subset["Author"] = sheet_name
                df_subset["Cabinet"] = df_subset["Cabinet"].ffill()
                # Rows above the first cabinet cell have nothing to inherit;
                # drop them instead of storing a literal "nan" cabinet.
                df_subset = df_subset.dropna(subset=["Cabinet", "Remark"]).astype(str)
                df_subset["Cabinet_Clean"] = df_subset["Cabinet"].apply(self._strip_whitespace)
                all_data.append(df_subset)
                sheets_log.append(f"'{sheet_name}': {len(df_subset)}")

            if not all_data:
                # Do not keep serving a previously loaded database while the
                # UI reports that loading failed.
                self.excel_db = pd.DataFrame()
                return "❌ Ошибка: Не найдены заголовки 'Шкаф' и 'Примечание'.", gr.update(choices=[]), gr.update(choices=[])

            self.excel_db = pd.concat(all_data, ignore_index=True)
            authors_list = sorted(self.excel_db["Author"].unique().tolist())
            print(f"Excel загружен. Всего строк: {len(self.excel_db)}")
            msg = f"✅ База загружена!\nЗаписей: {len(self.excel_db)}\nАвторы: {', '.join(sheets_log)}"
            return msg, gr.update(choices=authors_list, value=None, interactive=True), gr.update(choices=[], value=None)
        except Exception as e:  # UI boundary: report the problem, never crash
            print(f"Ошибка Excel: {e}")
            self.excel_db = pd.DataFrame()
            return f"❌ Ошибка: {e}", gr.update(choices=[]), gr.update(choices=[])

    def get_cabinets_by_author(self, author_name):
        """Return a dropdown update listing the cabinets of *author_name*."""
        if self.excel_db.empty or not author_name:
            return gr.update(choices=[], value=None)
        by_author = self.excel_db[self.excel_db["Author"] == author_name]
        cabinets = by_author["Cabinet"].unique().tolist()
        return gr.update(choices=sorted(cabinets), value=None, interactive=True)

    def extract_text(self, pdf_path):
        """Return the text of the first pages of a PDF.

        Reading errors are logged, and whatever text was collected before
        the failure is returned (possibly an empty string).
        """
        chunks = []
        try:
            with pdfplumber.open(pdf_path) as pdf:
                for page in pdf.pages[:self._MAX_PDF_PAGES]:
                    chunks.append((page.extract_text() or "") + "\n")
        except Exception as e:
            print(f"Ошибка чтения PDF {pdf_path}: {e}")
        return "".join(chunks)

    def find_all_decimal_numbers(self, text):
        """Return the unique decimal numbers found in *text*.

        Matches of the project-specific pattern come first, then matches of
        the standard pattern, each in order of appearance.  All whitespace
        is removed from a match so that a number broken by a space, tab or
        line break compares equal to the cleaned database key.
        """
        found = {}
        for pattern in (self._CUSTOM_NUMBER_RE, self._GOST_NUMBER_RE):
            for match in pattern.finditer(text):
                # A dict keeps first-seen order while dropping duplicates.
                found.setdefault(self._strip_whitespace(match.group(0)), None)
        return list(found)

    def determine_doc_type(self, filename):
        """Classify a file by substrings of its upper-cased name.

        Returns:
            The document-type code of the first matching rule, or
            ``"UNKNOWN"`` when no rule matches.
        """
        fname = filename.upper()
        for doc_type, markers in self._DOC_TYPE_RULES:
            if any(marker in fname for marker in markers):
                return doc_type
        return "UNKNOWN"

    def _split_doc_prefix(self, text):
        """Split a remark into its leading document codes and the remark text.

        After an optional "Документ"/"В" lead-in, codes are consumed one at
        a time for as long as each is a known document code and is joined
        to the previous one by ",", ";", "/" or "и".  Scanning stops at the
        first unknown token, so words of the remark are never swallowed.

        Returns:
            ``(codes, remainder)``.  ``codes`` is empty, and ``remainder``
            is *text* unchanged, when the remark does not start with a known
            code or when nothing follows the codes.
        """
        cursor = self._DOC_LEAD_IN_RE.match(text).end()
        codes = []
        codes_end = cursor
        while True:
            token = self._DOC_CODE_RE.match(text, cursor)
            if token is None:
                break
            code = token.group(0).upper()
            if code not in self.known_docs:
                break
            if code not in codes:
                codes.append(code)
            codes_end = token.end()
            separator = self._DOC_LIST_SEP_RE.match(text, codes_end)
            if separator is None:
                break
            # The separator is only "spent" if another known code follows;
            # otherwise it stays part of the remainder.
            cursor = separator.end()

        if not codes:
            return [], text
        remainder = self._DOC_TAIL_SEP_RE.sub("", text[codes_end:], count=1).strip()
        if not remainder:
            return [], text
        return codes, remainder

    def get_remarks(self, cabinet_key, is_clean_key=True):
        """Collect the remarks recorded for a cabinet, grouped by document.

        Args:
            cabinet_key: Cabinet identifier.
            is_clean_key: When true, the key is compared as a
                case-insensitive substring of the whitespace-free cabinet
                column; otherwise it must equal the raw cabinet cell.

        Returns:
            A dict mapping a document code (or ``"ALL"`` for remarks that do
            not name a known document) to a list of remark texts.  Empty
            when nothing matches.
        """
        if self.excel_db.empty:
            return {}

        if is_clean_key:
            target = self._strip_whitespace(cabinet_key)
            if not target:
                # An empty pattern would match every row.
                return {}
            mask = self.excel_db["Cabinet_Clean"].str.contains(re.escape(target), case=False, na=False)
        else:
            mask = self.excel_db["Cabinet"] == cabinet_key

        rows = self.excel_db[mask]
        if rows.empty:
            return {}

        parsed = {}
        for remark_cell in rows["Remark"]:
            # "1.Text" -> "1. Text" so that numbering is split consistently.
            cell_text = re.sub(r"(\d+)\.([А-ЯA-Z])", r"\1. \2", str(remark_cell))
            # One cell may hold a numbered list; split it before each
            # "N." / "N)" that starts the cell or a line.
            for item in re.split(r"(?:^|\n)\s*(?=\d+[\.\)])", cell_text):
                if len(item) < 3:
                    continue
                clean_item = item.strip()
                body = re.sub(r"^\d+[\.\)]\s*", "", clean_item)
                docs, remainder = self._split_doc_prefix(body)
                if docs:
                    final_text = remainder
                else:
                    docs = ["ALL"]
                    final_text = clean_item
                for doc in docs:
                    parsed.setdefault(doc, []).append(final_text)
        return parsed

    def _detect_cabinet(self, files, progress):
        """Identify the cabinet described by the uploaded PDFs.

        For each file the decimal numbers in its text are compared with the
        cleaned cabinet keys; failing that, cabinet names longer than five
        characters are searched for in the text.

        Returns:
            ``(cabinet, method)`` where *method* is ``"number"`` or
            ``"name"``, or ``("Не определен", "")`` when nothing matched.
        """
        db_clean_keys = set(self.excel_db["Cabinet_Clean"].tolist())

        # Prepared once: the candidate names do not depend on the file.
        name_candidates = []
        for cab_name in self.excel_db["Cabinet"].unique():
            # Entries that look like a code rather than a name are skipped.
            if "ЛДАР" in cab_name or "РЛТ" in cab_name:
                continue
            needle = " ".join(cab_name.split()).lower()
            if len(needle) > 5:
                name_candidates.append((cab_name, needle))

        for file_path in progress.tqdm(files, desc="Поиск номера шкафа"):
            raw_text = self.extract_text(file_path)

            for cand in self.find_all_decimal_numbers(raw_text):
                if cand in db_clean_keys:
                    print(f"✅ Шкаф найден по номеру: {cand}")
                    return cand, "number"

            # Collapse every run of whitespace so that a name wrapped over
            # several lines ("Шкаф\nСАУ") still matches "Шкаф САУ".
            flat_text = " ".join(raw_text.split()).lower()
            for cab_name, needle in name_candidates:
                if needle in flat_text:
                    print(f"✅ Шкаф найден по имени: {cab_name}")
                    return cab_name, "name"

        return "Не определен", ""

    def check_files(self, files, manual_cabinet, progress=gr.Progress()):
        """Build the checklist PDF for the uploaded files.

        Args:
            files: Paths of the uploaded PDF files.
            manual_cabinet: Cabinet chosen by the user; when blank the
                cabinet is detected from the PDF contents.
            progress: Gradio progress tracker.

        Returns:
            ``(log message, path of the generated PDF)``; the path is
            ``None`` when no checklist could be produced.
        """
        print("\n--- Начало проверки ---")
        if not files:
            return "Файлы не загружены", None
        if self.excel_db.empty:
            return "Сначала загрузите Excel базу!", None

        is_manual = bool(manual_cabinet and manual_cabinet.strip())
        if is_manual:
            detected_cabinet, found_by_method = manual_cabinet, "manual"
        else:
            detected_cabinet, found_by_method = self._detect_cabinet(files, progress)

        print(f"Определен шкаф: {detected_cabinet}")
        if not found_by_method:
            return f"⚠️ Шкаф не опознан автоматически.\nВыберите Автора и Шкаф вручную.", None

        # A detected number is a cleaned key; names and manual choices are
        # raw cabinet cells and must match exactly.
        is_clean_search = (found_by_method == "number")
        remarks = self.get_remarks(detected_cabinet, is_clean_key=is_clean_search)
        if not remarks:
            return f"⚠️ Для шкафа '{detected_cabinet}' нет замечаний в базе.", None

        checklist = {}
        processed_count = 0
        for file_path in files:
            fname = os.path.basename(file_path)
            dtype = self.determine_doc_type(fname)
            tasks = list(remarks.get(dtype, []))
            # General remarks apply to every document except С2.
            if "ALL" in remarks and dtype != "С2":
                tasks.extend(remarks["ALL"])
            if tasks:
                checklist[fname] = list(dict.fromkeys(tasks))  # ordered de-dup
                processed_count += 1

        pdf_title = detected_cabinet
        if is_manual:
            pdf_title += " (Выбор вручную)"

        print("Генерация PDF...")
        try:
            pdf = self.create_pdf(pdf_title, checklist)
        except Exception as e:
            print(f"ОШИБКА PDF: {e}")
            return f"Ошибка создания PDF: {e}", None

        total = sum(len(v) for v in checklist.values())
        method_str = "Ручной выбор" if is_manual else ("По номеру" if is_clean_search else "По имени")
        return f"✅ Готово!\n📂 Шкаф: {detected_cabinet}\n🔍 Метод: {method_str}\n📄 Файлов: {processed_count}\n🚩 Замечаний: {total}", pdf

    def _pick_font(self):
        """Register the first available TrueType font and return its name.

        Falls back to the built-in ``Helvetica`` when no candidate file can
        be loaded.
        """
        for font_name, font_path in self._FONT_CANDIDATES:
            if not os.path.exists(font_path):
                continue
            try:
                pdfmetrics.registerFont(TTFont(font_name, font_path))
            except Exception as e:
                # A broken font file must not abort the report; try the next.
                print(f"Не удалось загрузить шрифт {font_path}: {e}")
                continue
            return font_name
        return "Helvetica"

    @classmethod
    def _wrap_paragraph(cls, paragraph, max_len=None):
        """Greedily wrap *paragraph* into lines of at most *max_len* chars.

        An empty paragraph yields a single empty line, which keeps blank
        lines of the remark visible in the output.
        """
        if max_len is None:
            max_len = cls._WRAP_WIDTH
        lines = []
        cur_line = ""
        for word in paragraph.split(" "):
            if len(cur_line) + len(word) + 1 <= max_len:
                cur_line += word + " "
            else:
                # Skip the empty line an over-long first word would create.
                if cur_line:
                    lines.append(cur_line)
                cur_line = word + " "
        lines.append(cur_line)
        return [line.strip() for line in lines]

    def create_pdf(self, cabinet, data):
        """Render the checklist as a PDF with one check box per remark.

        Args:
            cabinet: Title shown in the header (truncated to 60 characters).
            data: Mapping ``file name -> list of remark texts``.

        Returns:
            Path of the generated ``CheckList_Result.pdf``.
        """
        # A private directory per call keeps the download name stable while
        # preventing concurrent requests from overwriting each other's file.
        out_dir = tempfile.mkdtemp(prefix="kd_checklist_")
        path = os.path.join(out_dir, "CheckList_Result.pdf")

        c = canvas.Canvas(path, pagesize=A4)
        form = c.acroForm
        width, height = A4
        font_name = self._pick_font()

        y = height - 50
        c.setFont(font_name, 16)
        c.drawString(50, y, f"ЧЕК-ЛИСТ ПРОВЕРКИ КД")
        y -= 25
        c.setFont(font_name, 12)
        disp_cab = cabinet[:60] + "..." if len(cabinet) > 60 else cabinet
        c.drawString(50, y, f"Шкаф: {disp_cab}")
        c.drawString(400, y, f"Дата: {datetime.now().strftime('%d.%m.%Y')}")
        y -= 20
        c.line(50, y, width - 50, y)
        y -= 30

        if not data:
            c.drawString(50, y, "Нет замечаний.")
            c.save()
            return path

        cb_id = 0
        for filename, tasks in data.items():
            if y < 100:
                c.showPage()
                y = height - 50
                c.setFont(font_name, 12)
            c.setFillColor(colors.darkblue)
            c.setFont(font_name, 11)
            c.drawString(50, y, f"Файл: {filename}")
            c.setFillColor(colors.black)
            y -= 15
            c.setFont(font_name, 10)

            for task in tasks:
                if y < 80:
                    c.showPage()
                    y = height - 50
                    c.setFont(font_name, 10)
                form.checkbox(name=f"cb_{cb_id}", x=50, y=y - 10, size=10, buttonStyle='check', forceBorder=True, fillColor=colors.white)
                cb_id += 1

                text_y = y - 2
                for paragraph in task.split('\n'):
                    for line in self._wrap_paragraph(paragraph):
                        if text_y < 40:
                            # The font is set again after every page break.
                            c.showPage()
                            text_y = height - 50
                            c.setFont(font_name, 10)
                        c.drawString(65, text_y, line)
                        text_y -= 12
                y = text_y - 8

            # Light separator between files.
            y -= 10
            c.setStrokeColor(colors.lightgrey)
            c.line(50, y, width - 50, y)
            c.setStrokeColor(colors.black)
            y -= 20

        c.save()
        return path
# --- INTERFACE ---
# Custom stylesheet handed to app.launch() at the bottom of the file.
# The .compact_file and .orange_btn classes are attached to components in
# create_app() through elem_classes; the footer rule hides the page footer.
css = """
.gradio-container { max-width: 95% !important; }
.compact_file { height: 150px !important; min-height: 150px !important; max-height: 150px !important; overflow: hidden !important; }
.orange_btn { background: #FF7F27 !important; border: none !important; color: white !important; font-weight: bold; }
.orange_btn:hover { background: #E06010 !important; }
footer { display: none !important; }
"""
def create_app():
    """Assemble the Gradio Blocks UI and wire it to a fresh KDChecker.

    Returns:
        The ``gr.Blocks`` application, not yet launched.
    """
    # NOTE(review): the nesting of the layout contexts below was
    # reconstructed from a source whose indentation was lost - confirm it
    # against the deployed layout.
    kd_checker = KDChecker()

    # css and theme are intentionally not passed to gr.Blocks here; they
    # are supplied in the app.launch() call of the script entry point.
    with gr.Blocks(title="Генератор чек-листов КД") as app:
        gr.Markdown("## ✅ Генератор чек-листов КД")

        with gr.Row():
            # Left column: remark database plus manual cabinet selection.
            with gr.Column(scale=1):
                gr.Markdown("### 1. База технических замечаний")
                excel_input = gr.File(
                    label="Excel (.xlsx)",
                    type="filepath",
                    elem_classes="compact_file",
                )
                with gr.Group():
                    gr.Markdown("#### Ручной выбор:")
                    author_choice = gr.Dropdown(
                        label="1. Разработчик КД",
                        choices=[],
                        interactive=True,
                    )
                    cabinet_choice = gr.Dropdown(
                        label="2. Шкаф",
                        choices=[],
                        interactive=True,
                    )
                db_status = gr.Textbox(
                    label="Статус базы",
                    lines=2,
                    max_lines=3,
                    interactive=False,
                )
                # Uploading a workbook refreshes the status and both
                # dropdowns; picking an author refills the cabinet list.
                excel_input.upload(
                    kd_checker.load_excel_db,
                    inputs=[excel_input],
                    outputs=[db_status, author_choice, cabinet_choice],
                )
                author_choice.change(
                    kd_checker.get_cabinets_by_author,
                    inputs=[author_choice],
                    outputs=[cabinet_choice],
                )

            # Right column: documents to check and the action button.
            with gr.Column(scale=1):
                gr.Markdown("### 2. Документация")
                pdf_inputs = gr.File(
                    label="Конструкторская документация (PDF)",
                    file_count="multiple",
                    type="filepath",
                    elem_classes="compact_file",
                )
                gr.Markdown("")
                run_button = gr.Button(
                    "Сформировать чек-лист",
                    variant="primary",
                    elem_classes="orange_btn",
                )

        gr.Markdown("### 3. Результат")
        with gr.Row():
            with gr.Column(scale=1):
                log_box = gr.Textbox(label="Лог проверки", lines=5)
            with gr.Column(scale=1):
                pdf_output = gr.File(label="Скачать готовый PDF")

        run_button.click(
            kd_checker.check_files,
            inputs=[pdf_inputs, cabinet_choice],
            outputs=[log_box, pdf_output],
        )

    return app
if __name__ == "__main__":
    # Script entry point: build the UI and start the Gradio server.
    # The custom stylesheet and the theme are supplied here, at launch
    # time, rather than to gr.Blocks inside create_app().
    app = create_app()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        css=css,
        theme=gr.themes.Soft(),
        ssr_mode=False,
    )