Ed5 committed on
Commit
4a9f35f
·
verified ·
1 Parent(s): edc5aa7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +371 -0
app.py CHANGED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import pdfplumber
4
+ import os
5
+ import tempfile
6
+ import re
7
+ from datetime import datetime
8
+ from reportlab.pdfgen import canvas
9
+ from reportlab.lib.pagesizes import A4
10
+ from reportlab.pdfbase import pdfmetrics
11
+ from reportlab.pdfbase.ttfonts import TTFont
12
+ from reportlab.lib import colors
13
+
14
+
15
class KDChecker:
    """Build per-file review checklists for design documentation (KD).

    The checker keeps an in-memory "knowledge base" loaded from an Excel
    workbook (one row per cabinet/remark pair), identifies which cabinet a set
    of uploaded PDF files belongs to, and renders the remarks that apply to
    each file into a PDF checklist with fillable checkboxes.

    Attributes:
        excel_db: DataFrame with string columns ``Cabinet``, ``Remark`` and
            ``Cabinet_Clean`` (``Cabinet`` with all whitespace removed).
        cabinet_list: Sorted unique ``Cabinet`` values, used for the dropdown.
        known_docs: Document-type codes recognised at the start of a remark.
    """

    # How many leading rows of each sheet are scanned for the header row.
    HEADER_SCAN_ROWS = 20

    # File name of the generated checklist (also the name the user downloads).
    PDF_FILE_NAME = "CheckList_Result.pdf"

    # Candidate TrueType fonts able to render Cyrillic, in order of preference.
    FONT_CANDIDATES = (
        "C:\\Windows\\Fonts\\arial.ttf",
        "arial.ttf",
        "/usr/share/fonts/truetype/msttcorefonts/Arial.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
    )

    # Decimal (drawing) number: organisation prefix, 1 digit, 3 digits,
    # 1-4 letters, 3 digits and an optional "-NN" variant suffix. Any mix of
    # whitespace and dots may separate the groups.
    _DECIMAL_NUMBER_RE = re.compile(
        r"(РЛТ|ЛДАР|ВНАР|ШТМ)[\s\.]*\d{1}[\s\.]*\d{3}[\s\.]*[А-ЯA-Z]{1,4}[\s\.]*\d{3}(-[\d]+)?"
    )

    # Optional "Документ"/"В" prefix, then a run of candidate document codes,
    # then a separator, then the remark text itself.
    _DOC_PREFIX_RE = re.compile(
        r'^(?:Документ\s+|В\s+)?([А-ЯA-Z0-9\s,\(\)\-]+?)(?:[\.\:\-]|\s+)(.*)',
        re.IGNORECASE | re.DOTALL,
    )

    # (result, substrings) pairs checked in order against the upper-cased file
    # name; the first pair with any substring present wins. Order matters:
    # "ПЭ3" must be tested before "Э3" because it contains it.
    _DOC_TYPE_RULES = (
        ("С2", ("С2",)),
        ("ПЭ3", ("ПЭ3", "ПЕРЕЧЕНЬ")),
        ("Э3", ("Э3", "СХЕМА ЭЛЕКТРИЧЕСКАЯ")),
        ("Э4", ("Э4",)),
        ("В4", ("В4", "СПЕЦИФИКАЦИЯ")),
        ("ВО", ("ВО", "Э7", "ГАБАРИТ")),
        ("ТЭ5", ("ТЭ5", "ТАБЛИЦА")),
        ("СБ", ("СБ",)),
        ("С5", ("С5",)),
        ("ОЛ", ("ОЛ",)),
        ("Э1", ("Э1",)),
        ("Э6", ("Э6", "ЗАЗЕМЛЕНИЯ")),
        ("Д3", ("Д3", "МОНТАЖ")),
    )

    def __init__(self):
        """Create a checker with an empty knowledge base."""
        self.excel_db = pd.DataFrame()
        self.cabinet_list = []
        self.known_docs = ["Э3", "В4", "ПЭ3", "ВО", "ТЭ5", "СБ", "С5", "ОЛ", "Э1", "Э4", "Э7", "Д3", "Э6"]

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _path_of(file_obj):
        """Return the filesystem path of an uploaded file.

        Accepts either a plain path (``str`` / ``os.PathLike``) or an object
        that exposes the path through a ``.name`` attribute (tempfile-style
        wrapper), so the class works with both upload representations.
        """
        if isinstance(file_obj, (str, os.PathLike)):
            return os.fspath(file_obj)
        return file_obj.name

    @staticmethod
    def _strip_whitespace(value):
        """Return *value* as a string with every whitespace character removed."""
        return re.sub(r"\s+", "", str(value))

    @classmethod
    def _find_header(cls, df_raw):
        """Locate the header row and the cabinet/remark columns of a raw sheet.

        Scans the first ``HEADER_SCAN_ROWS`` rows for a row that contains both
        a "шкаф"/"cabinet" cell and a "примечание"/"remark" cell
        (case-insensitive substring match). If several cells of the row match
        the same keyword, the right-most one is used.

        Returns:
            ``(row_index, cabinet_col_index, remark_col_index)`` or ``None``
            when no such row exists.
        """
        for row_idx in range(min(cls.HEADER_SCAN_ROWS, len(df_raw))):
            cab_idx = -1
            rem_idx = -1
            for col_idx, cell in enumerate(df_raw.iloc[row_idx].values):
                text = str(cell).lower().strip()
                if "шкаф" in text or "cabinet" in text:
                    cab_idx = col_idx
                if "примечание" in text or "remark" in text:
                    rem_idx = col_idx
            if cab_idx != -1 and rem_idx != -1:
                return row_idx, cab_idx, rem_idx
        return None

    @staticmethod
    def _wrap_words(paragraph, max_len=95):
        """Greedily wrap *paragraph* on single spaces to about *max_len* chars.

        Words are never broken. An empty paragraph yields one empty line so
        that blank lines in the source text keep their vertical space.
        """
        lines = []
        cur_line = ""
        for word in paragraph.split(' '):
            if len(cur_line) + len(word) + 1 <= max_len:
                cur_line += word + " "
            else:
                # Skip the empty accumulator that appears when the very first
                # word is already longer than the limit.
                if cur_line:
                    lines.append(cur_line.strip())
                cur_line = word + " "
        lines.append(cur_line.strip())
        return lines

    def _register_font(self):
        """Register the first usable TrueType font and return its name.

        Falls back to the built-in ``'Helvetica'`` when none of
        ``FONT_CANDIDATES`` exists or loads.
        NOTE(review): the built-in fallback is not expected to render the
        Cyrillic text correctly; ship one of the candidate fonts with the app.
        """
        for font_file in self.FONT_CANDIDATES:
            if not os.path.exists(font_file):
                continue
            try:
                pdfmetrics.registerFont(TTFont('ChecklistFont', font_file))
            except Exception:
                # Unreadable or unsupported font file: try the next candidate.
                continue
            return 'ChecklistFont'
        return 'Helvetica'

    def _detect_cabinet(self, all_pdf_text):
        """Identify the cabinet described by the concatenated PDF text.

        First tries the decimal numbers found in the text against the
        whitespace-free cabinet keys; then falls back to looking for any
        comma-separated part of a cabinet name (5+ characters) in the text,
        case-insensitively.

        Returns:
            ``(cabinet, method)`` where *method* is ``"number"`` or ``"name"``,
            or ``(None, "")`` when nothing matched.
        """
        db_clean_keys = set(self.excel_db["Cabinet_Clean"].tolist())
        for candidate in self.find_all_decimal_numbers(all_pdf_text):
            if candidate in db_clean_keys:
                return candidate, "number"

        text_lower = all_pdf_text.lower()
        for cab_name in self.excel_db["Cabinet"].unique():
            for sub_name in (part.strip() for part in cab_name.split(',')):
                # Very short fragments would match almost any document.
                if len(sub_name) < 5:
                    continue
                if sub_name.lower() in text_lower:
                    return cab_name, "name"
        return None, ""

    # --------------------------------------------------------------- public API

    def load_excel_db(self, excel_file):
        """Load the knowledge base from every sheet of an Excel workbook.

        Sheets without a recognisable header row are skipped and reported in
        the status message. Empty cabinet cells are forward-filled from the
        cell above (merged-cell layout), and rows without a remark are dropped.

        Args:
            excel_file: Uploaded workbook (path or object with ``.name``), or
                ``None`` when nothing was uploaded.

        Returns:
            ``(status_message, dropdown_update)`` for the Gradio outputs.
        """
        if excel_file is None:
            return "Файл не выбран", gr.update(choices=[], value=None)

        all_data = []
        sheets_log = []

        try:
            excel_path = self._path_of(excel_file)
            xls = pd.read_excel(excel_path, sheet_name=None, header=None)

            for sheet_name, df_raw in xls.items():
                header = self._find_header(df_raw)
                if header is None:
                    sheets_log.append(f"Лист '{sheet_name}': заголовки не найдены")
                    continue

                header_row_index, cab_col_idx, rem_col_idx = header
                df = pd.read_excel(excel_path, sheet_name=sheet_name, header=header_row_index)
                # Copy so the column assignments below act on an independent
                # frame rather than on a slice of ``df``.
                df_subset = df.iloc[:, [cab_col_idx, rem_col_idx]].copy()
                df_subset.columns = ["Cabinet", "Remark"]

                df_subset["Cabinet"] = df_subset["Cabinet"].ffill()
                df_subset = df_subset.dropna(subset=["Remark"]).astype(str)
                df_subset["Cabinet_Clean"] = df_subset["Cabinet"].apply(self._strip_whitespace)

                all_data.append(df_subset)
                sheets_log.append(f"Лист '{sheet_name}': {len(df_subset)} стр.")

            if not all_data:
                return "❌ Ошибка: Не найдены заголовки 'Шкаф' и 'Примечание'.", gr.update(choices=[], value=None)

            self.excel_db = pd.concat(all_data, ignore_index=True)
            self.cabinet_list = sorted(self.excel_db["Cabinet"].unique().tolist())

            msg = f"✅ База знаний загружена успешно!\nВсего записей: {len(self.excel_db)}\nОбработаны листы: {', '.join(sheets_log)}"
            return msg, gr.update(choices=self.cabinet_list, value=None, interactive=True)

        except Exception as e:
            # UI boundary: report the failure in the status box instead of
            # raising into the Gradio event handler.
            return f"❌ Ошибка чтения Excel: {e}", gr.update(choices=[], value=None)

    def extract_text(self, pdf_path):
        """Return the text of every page of a PDF, one page per line block.

        Best-effort: any failure to open or parse the file yields ``""`` so
        that one unreadable file does not abort the whole check.
        """
        try:
            with pdfplumber.open(pdf_path) as pdf:
                return "".join((page.extract_text() or "") + "\n" for page in pdf.pages)
        except Exception:
            return ""

    def find_all_decimal_numbers(self, text):
        """Return the unique decimal numbers found in *text*, in first-seen order.

        All whitespace inside a match is removed (dots are kept), which is the
        same normalisation applied to the ``Cabinet_Clean`` column.
        """
        found = (
            self._strip_whitespace(match.group(0))
            for match in self._DECIMAL_NUMBER_RE.finditer(text)
        )
        return list(dict.fromkeys(found))

    def determine_doc_type(self, filename):
        """Classify a file by document-type markers in its name.

        The upper-cased name is tested against ``_DOC_TYPE_RULES`` in order.

        Returns:
            The document-type code, or ``"UNKNOWN"`` when no marker is present.
        """
        fname = filename.upper()
        for doc_type, markers in self._DOC_TYPE_RULES:
            if any(marker in fname for marker in markers):
                return doc_type
        return "UNKNOWN"

    def get_remarks(self, cabinet_key, is_clean_key=True):
        """Collect the remarks for one cabinet, grouped by document type.

        Args:
            cabinet_key: Cabinet identifier to look up.
            is_clean_key: When true, *cabinet_key* is matched as a
                case-insensitive substring of ``Cabinet_Clean``; otherwise it
                must equal ``Cabinet`` exactly.

        Returns:
            Mapping ``doc_type -> [remark, ...]``. Remarks that do not start
            with a known document code are filed under ``"ALL"`` and keep
            their original numbering. Empty when nothing matches.
        """
        if self.excel_db.empty:
            return {}

        if is_clean_key:
            target = self._strip_whitespace(cabinet_key)
            mask = self.excel_db['Cabinet_Clean'].str.contains(re.escape(target), case=False, na=False)
        else:
            mask = self.excel_db['Cabinet'] == cabinet_key

        rows = self.excel_db[mask]
        if rows.empty:
            return {}

        parsed = {}
        for remark_cell in rows['Remark']:
            # Normalise "1.Text" to "1. Text" so item numbers are uniform.
            cell_text = re.sub(r'(\d+)\.([А-ЯA-Z])', r'\1. \2', str(remark_cell))

            # A cell may hold several numbered items; split before each
            # "N." / "N)" that begins the cell or a line.
            for item in re.split(r'(?:^|\n)\s*(?=\d+[\.\)])', cell_text):
                if len(item) < 3:
                    continue
                clean_item = item.strip()
                clean_item_no_num = re.sub(r'^\d+[\.\)]\s*', '', clean_item)

                detected_docs = []
                final_text = clean_item

                match = self._DOC_PREFIX_RE.match(clean_item_no_num)
                if match:
                    codes = match.group(1).upper()
                    codes = codes.replace("(", " ").replace(")", " ").replace(",", " ")
                    valid_parts = [code for code in codes.split() if code in self.known_docs]
                    if valid_parts:
                        detected_docs = valid_parts
                        final_text = match.group(2).strip()

                if not detected_docs:
                    detected_docs = ["ALL"]

                for doc in detected_docs:
                    parsed.setdefault(doc, []).append(final_text)

        return parsed

    def check_files(self, files, manual_cabinet):
        """Build the checklist PDF for the uploaded documentation files.

        The cabinet is taken from *manual_cabinet* when given; otherwise it is
        detected from the PDF text (see ``_detect_cabinet``). Each file gets
        the remarks for its document type plus the ``"ALL"`` remarks (the
        latter are not applied to files of type ``"С2"``).

        Args:
            files: Uploaded files (paths or objects with ``.name``).
            manual_cabinet: Cabinet chosen in the dropdown, or empty/``None``.

        Returns:
            ``(status_message, pdf_path_or_None)`` for the Gradio outputs.
        """
        if not files:
            return "Файлы не загружены", None
        if self.excel_db.empty:
            return "Сначала загрузите Excel базу!", None

        file_paths = [self._path_of(file) for file in files]

        # 1. Manual selection takes precedence over detection.
        if manual_cabinet and manual_cabinet.strip():
            detected_cabinet = manual_cabinet
            found_by_method = "manual"
        # 2. Automatic detection from the text of all uploaded PDFs.
        else:
            all_pdf_text = "".join(self.extract_text(path) + "\n" for path in file_paths)
            detected_cabinet, found_by_method = self._detect_cabinet(all_pdf_text)
            if detected_cabinet is None:
                return "⚠️ Шкаф не опознан автоматически.\nСовет: Выберите похожий шкаф из выпадающего списка вручную.", None

        is_manual = found_by_method == "manual"
        is_clean_search = found_by_method == "number"
        remarks = self.get_remarks(detected_cabinet, is_clean_key=is_clean_search)

        if not remarks:
            return f"⚠️ Шкаф '{detected_cabinet}' выбран, но в базе нет замечаний для него.", None

        checklist = {}
        processed_count = 0
        for path in file_paths:
            fname = os.path.basename(path)
            dtype = self.determine_doc_type(fname)

            tasks = []
            if dtype in remarks:
                tasks.extend(remarks[dtype])
            if "ALL" in remarks and dtype != "С2":
                tasks.extend(remarks["ALL"])

            if tasks:
                # Drop duplicate remarks while keeping their order.
                checklist[fname] = list(dict.fromkeys(tasks))
                processed_count += 1

        pdf_title = detected_cabinet
        if is_manual:
            pdf_title += " (Выбор вручную)"

        pdf = self.create_pdf(pdf_title, checklist)
        total = sum(len(v) for v in checklist.values())

        if is_manual:
            method_str = "Ручной выбор"
        elif is_clean_search:
            method_str = "По децимальному номеру"
        else:
            method_str = "По наименованию"

        return f"✅ Готово!\n\n📂 Шкаф: {detected_cabinet}\n🔍 Метод: {method_str}\n📄 Обработано файлов: {processed_count}\n🚩 Всего замечаний: {total}", pdf

    def create_pdf(self, cabinet, data):
        """Render the checklist to an A4 PDF with one checkbox per remark.

        Args:
            cabinet: Title shown in the header (truncated to 60 characters).
            data: Mapping ``file name -> [remark, ...]``.

        Returns:
            Path of the written PDF. Every call writes into its own freshly
            created temporary directory, so simultaneous requests do not
            overwrite each other's result.
        """
        path = os.path.join(tempfile.mkdtemp(prefix="kd_checklist_"), self.PDF_FILE_NAME)
        c = canvas.Canvas(path, pagesize=A4)
        form = c.acroForm
        width, height = A4

        font_name = self._register_font()

        y = height - 50
        c.setFont(font_name, 16)
        c.drawString(50, y, "ЧЕК-ЛИСТ ПРОВЕРКИ КД")
        y -= 25
        c.setFont(font_name, 12)
        disp_cab = cabinet[:60] + "..." if len(cabinet) > 60 else cabinet
        c.drawString(50, y, f"Шкаф: {disp_cab}")
        c.drawString(400, y, f"Дата: {datetime.now().strftime('%d.%m.%Y')}")
        y -= 20
        c.line(50, y, width - 50, y)
        y -= 30

        if not data:
            c.drawString(50, y, "Нет замечаний для загруженных файлов.")
            c.save()
            return path

        cb_id = 0
        for filename, tasks in data.items():
            # Start a new page when there is no room for a file heading.
            # The font is set again after every page break.
            if y < 100:
                c.showPage()
                y = height - 50
                c.setFont(font_name, 12)

            c.setFillColor(colors.darkblue)
            c.setFont(font_name, 11)
            c.drawString(50, y, f"Файл: {filename}")
            c.setFillColor(colors.black)
            y -= 15
            c.setFont(font_name, 10)

            for task in tasks:
                if y < 80:
                    c.showPage()
                    y = height - 50
                    c.setFont(font_name, 10)

                # Visible box plus an interactive form checkbox on top of it;
                # field names must be unique within the document.
                c.rect(50, y - 10, 10, 10)
                form.checkbox(name=f"cb_{cb_id}", x=50, y=y - 10, size=10, buttonStyle='check', forceBorder=True)
                cb_id += 1

                text_start_y = y - 2
                for paragraph in task.split('\n'):
                    for line in self._wrap_words(paragraph, 95):
                        if text_start_y < 40:
                            c.showPage()
                            text_start_y = height - 50
                            c.setFont(font_name, 10)
                        c.drawString(65, text_start_y, line)
                        text_start_y -= 12

                y = text_start_y - 8

            # Light separator between files.
            y -= 10
            c.setStrokeColor(colors.lightgrey)
            c.line(50, y, width - 50, y)
            c.setStrokeColor(colors.black)
            y -= 20

        c.save()
        return path
338
+
339
+
340
def create_app():
    """Assemble the Gradio interface and return the ``gr.Blocks`` object.

    Layout: a two-column row (knowledge-base upload with cabinet dropdown and
    status box on the left, documentation upload with the action button on the
    right) followed by a result row (status text and downloadable PDF).
    A single ``KDChecker`` instance backs both event handlers.
    """
    kd_checker = KDChecker()

    with gr.Blocks(title="Генератор чек-листов КД") as blocks:
        gr.Markdown("# ✅ Генератор чек-листов КД")
        gr.Markdown("Автоматическая проверка конструкторской документации по базе знаний Excel.")

        with gr.Row():
            # Left column: knowledge base.
            with gr.Column():
                gr.Markdown("### 1. База знаний")
                excel_upload = gr.File(label="Загрузить Excel (.xlsx)")
                cabinet_dropdown = gr.Dropdown(
                    label="Или выберите шкаф-аналог вручную",
                    choices=[],
                    interactive=True,
                )
                load_status = gr.Textbox(label="Статус загрузки", lines=8, max_lines=30)

            # Right column: documentation to check.
            with gr.Column():
                gr.Markdown("### 2. Документация (PDF)")
                pdf_uploads = gr.File(label="Загрузить чертежи", file_count="multiple")
                run_button = gr.Button("Сформировать чек-лист", variant="primary")

        with gr.Row():
            result_text = gr.Textbox(label="Результат проверки", lines=8, max_lines=30)
            result_pdf = gr.File(label="Скачать PDF чек-лист")

        # Event wiring: loading the workbook refreshes the status box and the
        # dropdown choices; the button produces the status text and the PDF.
        excel_upload.upload(
            kd_checker.load_excel_db,
            inputs=[excel_upload],
            outputs=[load_status, cabinet_dropdown],
        )
        run_button.click(
            kd_checker.check_files,
            inputs=[pdf_uploads, cabinet_dropdown],
            outputs=[result_text, result_pdf],
        )

    return blocks
366
+
367
+
368
# Build the interface at import time so it is available as the module-level
# name `app`.
app = create_app()


if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script.
    app.launch()