Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,11 +15,10 @@ class KDChecker:
|
|
| 15 |
def __init__(self):
|
| 16 |
self.excel_db = pd.DataFrame()
|
| 17 |
self.cabinet_list = []
|
| 18 |
-
# Список известных обозначений документов
|
| 19 |
self.known_docs = ["Э3", "В4", "ПЭ3", "ВО", "ТЭ5", "СБ", "С5", "ОЛ", "Э1", "Э4", "Э7", "Д3", "Э6"]
|
| 20 |
|
| 21 |
def load_excel_db(self, excel_path):
|
| 22 |
-
#
|
| 23 |
if excel_path is None:
|
| 24 |
return "Файл не выбран", gr.update(choices=[], value=None)
|
| 25 |
|
|
@@ -27,7 +26,7 @@ class KDChecker:
|
|
| 27 |
sheets_log = []
|
| 28 |
|
| 29 |
try:
|
| 30 |
-
# Читаем Excel напрямую по пути
|
| 31 |
xls = pd.read_excel(excel_path, sheet_name=None, header=None)
|
| 32 |
|
| 33 |
for sheet_name, df_raw in xls.items():
|
|
@@ -35,7 +34,6 @@ class KDChecker:
|
|
| 35 |
cab_col_idx = -1
|
| 36 |
rem_col_idx = -1
|
| 37 |
|
| 38 |
-
# Ищем строку заголовка
|
| 39 |
for i in range(min(20, len(df_raw))):
|
| 40 |
row_values = [str(x).lower().strip() for x in df_raw.iloc[i].values]
|
| 41 |
c_idx = -1
|
|
@@ -76,12 +74,12 @@ class KDChecker:
|
|
| 76 |
return msg, gr.update(choices=self.cabinet_list, value=None, interactive=True)
|
| 77 |
|
| 78 |
except Exception as e:
|
| 79 |
-
|
|
|
|
| 80 |
|
| 81 |
def extract_text(self, pdf_path):
|
| 82 |
try:
|
| 83 |
full_text = ""
|
| 84 |
-
# pdfplumber открывает файл по пути
|
| 85 |
with pdfplumber.open(pdf_path) as pdf:
|
| 86 |
for page in pdf.pages:
|
| 87 |
full_text += (page.extract_text() or "") + "\n"
|
|
@@ -174,20 +172,15 @@ class KDChecker:
|
|
| 174 |
found_by_method = ""
|
| 175 |
is_manual = False
|
| 176 |
|
| 177 |
-
# 1. Ручной выбор
|
| 178 |
if manual_cabinet and manual_cabinet.strip():
|
| 179 |
detected_cabinet = manual_cabinet
|
| 180 |
found_by_method = "manual"
|
| 181 |
is_manual = True
|
| 182 |
-
|
| 183 |
-
# 2. Автопоиск
|
| 184 |
else:
|
| 185 |
all_pdf_text = ""
|
| 186 |
for file_path in files:
|
| 187 |
-
# file_path - это строка-путь
|
| 188 |
all_pdf_text += self.extract_text(file_path) + "\n"
|
| 189 |
|
| 190 |
-
# А. По номеру
|
| 191 |
pdf_numbers = self.find_all_decimal_numbers(all_pdf_text)
|
| 192 |
db_clean_keys = set(self.excel_db["Cabinet_Clean"].tolist())
|
| 193 |
|
|
@@ -197,7 +190,6 @@ class KDChecker:
|
|
| 197 |
found_by_method = "number"
|
| 198 |
break
|
| 199 |
|
| 200 |
-
# Б. По имени
|
| 201 |
if detected_cabinet == "Не определен":
|
| 202 |
unique_cabinets = self.excel_db["Cabinet"].unique()
|
| 203 |
for cab_name in unique_cabinets:
|
|
@@ -221,7 +213,6 @@ class KDChecker:
|
|
| 221 |
|
| 222 |
processed_count = 0
|
| 223 |
for file_path in files:
|
| 224 |
-
# Получаем имя файла из пути
|
| 225 |
fname = os.path.basename(file_path)
|
| 226 |
dtype = self.determine_doc_type(fname)
|
| 227 |
|
|
@@ -251,11 +242,9 @@ class KDChecker:
|
|
| 251 |
form = c.acroForm
|
| 252 |
width, height = A4
|
| 253 |
|
| 254 |
-
# Подключение шрифтов (попытка найти Arial, иначе стандартный)
|
| 255 |
font_name = 'Helvetica'
|
| 256 |
-
font_path = "arial.ttf"
|
| 257 |
|
| 258 |
-
# Проверяем системные пути Windows и локальную папку
|
| 259 |
possible_paths = ["C:\\Windows\\Fonts\\arial.ttf", "arial.ttf",
|
| 260 |
"/usr/share/fonts/truetype/msttcorefonts/Arial.ttf"]
|
| 261 |
|
|
@@ -270,7 +259,6 @@ class KDChecker:
|
|
| 270 |
|
| 271 |
y = height - 50
|
| 272 |
c.setFont(font_name, 16)
|
| 273 |
-
# Обработка кириллицы в заголовке
|
| 274 |
c.drawString(50, y, f"ЧЕК-ЛИСТ ПРОВЕРКИ КД")
|
| 275 |
y -= 25
|
| 276 |
c.setFont(font_name, 12)
|
|
@@ -352,7 +340,8 @@ def create_app():
|
|
| 352 |
with gr.Row():
|
| 353 |
with gr.Column():
|
| 354 |
gr.Markdown("### 1. База знаний")
|
| 355 |
-
|
|
|
|
| 356 |
manual_cab = gr.Dropdown(label="Или выберите шкаф-аналог вручную", choices=[], interactive=True)
|
| 357 |
db_out = gr.Textbox(label="Статус загрузки", lines=8, max_lines=30)
|
| 358 |
|
|
@@ -360,7 +349,8 @@ def create_app():
|
|
| 360 |
|
| 361 |
with gr.Column():
|
| 362 |
gr.Markdown("### 2. Документация (PDF)")
|
| 363 |
-
|
|
|
|
| 364 |
btn = gr.Button("Сформировать чек-лист", variant="primary")
|
| 365 |
|
| 366 |
with gr.Row():
|
|
|
|
| 15 |
def __init__(self):
|
| 16 |
self.excel_db = pd.DataFrame()
|
| 17 |
self.cabinet_list = []
|
|
|
|
| 18 |
self.known_docs = ["Э3", "В4", "ПЭ3", "ВО", "ТЭ5", "СБ", "С5", "ОЛ", "Э1", "Э4", "Э7", "Д3", "Э6"]
|
| 19 |
|
| 20 |
def load_excel_db(self, excel_path):
|
| 21 |
+
# Если файл не выбран
|
| 22 |
if excel_path is None:
|
| 23 |
return "Файл не выбран", gr.update(choices=[], value=None)
|
| 24 |
|
|
|
|
| 26 |
sheets_log = []
|
| 27 |
|
| 28 |
try:
|
| 29 |
+
# Читаем Excel напрямую по пути
|
| 30 |
xls = pd.read_excel(excel_path, sheet_name=None, header=None)
|
| 31 |
|
| 32 |
for sheet_name, df_raw in xls.items():
|
|
|
|
| 34 |
cab_col_idx = -1
|
| 35 |
rem_col_idx = -1
|
| 36 |
|
|
|
|
| 37 |
for i in range(min(20, len(df_raw))):
|
| 38 |
row_values = [str(x).lower().strip() for x in df_raw.iloc[i].values]
|
| 39 |
c_idx = -1
|
|
|
|
| 74 |
return msg, gr.update(choices=self.cabinet_list, value=None, interactive=True)
|
| 75 |
|
| 76 |
except Exception as e:
|
| 77 |
+
# Выводим ошибку в интерфейс, чтобы не гадать
|
| 78 |
+
return f"❌ Критическая ошибка чтения Excel: {str(e)}", gr.update(choices=[], value=None)
|
| 79 |
|
| 80 |
def extract_text(self, pdf_path):
|
| 81 |
try:
|
| 82 |
full_text = ""
|
|
|
|
| 83 |
with pdfplumber.open(pdf_path) as pdf:
|
| 84 |
for page in pdf.pages:
|
| 85 |
full_text += (page.extract_text() or "") + "\n"
|
|
|
|
| 172 |
found_by_method = ""
|
| 173 |
is_manual = False
|
| 174 |
|
|
|
|
| 175 |
if manual_cabinet and manual_cabinet.strip():
|
| 176 |
detected_cabinet = manual_cabinet
|
| 177 |
found_by_method = "manual"
|
| 178 |
is_manual = True
|
|
|
|
|
|
|
| 179 |
else:
|
| 180 |
all_pdf_text = ""
|
| 181 |
for file_path in files:
|
|
|
|
| 182 |
all_pdf_text += self.extract_text(file_path) + "\n"
|
| 183 |
|
|
|
|
| 184 |
pdf_numbers = self.find_all_decimal_numbers(all_pdf_text)
|
| 185 |
db_clean_keys = set(self.excel_db["Cabinet_Clean"].tolist())
|
| 186 |
|
|
|
|
| 190 |
found_by_method = "number"
|
| 191 |
break
|
| 192 |
|
|
|
|
| 193 |
if detected_cabinet == "Не определен":
|
| 194 |
unique_cabinets = self.excel_db["Cabinet"].unique()
|
| 195 |
for cab_name in unique_cabinets:
|
|
|
|
| 213 |
|
| 214 |
processed_count = 0
|
| 215 |
for file_path in files:
|
|
|
|
| 216 |
fname = os.path.basename(file_path)
|
| 217 |
dtype = self.determine_doc_type(fname)
|
| 218 |
|
|
|
|
| 242 |
form = c.acroForm
|
| 243 |
width, height = A4
|
| 244 |
|
|
|
|
| 245 |
font_name = 'Helvetica'
|
| 246 |
+
font_path = "arial.ttf"
|
| 247 |
|
|
|
|
| 248 |
possible_paths = ["C:\\Windows\\Fonts\\arial.ttf", "arial.ttf",
|
| 249 |
"/usr/share/fonts/truetype/msttcorefonts/Arial.ttf"]
|
| 250 |
|
|
|
|
| 259 |
|
| 260 |
y = height - 50
|
| 261 |
c.setFont(font_name, 16)
|
|
|
|
| 262 |
c.drawString(50, y, f"ЧЕК-ЛИСТ ПРОВЕРКИ КД")
|
| 263 |
y -= 25
|
| 264 |
c.setFont(font_name, 12)
|
|
|
|
| 340 |
with gr.Row():
|
| 341 |
with gr.Column():
|
| 342 |
gr.Markdown("### 1. База знаний")
|
| 343 |
+
# ВАЖНО: Добавлен type="filepath", чтобы не зависало
|
| 344 |
+
db_in = gr.File(label="Загрузить Excel (.xlsx)", type="filepath")
|
| 345 |
manual_cab = gr.Dropdown(label="Или выберите шкаф-аналог вручную", choices=[], interactive=True)
|
| 346 |
db_out = gr.Textbox(label="Статус загрузки", lines=8, max_lines=30)
|
| 347 |
|
|
|
|
| 349 |
|
| 350 |
with gr.Column():
|
| 351 |
gr.Markdown("### 2. Документация (PDF)")
|
| 352 |
+
# ВАЖНО: Добавлен type="filepath", чтобы не зависало
|
| 353 |
+
files_in = gr.File(label="Загрузить чертежи", file_count="multiple", type="filepath")
|
| 354 |
btn = gr.Button("Сформировать чек-лист", variant="primary")
|
| 355 |
|
| 356 |
with gr.Row():
|