Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,28 +11,31 @@ from reportlab.pdfbase import pdfmetrics
|
|
| 11 |
from reportlab.pdfbase.ttfonts import TTFont
|
| 12 |
from reportlab.lib import colors
|
| 13 |
|
| 14 |
-
|
| 15 |
class KDChecker:
|
| 16 |
def __init__(self):
|
| 17 |
self.excel_db = pd.DataFrame()
|
| 18 |
self.cabinet_list = []
|
|
|
|
| 19 |
self.known_docs = ["Э3", "В4", "ПЭ3", "ВО", "ТЭ5", "СБ", "С5", "ОЛ", "Э1", "Э4", "Э7", "Д3", "Э6"]
|
| 20 |
|
| 21 |
-
def load_excel_db(self,
|
| 22 |
-
|
|
|
|
| 23 |
return "Файл не выбран", gr.update(choices=[], value=None)
|
| 24 |
|
| 25 |
all_data = []
|
| 26 |
sheets_log = []
|
| 27 |
|
| 28 |
try:
|
| 29 |
-
|
|
|
|
| 30 |
|
| 31 |
for sheet_name, df_raw in xls.items():
|
| 32 |
header_row_index = -1
|
| 33 |
cab_col_idx = -1
|
| 34 |
rem_col_idx = -1
|
| 35 |
|
|
|
|
| 36 |
for i in range(min(20, len(df_raw))):
|
| 37 |
row_values = [str(x).lower().strip() for x in df_raw.iloc[i].values]
|
| 38 |
c_idx = -1
|
|
@@ -47,7 +50,7 @@ class KDChecker:
|
|
| 47 |
break
|
| 48 |
|
| 49 |
if header_row_index != -1:
|
| 50 |
-
df = pd.read_excel(
|
| 51 |
df_subset = df.iloc[:, [cab_col_idx, rem_col_idx]]
|
| 52 |
df_subset.columns = ["Cabinet", "Remark"]
|
| 53 |
|
|
@@ -78,6 +81,7 @@ class KDChecker:
|
|
| 78 |
def extract_text(self, pdf_path):
|
| 79 |
try:
|
| 80 |
full_text = ""
|
|
|
|
| 81 |
with pdfplumber.open(pdf_path) as pdf:
|
| 82 |
for page in pdf.pages:
|
| 83 |
full_text += (page.extract_text() or "") + "\n"
|
|
@@ -179,8 +183,9 @@ class KDChecker:
|
|
| 179 |
# 2. Автопоиск
|
| 180 |
else:
|
| 181 |
all_pdf_text = ""
|
| 182 |
-
for
|
| 183 |
-
|
|
|
|
| 184 |
|
| 185 |
# А. По номеру
|
| 186 |
pdf_numbers = self.find_all_decimal_numbers(all_pdf_text)
|
|
@@ -206,7 +211,6 @@ class KDChecker:
|
|
| 206 |
if found_by_method == "name": break
|
| 207 |
|
| 208 |
if detected_cabinet == "Не определен":
|
| 209 |
-
examples = ", ".join(self.excel_db["Cabinet"].head(3).tolist())
|
| 210 |
return f"⚠️ Шкаф не опознан автоматически.\nСовет: Выберите похожий шкаф из выпадающего списка вручную.", None
|
| 211 |
|
| 212 |
is_clean_search = (found_by_method == "number")
|
|
@@ -216,8 +220,9 @@ class KDChecker:
|
|
| 216 |
return f"⚠️ Шкаф '{detected_cabinet}' выбран, но в базе нет замечаний для него.", None
|
| 217 |
|
| 218 |
processed_count = 0
|
| 219 |
-
for
|
| 220 |
-
|
|
|
|
| 221 |
dtype = self.determine_doc_type(fname)
|
| 222 |
|
| 223 |
tasks = []
|
|
@@ -265,6 +270,7 @@ class KDChecker:
|
|
| 265 |
|
| 266 |
y = height - 50
|
| 267 |
c.setFont(font_name, 16)
|
|
|
|
| 268 |
c.drawString(50, y, f"ЧЕК-ЛИСТ ПРОВЕРКИ КД")
|
| 269 |
y -= 25
|
| 270 |
c.setFont(font_name, 12)
|
|
@@ -368,5 +374,4 @@ def create_app():
|
|
| 368 |
app = create_app()
|
| 369 |
|
| 370 |
if __name__ == "__main__":
|
| 371 |
-
|
| 372 |
-
app.launch(auth=("admin", "12345"))
|
|
|
|
| 11 |
from reportlab.pdfbase.ttfonts import TTFont
|
| 12 |
from reportlab.lib import colors
|
| 13 |
|
|
|
|
| 14 |
class KDChecker:
|
| 15 |
def __init__(self):
|
| 16 |
self.excel_db = pd.DataFrame()
|
| 17 |
self.cabinet_list = []
|
| 18 |
+
# Список известных обозначений документов
|
| 19 |
self.known_docs = ["Э3", "В4", "ПЭ3", "ВО", "ТЭ5", "СБ", "С5", "ОЛ", "Э1", "Э4", "Э7", "Д3", "Э6"]
|
| 20 |
|
| 21 |
+
def load_excel_db(self, excel_path):
|
| 22 |
+
# In Gradio 4.x the uploaded file arrives as a path string, not a file object
|
| 23 |
+
if excel_path is None:
|
| 24 |
return "Файл не выбран", gr.update(choices=[], value=None)
|
| 25 |
|
| 26 |
all_data = []
|
| 27 |
sheets_log = []
|
| 28 |
|
| 29 |
try:
|
| 30 |
+
# Читаем Excel напрямую по пути (excel_path)
|
| 31 |
+
xls = pd.read_excel(excel_path, sheet_name=None, header=None)
|
| 32 |
|
| 33 |
for sheet_name, df_raw in xls.items():
|
| 34 |
header_row_index = -1
|
| 35 |
cab_col_idx = -1
|
| 36 |
rem_col_idx = -1
|
| 37 |
|
| 38 |
+
# Ищем строку заголовка
|
| 39 |
for i in range(min(20, len(df_raw))):
|
| 40 |
row_values = [str(x).lower().strip() for x in df_raw.iloc[i].values]
|
| 41 |
c_idx = -1
|
|
|
|
| 50 |
break
|
| 51 |
|
| 52 |
if header_row_index != -1:
|
| 53 |
+
df = pd.read_excel(excel_path, sheet_name=sheet_name, header=header_row_index)
|
| 54 |
df_subset = df.iloc[:, [cab_col_idx, rem_col_idx]]
|
| 55 |
df_subset.columns = ["Cabinet", "Remark"]
|
| 56 |
|
|
|
|
| 81 |
def extract_text(self, pdf_path):
|
| 82 |
try:
|
| 83 |
full_text = ""
|
| 84 |
+
# pdfplumber открывает файл по пути
|
| 85 |
with pdfplumber.open(pdf_path) as pdf:
|
| 86 |
for page in pdf.pages:
|
| 87 |
full_text += (page.extract_text() or "") + "\n"
|
|
|
|
| 183 |
# 2. Автопоиск
|
| 184 |
else:
|
| 185 |
all_pdf_text = ""
|
| 186 |
+
for file_path in files:
|
| 187 |
+
# file_path - это строка-путь
|
| 188 |
+
all_pdf_text += self.extract_text(file_path) + "\n"
|
| 189 |
|
| 190 |
# А. По номеру
|
| 191 |
pdf_numbers = self.find_all_decimal_numbers(all_pdf_text)
|
|
|
|
| 211 |
if found_by_method == "name": break
|
| 212 |
|
| 213 |
if detected_cabinet == "Не определен":
|
|
|
|
| 214 |
return f"⚠️ Шкаф не опознан автоматически.\nСовет: Выберите похожий шкаф из выпадающего списка вручную.", None
|
| 215 |
|
| 216 |
is_clean_search = (found_by_method == "number")
|
|
|
|
| 220 |
return f"⚠️ Шкаф '{detected_cabinet}' выбран, но в базе нет замечаний для него.", None
|
| 221 |
|
| 222 |
processed_count = 0
|
| 223 |
+
for file_path in files:
|
| 224 |
+
# Получаем имя файла из пути
|
| 225 |
+
fname = os.path.basename(file_path)
|
| 226 |
dtype = self.determine_doc_type(fname)
|
| 227 |
|
| 228 |
tasks = []
|
|
|
|
| 270 |
|
| 271 |
y = height - 50
|
| 272 |
c.setFont(font_name, 16)
|
| 273 |
+
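# Title line. NOTE(review): no Cyrillic-specific handling happens here; rendering presumably relies on font_name being a registered TTF font with Cyrillic glyphs — confirm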
|
| 274 |
c.drawString(50, y, f"ЧЕК-ЛИСТ ПРОВЕРКИ КД")
|
| 275 |
y -= 25
|
| 276 |
c.setFont(font_name, 12)
|
|
|
|
| 374 |
app = create_app()
|
| 375 |
|
| 376 |
if __name__ == "__main__":
|
| 377 |
+
app.launch(auth=("admin", "12345"))
|
|
|