Spaces:

SimrusDenuvo
/

chat

Sleeping

App Files Files Community

SimrusDenuvo committed on May 16

Commit

34db0e6

verified ·

1 Parent(s): 8519047

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -85

app.py CHANGED Viewed

@@ -4,127 +4,120 @@ from transformers import pipeline
 from datasets import load_dataset
 # Загружаем датасет
-dataset = load_dataset("Romjiik/Russian_bank_reviews", split="train")
-# Примеры для few-shot (без 'rating')
-few_shot_examples = []
-for row in dataset.select(range(2)):
-    review = row["review"]
-    ex = f"Клиент: {review}\nОтвет: Спасибо за обращение! Уточните, пожалуйста, детали ситуации, чтобы мы могли помочь."
-    few_shot_examples.append(ex)
-# Системные инструкции
 cot_instruction = (
-    "Ты — банковский помощник. Твоя задача — классифицировать клиентское обращение.\n"
-    "Проанализируй обращение пошагово, выдели ключевые слова, выясни намерение клиента,\n"
-    "и отнеси его к одной из категорий: вход в ЛК, SMS, заявка, ошибка, перевод, карта, другое."
 )
 simple_instruction = (
-    "Ты — банковский помощник. Классифицируй обращение пользователя кратко и по существу,\n"
-    "укажи одну категорию: вход в ЛК, SMS, заявка, ошибка, перевод, карта, другое."
 )
-# Модели
 models = {
-    "ChatGPT-like (ruGPT3-small)": pipeline("text-generation", model="ai-forever/rugpt3small_based_on_gpt2", tokenizer="ai-forever/rugpt3small_based_on_gpt2", device=-1),
-    "DeepSeek-like (rubert-tiny2)": pipeline("text-classification", model="cointegrated/rubert-tiny2", tokenizer="cointegrated/rubert-tiny2", device=-1),
-    "GigaChat-like (sberbank-ai/rugpt3medium_based_on_gpt2)": pipeline("text-generation", model="sberbank-ai/rugpt3medium_based_on_gpt2", tokenizer="sberbank-ai/rugpt3medium_based_on_gpt2", device=-1),
 }
-# Промпт CoT
 def build_cot_prompt(user_input):
     examples = "\n\n".join(few_shot_examples)
     return (
-        f"{cot_instruction}\n\n{examples}\n\nКлиент: {user_input}\n"
-        "Рассуждение и классификация:"
     )
-# Промпт простой
 def build_simple_prompt(user_input):
     examples = "\n\n".join(few_shot_examples)
     return (
-        f"{simple_instruction}\n\n{examples}\n\nКлиент: {user_input}\n"
-        "Категория:"
     )
-# Генерация ответов по двум промптам
 def generate_dual_answers(user_input):
     results = {}
     prompt_cot = build_cot_prompt(user_input)
     prompt_simple = build_simple_prompt(user_input)
     for name, pipe in models.items():
-        if name.startswith("DeepSeek-like"):
-            # Text-classification модель
-            start_simple = time.time()
-            classification = pipe(user_input)[0]['label']
-            end_simple = round(time.time() - start_simple, 2)
-            results[name] = {
-                "cot_answer": "(CoT не поддерживается)",
-                "cot_time": "-",
-                "simple_answer": classification,
-                "simple_time": end_simple
-            }
-        else:
             # CoT
             start_cot = time.time()
-            out_cot = pipe(prompt_cot, max_length=200, do_sample=True, top_p=0.9, temperature=0.7)[0]["generated_text"]
             end_cot = round(time.time() - start_cot, 2)
             answer_cot = out_cot.strip().split("\n")[-1]
             # Simple
             start_simple = time.time()
-            out_simple = pipe(prompt_simple, max_length=150, do_sample=True, top_p=0.9, temperature=0.7)[0]["generated_text"]
             end_simple = round(time.time() - start_simple, 2)
             answer_simple = out_simple.strip().split("\n")[-1]
-            results[name] = {
-                "cot_answer": answer_cot,
-                "cot_time": end_cot,
-                "simple_answer": answer_simple,
-                "simple_time": end_simple
-            }
-    return (
-        results["ChatGPT-like (ruGPT3-small)"]["cot_answer"], f"{results['ChatGPT-like (ruGPT3-small)']['cot_time']} сек",
-        results["ChatGPT-like (ruGPT3-small)"]["simple_answer"], f"{results['ChatGPT-like (ruGPT3-small)']['simple_time']} сек",
-        results["DeepSeek-like (rubert-tiny2)"]["cot_answer"], results["DeepSeek-like (rubert-tiny2)"]["cot_time"],
-        results["DeepSeek-like (rubert-tiny2)"]["simple_answer"], f"{results['DeepSeek-like (rubert-tiny2)']['simple_time']} сек",
-        results["GigaChat-like (sberbank-ai/rugpt3medium_based_on_gpt2)"]["cot_answer"], f"{results['GigaChat-like (sberbank-ai/rugpt3medium_based_on_gpt2)']['cot_time']} сек",
-        results["GigaChat-like (sberbank-ai/rugpt3medium_based_on_gpt2)"]["simple_answer"], f"{results['GigaChat-like (sberbank-ai/rugpt3medium_based_on_gpt2)']['simple_time']} сек",
     )
-# Интерфейс Gradio
 with gr.Blocks() as demo:
-    gr.Markdown("## 🏦 Классификация клиентских обращений (CoT + обычный)")
-    inp = gr.Textbox(label="Обращение клиента", placeholder="Например: Я не могу попасть в личный кабинет", lines=2)
-    btn = gr.Button("Классифицировать")
-    gr.Markdown("### ChatGPT-like (ruGPT3-small)")
-    cot1 = gr.Textbox(label="CoT ответ")
-    cot1_time = gr.Textbox(label="Время CoT")
-    simple1 = gr.Textbox(label="Обычный ответ")
-    simple1_time = gr.Textbox(label="Время обычного")
-    gr.Markdown("### DeepSeek-like (rubert-tiny2)")
-    cot2 = gr.Textbox(label="CoT ответ")
-    cot2_time = gr.Textbox(label="Время CoT")
-    simple2 = gr.Textbox(label="Обычный ответ")
-    simple2_time = gr.Textbox(label="Время обычного")
-    gr.Markdown("### GigaChat-like (ruGPT3-medium)")
-    cot3 = gr.Textbox(label="CoT ответ")
-    cot3_time = gr.Textbox(label="Время CoT")
-    simple3 = gr.Textbox(label="Обычный ответ")
-    simple3_time = gr.Textbox(label="Время обычного")
-    btn.click(generate_dual_answers, inputs=[inp], outputs=[
-        cot1, cot1_time, simple1, simple1_time,
-        cot2, cot2_time, simple2, simple2_time,
-        cot3, cot3_time, simple3, simple3_time
-    ])
-if __name__ == '__main__':
-    demo.launch()

 from datasets import load_dataset
 # Load the dataset
+DATASET_NAME = "Romjiik/Russian_bank_reviews"
+dataset = load_dataset(DATASET_NAME, split="train")  # NOTE(review): `dataset` is not referenced again in the visible code (the few-shot examples below are hard-coded) — confirm the download is still needed
+# Short list of examples substituted into the prompt (for classification)
+few_shot_examples = [
+    "Клиент: Не могу войти в приложение.\nКлассификация: Техническая проблема",
+    "Клиент: Почему с меня сняли деньги дважды?\nКлассификация: Ошибка транзакции",
+    "Клиент: Хочу оформить кредит.\nКлассификация: Запрос на продукт",
+    "Клиент: У меня украли карту.\nКлассификация: Безопасность",
+    "Клиент: Не приходит СМС для входа.\nКлассификация: Проблема авторизации"
+]
+# Instructions
 cot_instruction = (  # instruction text placed at the top of the step-by-step (CoT) prompt
+    "Ты — банковский помощник. Клиент описывает ситуацию. "
+    "Проанализируй обращение шаг за шагом и определи категорию (например: 'Техническая проблема', 'Запрос на продукт', 'Безопасность' и т.п.)"
 )
 simple_instruction = (  # instruction text placed at the top of the short, direct prompt
+    "Ты — банковский помощник. Клиент описывает обращение. "
+    "Кратко укажи категорию обращения (например: 'Техническая проблема', 'Запрос на продукт', 'Безопасность' и т.п.)."
 )
+# Models in use (CPU-compatible, ≤16GB)
 models = {  # display label -> transformers pipeline; the label is also used as the UI section heading
+    "ChatGPT-like (FRED-T5-small)": pipeline("text2text-generation", model="ai-forever/FRED-T5-Base", tokenizer="ai-forever/FRED-T5-Base", device=-1),  # NOTE(review): label says "small" but the checkpoint loaded is FRED-T5-Base — confirm which is intended
+    "DeepSeek-like (ruGPT3-small)": pipeline("text-generation", model="ai-forever/rugpt3small_based_on_gpt2", tokenizer="ai-forever/rugpt3small_based_on_gpt2", device=-1),
+    "GigaChat-like (RuBERT-tiny2-clf)": pipeline("text-classification", model="cointegrated/rubert-tiny2", tokenizer="cointegrated/rubert-tiny2", device=-1)  # NOTE(review): presumably a base encoder without a fine-tuned classification head — verify that the returned labels are meaningful
 }
+# Построение промптов
 def build_cot_prompt(user_input):
     examples = "\n\n".join(few_shot_examples)
     return (
+        f"{cot_instruction}\n\n{examples}\n\nКлиент: {user_input}\nРассуждение и классификация:"
     )
 def build_simple_prompt(user_input):
     examples = "\n\n".join(few_shot_examples)
     return (
+        f"{simple_instruction}\n\n{examples}\n\nКлиент: {user_input}\nКлассификация:"
     )
+# Генерация классификаций
 def generate_dual_answers(user_input):
     results = {}
     prompt_cot = build_cot_prompt(user_input)
     prompt_simple = build_simple_prompt(user_input)
     for name, pipe in models.items():
+        if "text-generation" in str(pipe.task):
             # CoT
             start_cot = time.time()
+            out_cot = pipe(prompt_cot, max_length=256, do_sample=True, top_p=0.9, temperature=0.7)[0]["generated_text"]
             end_cot = round(time.time() - start_cot, 2)
             answer_cot = out_cot.strip().split("\n")[-1]
             # Simple
             start_simple = time.time()
+            out_simple = pipe(prompt_simple, max_length=128, do_sample=True, top_p=0.9, temperature=0.7)[0]["generated_text"]
             end_simple = round(time.time() - start_simple, 2)
             answer_simple = out_simple.strip().split("\n")[-1]
+        elif "text2text-generation" in str(pipe.task):
+            start_cot = time.time()
+            out_cot = pipe(prompt_cot, max_new_tokens=50)[0]["generated_text"]
+            end_cot = round(time.time() - start_cot, 2)
+            start_simple = time.time()
+            out_simple = pipe(prompt_simple, max_new_tokens=30)[0]["generated_text"]
+            end_simple = round(time.time() - start_simple, 2)
+            answer_cot = out_cot.strip()
+            answer_simple = out_simple.strip()
+        elif "text-classification" in str(pipe.task):
+            # Для классификации используем только сам ввод без промпта
+            start = time.time()
+            answer = pipe(user_input)[0]['label']
+            end = round(time.time() - start, 2)
+            answer_cot = answer
+            answer_simple = answer
+            end_cot = end_simple = end
+        results[name] = {
+            "cot_answer": answer_cot,
+            "cot_time": end_cot,
+            "simple_answer": answer_simple,
+            "simple_time": end_simple
+        }
+    return tuple(
+        results[model][key] for model in models for key in ["cot_answer", "cot_time", "simple_answer", "simple_time"]
     )
+# Gradio UI
 with gr.Blocks() as demo:
+    gr.Markdown("## 🧠 Классификация клиентских обращений в банке (CoT vs обычный промпт)")
+    inp = gr.Textbox(label="Вопрос клиента", placeholder="Например: У меня не проходит оплата картой", lines=2)
+    btn = gr.Button("Сгенерировать")
+    results_blocks = []  # flat list of output boxes, four per model, in `models` iteration order
+    for name in models:
+        gr.Markdown(f"### {name}")
+        cot = gr.Textbox(label="CoT ответ")
+        cot_time = gr.Textbox(label="Время CoT")
+        simple = gr.Textbox(label="Обычный ответ")
+        simple_time = gr.Textbox(label="Время обычного")
+        results_blocks.extend([cot, cot_time, simple, simple_time])  # same field order as the tuple returned by generate_dual_answers
+    btn.click(generate_dual_answers, inputs=[inp], outputs=results_blocks)
+demo.launch()  # NOTE(review): the previous revision wrapped this call in `if __name__ == '__main__':`; here it runs whenever the module is executed or imported