Spaces:

SimrusDenuvo
/

chat

Sleeping

App Files Files Community

SimrusDenuvo committed on May 16

Commit

22c5acd

verified ·

1 Parent(s): 88dd26b

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -81

app.py CHANGED Viewed

@@ -4,51 +4,45 @@ from transformers import pipeline
 from datasets import load_dataset
 # Загружаем датасет
-DATASET_NAME = "Romjiik/Russian_bank_reviews"
-dataset = load_dataset(DATASET_NAME, split="train")
-# Краткий список примеров для подстановки в промпт (для классификации)
 few_shot_examples = [
-    "Клиент: Не могу войти в приложение.\nКлассификация: Техническая проблема",
-    "Клиент: Почему с меня сняли деньги дважды?\nКлассификация: Ошибка транзакции",
-    "Клиент: Хочу оформить кредит.\nКлассификация: Запрос на продукт",
-    "Клиент: У меня украли карту.\nКлассификация: Безопасность",
-    "Клиент: Не приходит СМС для входа.\nКлассификация: Проблема авторизации"
 ]
 # Инструкции
 cot_instruction = (
-    "Ты — банковский помощник. Клиент описывает ситуацию. "
-    "Проанализируй обращение шаг за шагом и определи категорию (например: 'Техническая проблема', 'Запрос на продукт', 'Безопасность' и т.п.)"
 )
 simple_instruction = (
-    "Ты — банковский помощник. Клиент описывает обращение. "
-    "Кратко укажи категорию обращения (например: 'Техническая проблема', 'Запрос на продукт', 'Безопасность' и т.п.)."
 )
-# Используемые модели (CPU-compatible, ≤16GB)
-models = {
-    "ChatGPT-like (FRED-T5-small)": pipeline("text2text-generation", model="cointegrated/rugpt3small_based_on_gpt2", tokenizer="ai-forever/FRED-T5-Base", device=-1),
-    "DeepSeek-like (ruGPT3-small)": pipeline("text-generation", model="ai-forever/rugpt3small_based_on_gpt2", tokenizer="ai-forever/rugpt3small_based_on_gpt2", device=-1),
-    "GigaChat-like (RuBERT-tiny2-clf)": pipeline("text-classification", model="cointegrated/rubert-tiny2", tokenizer="cointegrated/rubert-tiny2", device=-1)
-}
-# Построение промптов
 def build_cot_prompt(user_input):
     examples = "\n\n".join(few_shot_examples)
-    return (
-        f"{cot_instruction}\n\n{examples}\n\nКлиент: {user_input}\nРассуждение и классификация:"
-    )
 def build_simple_prompt(user_input):
     examples = "\n\n".join(few_shot_examples)
-    return (
-        f"{simple_instruction}\n\n{examples}\n\nКлиент: {user_input}\nКлассификация:"
-    )
-# Генерация классификаций
 def generate_dual_answers(user_input):
     results = {}
@@ -56,68 +50,62 @@ def generate_dual_answers(user_input):
     prompt_simple = build_simple_prompt(user_input)
     for name, pipe in models.items():
-        if "text-generation" in str(pipe.task):
-            # CoT
-            start_cot = time.time()
-            out_cot = pipe(prompt_cot, max_length=256, do_sample=True, top_p=0.9, temperature=0.7)[0]["generated_text"]
-            end_cot = round(time.time() - start_cot, 2)
-            answer_cot = out_cot.strip().split("\n")[-1]
-            # Simple
-            start_simple = time.time()
-            out_simple = pipe(prompt_simple, max_length=128, do_sample=True, top_p=0.9, temperature=0.7)[0]["generated_text"]
-            end_simple = round(time.time() - start_simple, 2)
-            answer_simple = out_simple.strip().split("\n")[-1]
-        elif "text2text-generation" in str(pipe.task):
             start_cot = time.time()
-            out_cot = pipe(prompt_cot, max_new_tokens=50)[0]["generated_text"]
             end_cot = round(time.time() - start_cot, 2)
             start_simple = time.time()
-            out_simple = pipe(prompt_simple, max_new_tokens=30)[0]["generated_text"]
             end_simple = round(time.time() - start_simple, 2)
-            answer_cot = out_cot.strip()
-            answer_simple = out_simple.strip()
-        elif "text-classification" in str(pipe.task):
-            # Для классификации используем только сам ввод без промпта
-            start = time.time()
-            answer = pipe(user_input)[0]['label']
-            end = round(time.time() - start, 2)
-            answer_cot = answer
-            answer_simple = answer
-            end_cot = end_simple = end
-        results[name] = {
-            "cot_answer": answer_cot,
-            "cot_time": end_cot,
-            "simple_answer": answer_simple,
-            "simple_time": end_simple
-        }
-    return tuple(
-        results[model][key] for model in models for key in ["cot_answer", "cot_time", "simple_answer", "simple_time"]
     )
-# Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("## 🧠 Классификация клиентских обращений в банке (CoT vs обычный промпт)")
-    inp = gr.Textbox(label="Вопрос клиента", placeholder="Например: У меня не проходит оплата картой", lines=2)
-    btn = gr.Button("Сгенерировать")
-    results_blocks = []
-    for name in models:
-        gr.Markdown(f"### {name}")
-        cot = gr.Textbox(label="CoT ответ")
-        cot_time = gr.Textbox(label="Время CoT")
-        simple = gr.Textbox(label="Обычный ответ")
-        simple_time = gr.Textbox(label="Время обычного")
-        results_blocks.extend([cot, cot_time, simple, simple_time])
-    btn.click(generate_dual_answers, inputs=[inp], outputs=results_blocks)
 demo.launch()

 from datasets import load_dataset
 # Загружаем датасет
+dataset = load_dataset("Romjiik/Russian_bank_reviews", split="train")
+# Примеры классификации (вручную или через разметку датасета)
 few_shot_examples = [
+    "Клиент: Я не могу войти в личный кабинет\nКлассификация: Проблема с доступом",
+    "Клиент: Хочу оформить кредит на авто\nКлассификация: Кредитование",
+    "Клиент: Почему списали деньги с карты?\nКлассификация: Жалоба на транзакцию"
 ]
 # Инструкции
 cot_instruction = (
+    "Ты — банковский помощник. Клиент описывает обращение."
+    " Проанализируй обращение пошагово, определи его суть и укажи категорию обращения."
+    " Дай только итоговую классификацию."
 )
 simple_instruction = (
+    "Ты — банковский помощник. Определи, к какой категории относится обращение клиента."
+    " Ответ должен быть кратким: только категория."
 )
+# Промпт CoT
 def build_cot_prompt(user_input):
     examples = "\n\n".join(few_shot_examples)
+    return f"{cot_instruction}\n\n{examples}\n\nКлиент: {user_input}\nРассуждение:"
+# Промпт простой
 def build_simple_prompt(user_input):
     examples = "\n\n".join(few_shot_examples)
+    return f"{simple_instruction}\n\n{examples}\n\nКлиент: {user_input}\nКлассификация:"
+# Подключаем реальные модели (только существующие и совместимые)
+models = {
+    "GPT2-large": pipeline("text-generation", model="cointegrated/rugpt2-large", tokenizer="cointegrated/rugpt2-large", device=-1),
+    "RuBERT-tiny2": pipeline("text-classification", model="cointegrated/rubert-tiny2", tokenizer="cointegrated/rubert-tiny2", device=-1),
+    "ruGPT3-medium": pipeline("text-generation", model="IlyaGusev/rugpt3medium_based_on_gpt2", tokenizer="IlyaGusev/rugpt3medium_based_on_gpt2", device=-1),
+}
+# Генерация ответов
 def generate_dual_answers(user_input):
     results = {}
     prompt_simple = build_simple_prompt(user_input)
     for name, pipe in models.items():
+        if "classification" in str(pipe.task):
+            start = time.time()
+            simple = pipe(user_input)[0]['label']
+            end = round(time.time() - start, 2)
+            results[name] = {
+                "cot": "(не поддерживается)",
+                "cot_time": "-",
+                "simple": simple,
+                "simple_time": f"{end} сек"
+            }
+        else:
             start_cot = time.time()
+            out_cot = pipe(prompt_cot, max_length=200, do_sample=True, top_p=0.9, temperature=0.7)[0]["generated_text"]
             end_cot = round(time.time() - start_cot, 2)
+            cot_answer = out_cot.split("Классификация:")[-1].strip()
             start_simple = time.time()
+            out_simple = pipe(prompt_simple, max_length=200, do_sample=True, top_p=0.9, temperature=0.7)[0]["generated_text"]
             end_simple = round(time.time() - start_simple, 2)
+            simple_answer = out_simple.split("Классификация:")[-1].strip()
+            results[name] = {
+                "cot": cot_answer,
+                "cot_time": f"{end_cot} сек",
+                "simple": simple_answer,
+                "simple_time": f"{end_simple} сек"
+            }
+    return (
+        results["GPT2-large"]["cot"], results["GPT2-large"]["cot_time"], results["GPT2-large"]["simple"], results["GPT2-large"]["simple_time"],
+        results["RuBERT-tiny2"]["cot"], results["RuBERT-tiny2"]["cot_time"], results["RuBERT-tiny2"]["simple"], results["RuBERT-tiny2"]["simple_time"],
+        results["ruGPT3-medium"]["cot"], results["ruGPT3-medium"]["cot_time"], results["ruGPT3-medium"]["simple"], results["ruGPT3-medium"]["simple_time"]
     )
+# Интерфейс Gradio
 with gr.Blocks() as demo:
+    gr.Markdown("## 🏦 Классификация клиентских обращений (CoT и обычный промпт)")
+    inp = gr.Textbox(label="Обращение клиента", placeholder="Например: Почему не работает мобильный банк?", lines=2)
+    btn = gr.Button("Анализировать")
+    gr.Markdown("### GPT2-large")
+    cot1, cot1_time = gr.Textbox(label="CoT ответ"), gr.Textbox(label="Время CoT")
+    simple1, simple1_time = gr.Textbox(label="Обычный ответ"), gr.Textbox(label="Время ответа")
+    gr.Markdown("### RuBERT-tiny2")
+    cot2, cot2_time = gr.Textbox(label="CoT ответ"), gr.Textbox(label="Время CoT")
+    simple2, simple2_time = gr.Textbox(label="Обычный ответ"), gr.Textbox(label="Время ответа")
+    gr.Markdown("### ruGPT3-medium")
+    cot3, cot3_time = gr.Textbox(label="CoT ответ"), gr.Textbox(label="Время CoT")
+    simple3, simple3_time = gr.Textbox(label="Обычный ответ"), gr.Textbox(label="Время ответа")
+    btn.click(generate_dual_answers, inputs=[inp], outputs=[
+        cot1, cot1_time, simple1, simple1_time,
+        cot2, cot2_time, simple2, simple2_time,
+        cot3, cot3_time, simple3, simple3_time
+    ])
 demo.launch()