skatzR
/

RQA-R2

@@ -1,13 +1,34 @@
 # requirements
-!pip install torch==2.8.0 torchvision==0.17.2
 !pip install transformers==4.48.3 tokenizers sentencepiece accelerate
 import torch
 from typing import List, Optional
 from transformers import AutoTokenizer, AutoModel
 ERROR_NAMES_RU = {
     "false_causality": "Ложная причинно-следственная связь",
     "unsupported_claim": "Неподкрепленное утверждение",
@@ -18,6 +39,10 @@ ERROR_NAMES_RU = {
 }
 class RQAJudge:
     def __init__(self, model_name="skatzR/RQA-R2", device=None, max_length: int = 512):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
@@ -35,16 +60,24 @@ class RQAJudge:
         self.model.eval()
         cfg = self.model.config
-        self.error_types = list(cfg.error_types)
-        self.temp_issue = float(cfg.temperature_has_issue)
-        self.temp_hidden = float(cfg.temperature_is_hidden)
-        self.temp_errors = list(cfg.temperature_errors)
-        self.threshold_issue = float(cfg.threshold_has_issue)
-        self.threshold_hidden = float(cfg.threshold_is_hidden)
-        self.threshold_error = float(cfg.threshold_error)
-        self.threshold_errors = list(cfg.threshold_errors)
     @torch.no_grad()
     def infer(
@@ -73,20 +106,15 @@ class RQAJudge:
         outputs = self.model(**inputs)
         issue_logit = outputs["has_issue_logits"] / self.temp_issue
-        hidden_logit = outputs["is_hidden_logits"] / self.temp_hidden
-        error_logits = outputs["errors_logits"][0].clone()
-        for i in range(len(self.error_types)):
-            error_logits[i] = error_logits[i] / self.temp_errors[i]
         issue_prob = torch.sigmoid(issue_logit).item()
         has_issue = issue_prob >= issue_threshold
         result = {
             "text": text,
-            "class": None,
-            "status": "ok",
             "review_required": False,
             "has_issue": has_issue,
             "issue_probability": issue_prob,
@@ -94,21 +122,24 @@ class RQAJudge:
             "hidden_probability": None,
             "errors": [],
             "num_errors": 0,
             "threshold_issue": issue_threshold,
             "threshold_hidden": hidden_threshold,
             "threshold_error": error_threshold,
             "threshold_errors": error_thresholds,
-            "schema_version": getattr(self.model.config, "schema_version", "unknown"),
         }
         if abs(issue_prob - issue_threshold) <= issue_uncertain_margin:
             result["status"] = "uncertain"
             result["review_required"] = True
         if not has_issue:
             result["class"] = "logical"
             return result
         hidden_prob = torch.sigmoid(hidden_logit).item()
         is_hidden = hidden_prob >= hidden_threshold
@@ -119,14 +150,23 @@ class RQAJudge:
             result["status"] = "uncertain"
             result["review_required"] = True
         if is_hidden:
             result["class"] = "hidden"
             return result
-        error_probs = torch.sigmoid(error_logits).tolist()
-        detected = []
         for i, err_name in enumerate(self.error_types):
-            prob = float(error_probs[i])
             threshold_i = float(error_thresholds[i] if i < len(error_thresholds) else error_threshold)
             if abs(prob - threshold_i) <= error_uncertain_margin:
@@ -134,15 +174,19 @@ class RQAJudge:
                 result["review_required"] = True
             if prob >= threshold_i:
-                detected.append((err_name, prob))
-        detected.sort(key=lambda x: x[1], reverse=True)
         result["class"] = "explicit"
-        result["errors"] = detected
-        result["num_errors"] = len(detected)
         return result
     def pretty_print(self, r):
         print("\n" + "=" * 72)
         print("📄 Текст:")
@@ -155,11 +199,11 @@ class RQAJudge:
         print(f"🧠 Класс: {r['class']}")
         if r["status"] == "uncertain":
-            print("⚠️ Статус: uncertain")
         if r["hidden_probability"] is not None:
             print(
-                f"🟡 Hidden: {'ДА' if r['hidden_problem'] else 'НЕТ'} "
                 f"({r['hidden_probability'] * 100:.2f}%)"
             )
@@ -171,3 +215,202 @@ class RQAJudge:
             print("\n✅ Явных логических ошибок не обнаружено")
         print("=" * 72)

 # requirements
+# Для inference в Colab достаточно этого стека.
 !pip install transformers==4.48.3 tokenizers sentencepiece accelerate
+# ============================================================
+# RQA UX Inference — R2 Interactive Version
+# Google Colab + CLI friendly
+# ============================================================
+import os
+import json
+import csv
 import torch
 from typing import List, Optional
 from transformers import AutoTokenizer, AutoModel
+# ============================================================
+# Константы
+# ============================================================
+ERROR_TYPES = [
+    "false_causality",
+    "unsupported_claim",
+    "overgeneralization",
+    "missing_premise",
+    "contradiction",
+    "circular_reasoning",
+]
 ERROR_NAMES_RU = {
     "false_causality": "Ложная причинно-следственная связь",
     "unsupported_claim": "Неподкрепленное утверждение",
 }
+# ============================================================
+# RQA Judge
+# ============================================================
 class RQAJudge:
     def __init__(self, model_name="skatzR/RQA-R2", device=None, max_length: int = 512):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.model.eval()
         cfg = self.model.config
+        self.error_types = list(getattr(cfg, "error_types", ERROR_TYPES))
+        self.temp_issue = float(getattr(cfg, "temperature_has_issue", 1.0))
+        self.temp_hidden = float(getattr(cfg, "temperature_is_hidden", 1.0))
+        self.temp_errors = list(
+            getattr(cfg, "temperature_errors", [1.0] * len(self.error_types))
+        )
+        self.threshold_issue = float(getattr(cfg, "threshold_has_issue", 0.5))
+        self.threshold_hidden = float(getattr(cfg, "threshold_is_hidden", 0.5))
+        self.threshold_error = float(getattr(cfg, "threshold_error", 0.5))
+        self.threshold_errors = list(
+            getattr(cfg, "threshold_errors", [self.threshold_error] * len(self.error_types))
+        )
+    # ----------------------
+    # Core inference
+    # ----------------------
     @torch.no_grad()
     def infer(
         outputs = self.model(**inputs)
+        # ----- has_issue -----
         issue_logit = outputs["has_issue_logits"] / self.temp_issue
         issue_prob = torch.sigmoid(issue_logit).item()
         has_issue = issue_prob >= issue_threshold
         result = {
             "text": text,
+            "class": None,             # logical / hidden / explicit
+            "status": "ok",            # ok / uncertain
             "review_required": False,
             "has_issue": has_issue,
             "issue_probability": issue_prob,
             "hidden_probability": None,
             "errors": [],
             "num_errors": 0,
+            "schema_version": getattr(self.model.config, "schema_version", "unknown"),
             "threshold_issue": issue_threshold,
             "threshold_hidden": hidden_threshold,
             "threshold_error": error_threshold,
             "threshold_errors": error_thresholds,
         }
         if abs(issue_prob - issue_threshold) <= issue_uncertain_margin:
             result["status"] = "uncertain"
             result["review_required"] = True
+        # ----- Gate 1: logical -----
         if not has_issue:
             result["class"] = "logical"
             return result
+        # ----- hidden -----
+        hidden_logit = outputs["is_hidden_logits"] / self.temp_hidden
         hidden_prob = torch.sigmoid(hidden_logit).item()
         is_hidden = hidden_prob >= hidden_threshold
             result["status"] = "uncertain"
             result["review_required"] = True
+        # ----- Gate 2: hidden -----
         if is_hidden:
             result["class"] = "hidden"
             return result
+        # ----- explicit errors -----
+        raw_error_logits = outputs["errors_logits"][0].clone()
+        error_probs = {}
+        for i, logit in enumerate(raw_error_logits):
+            calibrated = logit / self.temp_errors[i]
+            prob = torch.sigmoid(calibrated).item()
+            error_probs[self.error_types[i]] = prob
+        explicit_errors = []
         for i, err_name in enumerate(self.error_types):
+            prob = float(error_probs[err_name])
             threshold_i = float(error_thresholds[i] if i < len(error_thresholds) else error_threshold)
             if abs(prob - threshold_i) <= error_uncertain_margin:
                 result["review_required"] = True
             if prob >= threshold_i:
+                explicit_errors.append((err_name, prob))
+        explicit_errors.sort(key=lambda x: x[1], reverse=True)
         result["class"] = "explicit"
+        result["errors"] = explicit_errors
+        result["num_errors"] = len(explicit_errors)
         return result
+    # ============================================================
+    # UX output
+    # ============================================================
     def pretty_print(self, r):
         print("\n" + "=" * 72)
         print("📄 Текст:")
         print(f"🧠 Класс: {r['class']}")
         if r["status"] == "uncertain":
+            print("⚠️ Пограничный случай: review recommended")
         if r["hidden_probability"] is not None:
             print(
+                f"🟡 Hidden-проблема: {'ДА' if r['hidden_problem'] else 'НЕТ'} "
                 f"({r['hidden_probability'] * 100:.2f}%)"
             )
             print("\n✅ Явных логических ошибок не обнаружено")
         print("=" * 72)
+# ============================================================
+# Loaders
+# ============================================================
+def load_texts_from_file(path: str) -> List[str]:
+    ext = os.path.splitext(path)[1].lower()
+    if ext == ".txt":
+        with open(path, encoding="utf-8") as f:
+            return [line.strip() for line in f if line.strip()]
+    if ext == ".csv":
+        with open(path, encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            return [row["text"] for row in reader if row.get("text")]
+    if ext == ".json":
+        with open(path, encoding="utf-8") as f:
+            data = json.load(f)
+        if isinstance(data, list):
+            if all(isinstance(item, str) for item in data):
+                return data
+            texts = []
+            for item in data:
+                if isinstance(item, dict) and "text" in item:
+                    texts.append(str(item["text"]))
+            return texts
+    raise ValueError("Неподдерживаемый формат файла")
+# ============================================================
+# Interactive CLI Interface
+# ============================================================
+class InteractiveCLI:
+    def __init__(self, model_name="skatzR/RQA-R2"):
+        self.judge = RQAJudge(model_name=model_name)
+    def clear_screen(self):
+        print("\n" * 2)
+    def show_mode_menu(self):
+        self.clear_screen()
+        print("=" * 60)
+        print("🤖 RQA-R2 — АНАЛИЗ ЛОГИЧЕСКИХ ОШИБОК")
+        print("=" * 60)
+        print("\nВыберите режим работы:")
+        print("1. 📝 Одиночный ввод (одна фраза для анализа)")
+        print("2. 📄 Множественный ввод (несколько фраз, каждая с новой строки)")
+        print("3. 📂 Загрузка из файла (.txt, .csv, .json)")
+        print("\nНажмите Enter без ввода для выхода.")
+        print("-" * 60)
+    def process_single_mode(self):
+        self.clear_screen()
+        print("[📝 РЕЖИМ: ОДИНОЧНЫЙ ВВОД]")
+        print("Введите текст для анализа:")
+        print("(Нажмите Enter без ввода для возврата в меню)")
+        print("-" * 40)
+        text = input("> ").strip()
+        if not text:
+            return True
+        result = self.judge.infer(text)
+        self.judge.pretty_print(result)
+        print("\n" + "-" * 40)
+        input("Нажмите Enter для продолжения...")
+        return False
+    def process_multiline_mode(self):
+        self.clear_screen()
+        print("[📄 РЕЖИМ: МНОЖЕСТВЕННЫЙ ВВОД]")
+        print("Введите тексты для анализа (каждый с новой строки).")
+        print("Оставьте строку пустой для завершения ввода.")
+        print("(Нажмите Enter без ввода для возврата в меню)")
+        print("-" * 40)
+        texts = []
+        print("Ввод текстов:")
+        while True:
+            line = input("> ").strip()
+            if not line:
+                if not texts:
+                    return True
+                break
+            texts.append(line)
+        self.clear_screen()
+        print(f"[📄 РЕЖИМ: МНОЖЕСТВЕННЫЙ ВВОД] — найдено {len(texts)} текстов")
+        print("-" * 40)
+        for i, text in enumerate(texts, 1):
+            print(f"\n🔍 Текст #{i}:")
+            result = self.judge.infer(text)
+            self.judge.pretty_print(result)
+        print("\n" + "=" * 60)
+        input("Нажмите Enter для продолжения...")
+        return False
+    def process_file_mode(self):
+        self.clear_screen()
+        print("[📂 РЕЖИМ: ЗАГРУЗКА ИЗ ФАЙЛА]")
+        print("Поддерживаемые форматы: .txt, .csv, .json")
+        print("Укажите путь к файлу:")
+        print("(Нажмите Enter без ввода для возврата в меню)")
+        print("-" * 40)
+        file_path = input("Путь к файлу> ").strip()
+        if not file_path:
+            return True
+        try:
+            if not os.path.exists(file_path):
+                print(f"\n❌ Ошибка: Файл '{file_path}' не найден!")
+                input("\nНажмите Enter для продолжения...")
+                return False
+            texts = load_texts_from_file(file_path)
+            if not texts:
+                print(f"\n⚠️ Файл '{file_path}' пуст или не содержит текстов!")
+                input("\nНажмите Enter для продолжения...")
+                return False
+            self.clear_screen()
+            print(f"[📂 РЕЖИМ: ЗАГРУЗКА ИЗ ФАЙЛА] — загружено {len(texts)} текстов")
+            print(f"Файл: {file_path}")
+            print("-" * 40)
+            for i, text in enumerate(texts, 1):
+                print(f"\n🔍 Текст #{i}:")
+                result = self.judge.infer(text)
+                self.judge.pretty_print(result)
+            print("\n" + "=" * 60)
+            input("Нажмите Enter для продолжения...")
+        except Exception as e:
+            print(f"\n❌ Ошибка при обработке файла: {str(e)}")
+            input("\nНажмите Enter для продолжения...")
+        return False
+    def run_interactive(self):
+        current_mode = None
+        while True:
+            if not current_mode:
+                self.show_mode_menu()
+                choice = input("Ваш выбор (1-3)> ").strip()
+                if not choice:
+                    print("\n👋 Выход из программы...")
+                    break
+                if choice == "1":
+                    current_mode = "single"
+                elif choice == "2":
+                    current_mode = "multiline"
+                elif choice == "3":
+                    current_mode = "file"
+                else:
+                    print("\n❌ Неверный выбор! Попробуйте снова.")
+                    input("Нажмите Enter для продолжения...")
+                    continue
+            should_return_to_menu = False
+            if current_mode == "single":
+                should_return_to_menu = self.process_single_mode()
+            elif current_mode == "multiline":
+                should_return_to_menu = self.process_multiline_mode()
+            elif current_mode == "file":
+                should_return_to_menu = self.process_file_mode()
+            if should_return_to_menu:
+                current_mode = None
+# ============================================================
+# Точка входа
+# ============================================================
+def main():
+    cli = InteractiveCLI()
+    cli.run_interactive()
+# ============================================================
+# Запуск
+# ============================================================
+if __name__ == "__main__":
+    main()