Spaces:

sadovsky
/

MBTI

Running

App Files Files Community

QAway-to committed on Oct 29, 2025

Commit

34fcc83

1 Parent(s): 7b33aee

New model and structure.

Browse files

Files changed (2) hide show

app.py +4 -32
core/interviewer.py +48 -71

app.py CHANGED Viewed

@@ -2,32 +2,9 @@
 import gradio as gr
 import asyncio
 from itertools import cycle
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 from core.utils import generate_first_question
 from core.mbti_analyzer import analyze_mbti
-from core.interviewer import generate_question, session_state
-# --------------------------------------------------------------
-# ✅ Всегда используем публичную модель Flan-T5-Small
-# --------------------------------------------------------------
-QG_MODEL = "google/flan-t5-small"
-try:
-    tokenizer = AutoTokenizer.from_pretrained(QG_MODEL)
-    model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
-    QG_PIPE = pipeline(
-        "text2text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        max_new_tokens=40,
-        num_beams=4,
-        no_repeat_ngram_size=4,
-    )
-    print(f"✅ Loaded public interviewer model: {QG_MODEL}")
-except Exception as e:
-    raise RuntimeError(f"❌ Failed to load {QG_MODEL}: {e}")
 # --------------------------------------------------------------
 # 🌀 Асинхронная анимация "Thinking..."
@@ -47,7 +24,6 @@ def analyze_and_ask(user_text, prev_count):
         yield "⚠️ Please enter your answer.", "", prev_count
         return
-    user_id = "default_user"
     try:
         n = int(prev_count.split("/")[0]) + 1
     except Exception:
@@ -64,16 +40,12 @@ def analyze_and_ask(user_text, prev_count):
         mbti_text = chunk
         yield mbti_text, "💭 Interviewer is thinking... ⠙", counter
-    # генерация вопроса
     try:
-        question = generate_question(user_id=user_id, user_answer=user_text, qg_pipe=QG_PIPE)
     except Exception as e:
         question = f"⚠️ Question generator error: {e}"
-    if question.startswith("✅ All"):
-        yield f"{mbti_text}\n\nSession complete.", "🎯 All MBTI axes covered.", "8/8"
-        return
     yield mbti_text, question, counter
@@ -83,7 +55,7 @@ def analyze_and_ask(user_text, prev_count):
 with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as demo:
     gr.Markdown(
         "## 🧠 MBTI Personality Interviewer\n"
-        "Определи личностный тип и получи вопросы из разных категорий MBTI."
     )
     with gr.Row():

 import gradio as gr
 import asyncio
 from itertools import cycle
 from core.utils import generate_first_question
 from core.mbti_analyzer import analyze_mbti
+from core.interviewer import generate_question
 # --------------------------------------------------------------
 # 🌀 Асинхронная анимация "Thinking..."
         yield "⚠️ Please enter your answer.", "", prev_count
         return
     try:
         n = int(prev_count.split("/")[0]) + 1
     except Exception:
         mbti_text = chunk
         yield mbti_text, "💭 Interviewer is thinking... ⠙", counter
+    # генерация вопроса новой моделью (без инструкций)
     try:
+        question = generate_question()
     except Exception as e:
         question = f"⚠️ Question generator error: {e}"
     yield mbti_text, question, counter
 with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as demo:
     gr.Markdown(
         "## 🧠 MBTI Personality Interviewer\n"
+        "Определи личностный тип и получи случайные вопросы MBTI категории."
     )
     with gr.Row():

core/interviewer.py CHANGED Viewed

@@ -1,96 +1,73 @@
 # core/interviewer.py
 """
-🇬🇧 Interviewer logic module
-Generates MBTI-category-based questions blindly (without reading user input).
-🇷🇺 Модуль интервьюера!
-Генерирует вопросы по категориям MBTI, не анализируя ответы пользователя.
 """
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 # --------------------------------------------------------------
-# 1️⃣ Настройки
 # --------------------------------------------------------------
-QG_MODEL = "google/flan-t5-small"
 tokenizer = AutoTokenizer.from_pretrained(QG_MODEL)
 model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
-QG_PIPE = pipeline(
-    "text2text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=40,
-    num_beams=4,
-    no_repeat_ngram_size=4,
-)
 # --------------------------------------------------------------
-# 2️⃣ Состояние сессии
 # --------------------------------------------------------------
-session_state = {
-    "history": {},
-    "categories": [
-        "Extroversion", "Introversion",
-        "Sensing", "Intuition",
-        "Thinking", "Feeling",
-        "Judging", "Perceiving",
-    ],
-}
 # --------------------------------------------------------------
-# 3️⃣ Очистка текста от инструкций
 # --------------------------------------------------------------
-def _clean(q: str) -> str:
-    q = (q or "").strip()
-    bad = ["generate", "question", "output", "instruction", "explain", "user", "context"]
-    lower = q.lower()
-    for b in bad:
-        if b in lower:
-            idx = lower.find(b) + len(b)
-            q = q[idx:].lstrip(":,. ").strip()
-            lower = q.lower()
-    if q and not q[0].isupper():
-        q = q.capitalize()
-    if "?" not in q:
-        q = q.rstrip(".") + "?"
-    return q
 # --------------------------------------------------------------
 # 4️⃣ Генерация вопроса
 # --------------------------------------------------------------
-def generate_question(user_id: str, qg_pipe=None, **kwargs) -> str:
     """
-    Возвращает один новый вопрос по следующей неиспользованной MBTI-оси.
-    Не использует ответ пользователя.
     """
-    history = session_state["history"].get(user_id, {"asked": []})
-    asked = history["asked"]
-    cats = session_state["categories"]
-    # если все категории пройдены
-    if len(asked) >= len(cats):
-        return "✅ All MBTI axes covered."
-    next_cat = next(c for c in cats if c not in asked)
-    asked.append(next_cat)
-    session_state["history"][user_id] = history
-    prompt = (
-        f"Ask one natural, open-ended question about {next_cat}. "
-        f"Start with What, Why, How, or When. "
-        f"Do not include any instructions, explanations, or quotes. "
-        f"Output only the question itself."
-    )
-    pipe = qg_pipe or QG_PIPE
-    out = pipe(prompt)[0]["generated_text"]
-    question = _clean(out)
-    # fallback — если модель дала пустой или мусорный текст
-    if not question or len(question.split()) < 3:
-        question = f"What aspects of {next_cat.lower()} best describe you and why?"
-    return f"({next_cat}) {question}"

 # core/interviewer.py
 """
+🇬🇧 Interviewer logic module (no instructions)
+Generates random MBTI-style questions using a fine-tuned model.
+🇷🇺 Модуль интервьюера.
+Использует fine-tuned модель для генерации вопросов без промптов и инструкций.
 """
+import random, torch, re
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 # --------------------------------------------------------------
+# 1️⃣ Настройки модели
 # --------------------------------------------------------------
+QG_MODEL = "f3nsmart/ft-flan-t5-base-qgen"
 tokenizer = AutoTokenizer.from_pretrained(QG_MODEL)
 model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device).eval()
+print(f"✅ Loaded interviewer model: {QG_MODEL}")
 # --------------------------------------------------------------
+# 2️⃣ Базовые промпты (легкий "seed", без инструкций)
 # --------------------------------------------------------------
+PROMPTS = [
+    "Personality and emotions.",
+    "Human motivation and choices.",
+    "Self-awareness and reflection.",
+    "Personal growth and behavior.",
+    "How people make decisions.",
+]
 # --------------------------------------------------------------
+# 3️⃣ Очистка текста
 # --------------------------------------------------------------
+def _clean_question(text: str) -> str:
+    """Берёт первую фразу с '?', обрезает лишнее"""
+    text = text.strip()
+    m = re.search(r"(.+?\?)", text)
+    if m:
+        text = m.group(1)
+    text = text.replace("\n", " ").strip()
+    if len(text.split()) < 3:
+        text = text.capitalize()
+        if not text.endswith("?"):
+            text += "?"
+    return text
 # --------------------------------------------------------------
 # 4️⃣ Генерация вопроса
 # --------------------------------------------------------------
def generate_question(user_id: str = "default_user", **kwargs) -> str:
    """Generate one MBTI-style question from a randomly chosen seed prompt.

    ``user_id`` and any extra keyword arguments are accepted but are not
    used anywhere in this function.

    Returns:
        The sampled model output after passing through ``_clean_question``.
    """
    seed = random.choice(PROMPTS)
    encoded = tokenizer(seed, return_tensors="pt", truncation=True).to(device)

    # Sampling configuration for the generation call.
    sampling = {
        "do_sample": True,
        "top_p": 0.9,
        "temperature": 0.9,
        "repetition_penalty": 1.1,
        "max_new_tokens": 60,
    }

    # Gradient tracking is disabled for the generation call.
    with torch.no_grad():
        generated = model.generate(**encoded, **sampling)

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return _clean_question(decoded)