QAway-to committed on
Commit
34fcc83
·
1 Parent(s): 7b33aee

New model and structure.

Browse files
Files changed (2) hide show
  1. app.py +4 -32
  2. core/interviewer.py +48 -71
app.py CHANGED
@@ -2,32 +2,9 @@
2
  import gradio as gr
3
  import asyncio
4
  from itertools import cycle
5
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
6
  from core.utils import generate_first_question
7
  from core.mbti_analyzer import analyze_mbti
8
- from core.interviewer import generate_question, session_state
9
-
10
-
11
- # --------------------------------------------------------------
12
- # ✅ Всегда используем публичную модель Flan-T5-Small
13
- # --------------------------------------------------------------
14
- QG_MODEL = "google/flan-t5-small"
15
-
16
- try:
17
- tokenizer = AutoTokenizer.from_pretrained(QG_MODEL)
18
- model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
19
- QG_PIPE = pipeline(
20
- "text2text-generation",
21
- model=model,
22
- tokenizer=tokenizer,
23
- max_new_tokens=40,
24
- num_beams=4,
25
- no_repeat_ngram_size=4,
26
- )
27
- print(f"✅ Loaded public interviewer model: {QG_MODEL}")
28
- except Exception as e:
29
- raise RuntimeError(f"❌ Failed to load {QG_MODEL}: {e}")
30
-
31
 
32
  # --------------------------------------------------------------
33
  # 🌀 Асинхронная анимация "Thinking..."
@@ -47,7 +24,6 @@ def analyze_and_ask(user_text, prev_count):
47
  yield "⚠️ Please enter your answer.", "", prev_count
48
  return
49
 
50
- user_id = "default_user"
51
  try:
52
  n = int(prev_count.split("/")[0]) + 1
53
  except Exception:
@@ -64,16 +40,12 @@ def analyze_and_ask(user_text, prev_count):
64
  mbti_text = chunk
65
  yield mbti_text, "💭 Interviewer is thinking... ⠙", counter
66
 
67
- # генерация вопроса
68
  try:
69
- question = generate_question(user_id=user_id, user_answer=user_text, qg_pipe=QG_PIPE)
70
  except Exception as e:
71
  question = f"⚠️ Question generator error: {e}"
72
 
73
- if question.startswith("✅ All"):
74
- yield f"{mbti_text}\n\nSession complete.", "🎯 All MBTI axes covered.", "8/8"
75
- return
76
-
77
  yield mbti_text, question, counter
78
 
79
 
@@ -83,7 +55,7 @@ def analyze_and_ask(user_text, prev_count):
83
  with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as demo:
84
  gr.Markdown(
85
  "## 🧠 MBTI Personality Interviewer\n"
86
- "Определи личностный тип и получи вопросы из разных категорий MBTI."
87
  )
88
 
89
  with gr.Row():
 
2
  import gradio as gr
3
  import asyncio
4
  from itertools import cycle
 
5
  from core.utils import generate_first_question
6
  from core.mbti_analyzer import analyze_mbti
7
+ from core.interviewer import generate_question
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # --------------------------------------------------------------
10
  # 🌀 Асинхронная анимация "Thinking..."
 
24
  yield "⚠️ Please enter your answer.", "", prev_count
25
  return
26
 
 
27
  try:
28
  n = int(prev_count.split("/")[0]) + 1
29
  except Exception:
 
40
  mbti_text = chunk
41
  yield mbti_text, "💭 Interviewer is thinking... ⠙", counter
42
 
43
+ # генерация вопроса новой моделью (без инструкций)
44
  try:
45
+ question = generate_question()
46
  except Exception as e:
47
  question = f"⚠️ Question generator error: {e}"
48
 
 
 
 
 
49
  yield mbti_text, question, counter
50
 
51
 
 
55
  with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as demo:
56
  gr.Markdown(
57
  "## 🧠 MBTI Personality Interviewer\n"
58
+ "Определи личностный тип и получи случайные вопросы MBTI категории."
59
  )
60
 
61
  with gr.Row():
core/interviewer.py CHANGED
@@ -1,96 +1,73 @@
1
  # core/interviewer.py
2
  """
3
- 🇬🇧 Interviewer logic module
4
- Generates MBTI-category-based questions blindly (without reading user input).
5
 
6
- 🇷🇺 Модуль интервьюера!
7
- Генерирует вопросы по категориям MBTI, не анализируя ответы пользователя.
8
  """
9
 
10
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
11
 
12
  # --------------------------------------------------------------
13
- # 1️⃣ Настройки
14
  # --------------------------------------------------------------
15
- QG_MODEL = "google/flan-t5-small"
16
 
17
  tokenizer = AutoTokenizer.from_pretrained(QG_MODEL)
18
  model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
 
 
19
 
20
- QG_PIPE = pipeline(
21
- "text2text-generation",
22
- model=model,
23
- tokenizer=tokenizer,
24
- max_new_tokens=40,
25
- num_beams=4,
26
- no_repeat_ngram_size=4,
27
- )
28
 
29
  # --------------------------------------------------------------
30
- # 2️⃣ Состояние сессии
31
  # --------------------------------------------------------------
32
- session_state = {
33
- "history": {},
34
- "categories": [
35
- "Extroversion", "Introversion",
36
- "Sensing", "Intuition",
37
- "Thinking", "Feeling",
38
- "Judging", "Perceiving",
39
- ],
40
- }
41
 
42
  # --------------------------------------------------------------
43
- # 3️⃣ Очистка текста от инструкций
44
  # --------------------------------------------------------------
45
- def _clean(q: str) -> str:
46
- q = (q or "").strip()
47
- bad = ["generate", "question", "output", "instruction", "explain", "user", "context"]
48
- lower = q.lower()
49
- for b in bad:
50
- if b in lower:
51
- idx = lower.find(b) + len(b)
52
- q = q[idx:].lstrip(":,. ").strip()
53
- lower = q.lower()
54
- if q and not q[0].isupper():
55
- q = q.capitalize()
56
- if "?" not in q:
57
- q = q.rstrip(".") + "?"
58
- return q
59
 
60
 
61
  # --------------------------------------------------------------
62
  # 4️⃣ Генерация вопроса
63
  # --------------------------------------------------------------
64
def generate_question(user_id: str, qg_pipe=None, **kwargs) -> str:
    """Return one new question for the next MBTI category not yet asked.

    The user's answer is not used. The chosen category is recorded in
    ``session_state["history"]`` under ``user_id`` before the question is
    generated. Once every category has been asked, the completion message
    is returned instead of a question.

    Args:
        user_id: Key under which the asked categories are tracked.
        qg_pipe: Optional generation pipeline; the module-level ``QG_PIPE``
            is used when this is falsy.
        **kwargs: Accepted and ignored.

    Returns:
        The question prefixed with its category in parentheses, or the
        completion message.
    """
    all_histories = session_state["history"]
    record = all_histories.get(user_id, {"asked": []})
    covered = record["asked"]
    axes = session_state["categories"]

    # Every category has already been asked.
    if not len(covered) < len(axes):
        return "✅ All MBTI axes covered."

    axis = next(filter(lambda name: name not in covered, axes))
    covered.append(axis)
    all_histories[user_id] = record

    instruction = " ".join(
        [
            f"Ask one natural, open-ended question about {axis}.",
            "Start with What, Why, How, or When.",
            "Do not include any instructions, explanations, or quotes.",
            "Output only the question itself.",
        ]
    )

    generator = qg_pipe if qg_pipe else QG_PIPE
    raw_text = generator(instruction)[0]["generated_text"]
    question = _clean(raw_text)

    # Fallback when the model produced empty or too-short text.
    usable = bool(question) and len(question.split()) >= 3
    if not usable:
        question = f"What aspects of {axis.lower()} best describe you and why?"

    return f"({axis}) {question}"
 
1
  # core/interviewer.py
2
  """
3
+ 🇬🇧 Interviewer logic module (no instructions)
4
+ Generates random MBTI-style questions using a fine-tuned model.
5
 
6
+ 🇷🇺 Модуль интервьюера.
7
+ Использует fine-tuned модель для генерации вопросов без промптов и инструкций.
8
  """
9
 
10
+ import random, torch, re
11
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
12
 
13
  # --------------------------------------------------------------
14
+ # 1️⃣ Настройки модели
15
  # --------------------------------------------------------------
16
+ QG_MODEL = "f3nsmart/ft-flan-t5-base-qgen"
17
 
18
  tokenizer = AutoTokenizer.from_pretrained(QG_MODEL)
19
  model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
20
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
+ model.to(device).eval()
22
 
23
+ print(f"✅ Loaded interviewer model: {QG_MODEL}")
 
 
 
 
 
 
 
24
 
25
  # --------------------------------------------------------------
26
+ # 2️⃣ Базовые промпты (легкий "seed", без инструкций)
27
  # --------------------------------------------------------------
28
+ PROMPTS = [
29
+ "Personality and emotions.",
30
+ "Human motivation and choices.",
31
+ "Self-awareness and reflection.",
32
+ "Personal growth and behavior.",
33
+ "How people make decisions.",
34
+ ]
 
 
35
 
36
  # --------------------------------------------------------------
37
+ # 3️⃣ Очистка текста
38
  # --------------------------------------------------------------
39
+ def _clean_question(text: str) -> str:
40
+ """Берёт первую фразу с '?', обрезает лишнее"""
41
+ text = text.strip()
42
+ m = re.search(r"(.+?\?)", text)
43
+ if m:
44
+ text = m.group(1)
45
+ text = text.replace("\n", " ").strip()
46
+ if len(text.split()) < 3:
47
+ text = text.capitalize()
48
+ if not text.endswith("?"):
49
+ text += "?"
50
+ return text
 
 
51
 
52
 
53
  # --------------------------------------------------------------
54
  # 4️⃣ Генерация вопроса
55
  # --------------------------------------------------------------
56
def generate_question(user_id: str = "default_user", **kwargs) -> str:
    """Generate one MBTI-style question from a randomly chosen seed prompt.

    A seed is picked from ``PROMPTS``, tokenized, and passed to the model
    with sampling enabled; the decoded output is run through
    ``_clean_question``. ``user_id`` and any extra keyword arguments are
    accepted but not used here.

    Returns:
        The cleaned question text.
    """
    seed_text = random.choice(PROMPTS)
    encoded = tokenizer(seed_text, return_tensors="pt", truncation=True)
    encoded = encoded.to(device)

    sampling_options = {
        "do_sample": True,
        "top_p": 0.9,
        "temperature": 0.9,
        "repetition_penalty": 1.1,
        "max_new_tokens": 60,
    }
    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated = model.generate(**encoded, **sampling_options)

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return _clean_question(decoded)