# Question-generation ("interviewer") model: one blocking helper and one
# streaming helper built on Hugging Face Transformers.
import threading

import torch
from transformers import AutoModelForSeq2SeqLM, T5Tokenizer, TextIteratorStreamer

QG_MODEL = "f3nsmart/ft-flan-t5-base-qgen_v2"

# Load the tokenizer and fine-tuned question-generation model once at import
# time, move the model to the GPU when one is available, and switch to eval
# mode since this module only runs inference.
tokenizer = T5Tokenizer.from_pretrained(QG_MODEL, use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

print(f"✅ Loaded interviewer model (streaming ready): {QG_MODEL}")


def generate_question(prompt: str = "Generate one thoughtful question.") -> str:
    """Generate a complete question in one blocking call."""
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=80)
    return tokenizer.decode(output[0], skip_special_tokens=True)
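
# Illustrative call (not part of the original module):
#   generate_question("Generate one thoughtful question about teamwork.")
# returns a single decoded question string.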


def stream_question(prompt: str = "Generate one thoughtful question."):
    """Yield the question incrementally as new tokens are decoded."""
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=80,
        # Sampling settings chosen for varied, non-repetitive questions.
        do_sample=True,
        top_p=0.9,
        temperature=1.1,
        top_k=60,
        repetition_penalty=1.3,
    )

    # model.generate() blocks until generation finishes, so run it in a
    # background thread; the streamer hands decoded text fragments back to
    # this thread as they become available.
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
    thread.join()
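

# Minimal usage sketch (an illustrative assumption, not part of the original
# module). stream_question() yields the cumulative text so far, so the caller
# prints only the newly added suffix on each iteration.
if __name__ == "__main__":
    print(generate_question())

    shown = 0
    for partial_text in stream_question():
        print(partial_text[shown:], end="", flush=True)
        shown = len(partial_text)
    print()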