# Genesis-ai / ai_core.py
# Uploaded by NeVasilek (commit f9cde06, verified).
import os
import torch
import logging
import threading
from typing import Optional
from PIL import Image
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
TextIteratorStreamer,
BlipProcessor,
BlipForQuestionAnswering
)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Используем модель 0.5B — она самая быстрая для работы БЕЗ токенов на бесплатном CPU
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
class GenesisAI:
    """Local, CPU-only assistant combining a Qwen chat model with BLIP VQA.

    Text generation uses the model named by the module-level ``MODEL_ID``
    (Qwen2.5-0.5B-Instruct); image question answering uses
    ``Salesforce/blip-vqa-base``. Everything is pinned to CPU / float32.
    """

    def __init__(self) -> None:
        logging.info(f"Инициализация Genesis AI локально ({MODEL_ID})")
        # Explicit CPU/float32 pin: this targets free-tier hardware with no GPU.
        self.device = torch.device("cpu")
        self.dtype = torch.float32

        # Load the chat (text) model.
        logging.info("Загрузка текста...")
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        # Fix: the original passed device_map="auto", which requires the
        # `accelerate` package and contradicts the explicit CPU pin above.
        # Load normally and move the model to the chosen device instead.
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=self.dtype,
        ).to(self.device)
        # Fix: inference mode, consistent with blip_model.eval() below.
        self.model.eval()

        # Load the vision (BLIP VQA) model.
        logging.info("Загрузка зрения...")
        self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
        self.blip_model = BlipForQuestionAnswering.from_pretrained(
            "Salesforce/blip-vqa-base",
            torch_dtype=self.dtype,
        ).to(self.device)
        self.blip_model.eval()
        logging.info("Genesis AI готов! Работает локально на CPU.")

    def answer_image_question(self, image: Image.Image, question: str) -> str:
        """Answer a free-form *question* about *image* via BLIP VQA.

        Args:
            image: PIL image to inspect.
            question: Natural-language question about the image.

        Returns:
            The decoded answer string (at most 40 new tokens).
        """
        inputs = self.blip_processor(image, question, return_tensors="pt").to(
            self.device, dtype=self.dtype
        )
        with torch.inference_mode():
            out = self.blip_model.generate(**inputs, max_new_tokens=40)
        return self.blip_processor.decode(out[0], skip_special_tokens=True)

    def answer_text_stream(self, question: str):
        """Yield the assistant's reply to *question* chunk-by-chunk.

        Generation runs on a worker thread feeding a TextIteratorStreamer,
        so callers can consume partial output as it is produced.
        """
        messages = [
            {"role": "system", "content": "Ты ИИ-ассистент Genesis. Тебя создал невасилек. Отвечай кратко и понятно."},
            {"role": "user", "content": question},
        ]
        prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        model_inputs = self.tokenizer([prompt], return_tensors="pt").to(self.device)
        streamer = TextIteratorStreamer(
            self.tokenizer, skip_prompt=True, skip_special_tokens=True
        )
        generation_kwargs = dict(
            model_inputs,
            streamer=streamer,
            max_new_tokens=128,
            do_sample=True,
            temperature=0.7,
            repetition_penalty=1.2,
        )
        worker = threading.Thread(target=self.model.generate, kwargs=generation_kwargs)
        worker.start()
        try:
            yield from streamer
        finally:
            # Fix: the original never joined the worker thread. Joining here
            # ensures generation resources are released even when the consumer
            # abandons the generator early (generation is bounded, so the wait
            # is finite).
            worker.join()

    def answer_text_question(self, question: str) -> str:
        """Return the complete (non-streamed) answer to *question*."""
        # Fix: join the streamed chunks once instead of quadratic `+=`.
        return "".join(self.answer_text_stream(question))