# Genesis AI — local CPU inference app (deployed as a Hugging Face Space).
import os
import torch
import logging
import threading
from typing import Optional
from PIL import Image
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    BlipProcessor,
    BlipForQuestionAnswering
)

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Use the 0.5B model — it is the fastest choice for token-free operation on a free CPU tier
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
class GenesisAI:
    """Local CPU assistant.

    Combines two models:
      * Qwen2.5-0.5B-Instruct for (streamed) text chat,
      * Salesforce BLIP VQA for answering questions about images.

    All weights are loaded onto the CPU in float32 at construction time,
    which downloads the models on first run.
    """

    def __init__(self):
        logging.info(f"Инициализация Genesis AI локально ({MODEL_ID})")
        # Everything runs on CPU in float32 — this targets a machine with no GPU.
        self.device = torch.device("cpu")
        self.dtype = torch.float32

        # Text model. Load it directly onto self.device instead of
        # device_map="auto": with "auto", accelerate places the weights on
        # its own, which can diverge from the device the inputs are moved
        # to in answer_text_stream(). This also matches how the BLIP model
        # below is loaded.
        logging.info("Загрузка текста...")
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=self.dtype,
        ).to(self.device)
        self.model.eval()  # inference-only, consistent with blip_model below

        # Vision model (BLIP VQA).
        logging.info("Загрузка зрения...")
        self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
        self.blip_model = BlipForQuestionAnswering.from_pretrained(
            "Salesforce/blip-vqa-base",
            torch_dtype=self.dtype,
        ).to(self.device)
        self.blip_model.eval()
        logging.info("Genesis AI готов! Работает локально на CPU.")

    def answer_image_question(self, image: Image.Image, question: str) -> str:
        """Answer a free-form *question* about *image* using BLIP VQA.

        Returns the decoded answer string (at most 40 new tokens).
        """
        inputs = self.blip_processor(image, question, return_tensors="pt").to(
            self.device, dtype=self.dtype
        )
        with torch.inference_mode():
            out = self.blip_model.generate(**inputs, max_new_tokens=40)
        return self.blip_processor.decode(out[0], skip_special_tokens=True)

    def answer_text_stream(self, question: str):
        """Yield the assistant's reply to *question* incrementally.

        Generation runs in a background thread (generate() blocks); text
        chunks are drained from the streamer on the caller's thread.
        """
        messages = [
            {"role": "system", "content": "Ты ИИ-ассистент Genesis. Тебя создал невасилек. Отвечай кратко и понятно."},
            {"role": "user", "content": question},
        ]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.device)
        streamer = TextIteratorStreamer(
            self.tokenizer, skip_prompt=True, skip_special_tokens=True
        )
        generation_kwargs = dict(
            model_inputs,
            streamer=streamer,
            max_new_tokens=128,
            do_sample=True,
            temperature=0.7,
            repetition_penalty=1.2,
        )
        thread = threading.Thread(target=self.model.generate, kwargs=generation_kwargs)
        thread.start()
        try:
            for new_text in streamer:
                yield new_text
        finally:
            # Ensure the worker thread has finished before the generator is
            # discarded, even if the consumer stops iterating early.
            thread.join()

    def answer_text_question(self, question: str) -> str:
        """Non-streaming convenience wrapper: collect the full streamed reply."""
        return "".join(self.answer_text_stream(question))