Spaces:

NeVasilek
/

Genesis-ai

Sleeping

App Files Files Community

NeVasilek committed on Mar 6

Commit

f9cde06

verified ·

1 Parent(s): 8132c10

Upload ai_core.py

Browse files

Files changed (1) hide show

ai_core.py +49 -34

ai_core.py CHANGED Viewed

@@ -1,31 +1,39 @@
 import os
-import io
 import logging
 from typing import Optional
-from huggingface_hub import InferenceClient
 from PIL import Image
-from transformers import BlipProcessor, BlipForQuestionAnswering
-import torch
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# Используем самую мощную модель Qwen 72B через API (мгновенно и бесплатно)
-MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"
 class GenesisAI:
     def __init__(self):
-        logging.info(f"Инициализация Genesis AI через Serverless API ({MODEL_ID})")
-        # Хардкодим токен для стопроцентной работы (исправлено: I -> 1)
-        # Если в коде не сработает, попробуем взять из Secrets (HF_TOKEN)
-        self.hf_token = os.getenv("HF_TOKEN") or "hf_McPYfqhXAYQfekcob1FFGFbFoBgaUEhQSS"
-        self.client = InferenceClient(model=MODEL_ID, token=self.hf_token)
-        logging.info(f"Авторизация выполнена (длина токена: {len(self.hf_token)})")
-        # Оставляем локальное зрение (BLIP), оно легкое и работает на CPU быстро
         self.device = torch.device("cpu")
         self.dtype = torch.float32
-        logging.info("Загрузка зрения (BLIP) на CPU...")
         self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
         self.blip_model = BlipForQuestionAnswering.from_pretrained(
             "Salesforce/blip-vqa-base",
@@ -33,7 +41,7 @@ class GenesisAI:
         ).to(self.device)
         self.blip_model.eval()
-        logging.info("Genesis AI готов к моментальной работе!")
     def answer_image_question(self, image: Image.Image, question: str) -> str:
         inputs = self.blip_processor(image, question, return_tensors="pt").to(self.device, dtype=self.dtype)
@@ -42,27 +50,34 @@ class GenesisAI:
         return self.blip_processor.decode(out[0], skip_special_tokens=True)
     def answer_text_stream(self, question: str):
-        """Мгновенный стриминг через сервера Hugging Face"""
         messages = [
-            {"role": "system", "content": "Ты ИИ-ассистент Genesis. Тебя создал невасилек. Отвечай по-доброму, кратко и максимально понятно. Ты работаешь на самой мощной модели Qwen 72B."},
             {"role": "user", "content": question}
         ]
-        try:
-            # Вызываем API Hugging Face (это происходит на их GPU, поэтому мгновенно)
-            for message in self.client.chat_completion(
-                messages=messages,
-                max_tokens=512,
-                stream=True,
-                temperature=0.7,
-                top_p=0.9
-            ):
-                token = message.choices[0].delta.content
-                if token:
-                    yield token
-        except Exception as e:
-            logging.error(f"Ошибка API: {e}")
-            yield f"Ошибка связи с сервером AI. Проверьте HF_TOKEN в настройках Space. ({str(e)})"
     def answer_text_question(self, question: str) -> str:
         result = ""

 import os
+import torch
 import logging
+import threading
 from typing import Optional
 from PIL import Image
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    TextIteratorStreamer,
+    BlipProcessor,
+    BlipForQuestionAnswering
+)
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Используем модель 0.5B — она самая быстрая для работы БЕЗ токенов на бесплатном CPU
+MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
 class GenesisAI:
     def __init__(self):
+        logging.info(f"Инициализация Genesis AI локально ({MODEL_ID})")
         self.device = torch.device("cpu")
         self.dtype = torch.float32
+        # Загрузка текстовой модели
+        logging.info("Загрузка текста...")
+        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            torch_dtype=self.dtype,
+            device_map="auto"
+        )
+        # Загрузка зрения (BLIP)
+        logging.info("Загрузка зрения...")
         self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
         self.blip_model = BlipForQuestionAnswering.from_pretrained(
             "Salesforce/blip-vqa-base",
         ).to(self.device)
         self.blip_model.eval()
+        logging.info("Genesis AI готов! Работает локально на CPU.")
     def answer_image_question(self, image: Image.Image, question: str) -> str:
         inputs = self.blip_processor(image, question, return_tensors="pt").to(self.device, dtype=self.dtype)
         return self.blip_processor.decode(out[0], skip_special_tokens=True)
     def answer_text_stream(self, question: str):
         messages = [
+            {"role": "system", "content": "Ты ИИ-ассистент Genesis. Тебя создал невасилек. Отвечай кратко и понятно."},
             {"role": "user", "content": question}
         ]
+        text = self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.device)
+        streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
+        generation_kwargs = dict(
+            model_inputs,
+            streamer=streamer,
+            max_new_tokens=128,
+            do_sample=True,
+            temperature=0.7,
+            repetition_penalty=1.2
+        )
+        thread = threading.Thread(target=self.model.generate, kwargs=generation_kwargs)
+        thread.start()
+        for new_text in streamer:
+            yield new_text
     def answer_text_question(self, question: str) -> str:
         result = ""