Spaces:

AugustLight
/

LLight-3.2-3b-Instruct

Sleeping

App Files Community

AugustLight committed on Oct 25, 2024

Commit

fe67270

verified ·

1 Parent(s): 464f8f9

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -20

app.py CHANGED Viewed

@@ -1,37 +1,87 @@
 import gradio as gr
 from ctransformers import AutoModelForCausalLM
 def load_model():
-    model = AutoModelForCausalLM.from_pretrained(
-        "Llight.Q8_0.gguf",
-        model_type="llama",
-        gpu_layers=0
-    )
     return model
-def generate_response(prompt, max_tokens=128, temperature=0.7):
     try:
-        model = load_model()
         response = model(
-            prompt,
-            max_tokens=max_tokens,
-            temperature=temperature
         )
-        return response
     except Exception as e:
         return f"Произошла ошибка: {str(e)}"
 # Создаем интерфейс
-demo = gr.Interface(
-    fn=generate_response,
-    inputs=[
-        gr.Textbox(label="Введите ваш запрос", lines=3),
-        gr.Slider(minimum=1, maximum=512, value=128, label="Max Tokens"),
-        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
     ],
-    outputs=gr.Textbox(label="Ответ модели", lines=5),
-    title="LLight Model Demo",
-    description="Демонстрация работы GGUF модели"
 )
 # Запускаем приложение

 import gradio as gr
 from ctransformers import AutoModelForCausalLM
+import os
+model = None
 def load_model():
+    global model
+    if model is None:
+        try:
+            model = AutoModelForCausalLM.from_pretrained(
+                "Llight.Q8_0.gguf",
+                model_type="llama",
+                gpu_layers=0,
+                context_length=2048
+            )
+        except Exception as e:
+            print(f"Ошибка загрузки модели: {str(e)}")
+            raise e
     return model
+def respond(message, history, system_message, max_new_tokens, temperature, top_p):
     try:
+        if model is None:
+            load_model()
+        # Формируем контекст из истории
+        context = system_message + "\n\n"
+        for user_msg, assistant_msg in history:
+            context += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
+        context += f"User: {message}\nAssistant: "
+        # Генерируем ответ
         response = model(
+            context,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            stop=["User:", "\n\n", "<|endoftext|>"]
         )
+        return response.strip()
     except Exception as e:
         return f"Произошла ошибка: {str(e)}"
 # Создаем интерфейс
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(
+            value="Ты дружелюбный и полезный ассистент. Ты всегда отвечаешь кратко и по делу.",
+            label="System message"
+        ),
+        gr.Slider(
+            minimum=1,
+            maximum=2048,
+            value=512,
+            step=1,
+            label="Max new tokens"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=4.0,
+            value=0.7,
+            step=0.1,
+            label="Temperature"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)"
+        ),
+    ],
+    title="GGUF Chat Model",
+    description="Чат с GGUF моделью (Llight.Q8_0.gguf)",
+    examples=[
+        ["Привет! Как дела?"],
+        ["Расскажи мне о себе"],
+        ["Что ты умеешь делать?"]
     ],
+    cache_examples=False
 )
 # Запускаем приложение