Valtry committed on
Commit
e9df17f
·
verified ·
1 Parent(s): 9e58690

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -8
app.py CHANGED
@@ -2,8 +2,8 @@ import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
- # Small model for CPU
6
- model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
7
 
8
  print("Loading tokenizer...")
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -11,8 +11,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
11
  print("Loading model...")
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_name,
14
- torch_dtype=torch.float32,
15
- device_map="cpu"
16
  )
17
 
18
  print("Model loaded successfully!")
@@ -20,7 +19,7 @@ print("Model loaded successfully!")
20
  def chat(message):
21
 
22
  prompt = f"""
23
- You are a helpful assistant.
24
 
25
  User: {message}
26
  Assistant:
@@ -30,12 +29,17 @@ Assistant:
30
 
31
  output = model.generate(
32
  **inputs,
33
- max_new_tokens=100,
34
- temperature=0.7
 
35
  )
36
 
37
  response = tokenizer.decode(output[0], skip_special_tokens=True)
38
 
 
 
 
 
39
  return response
40
 
41
 
@@ -43,7 +47,8 @@ demo = gr.Interface(
43
  fn=chat,
44
  inputs=gr.Textbox(label="Ask something"),
45
  outputs=gr.Textbox(label="AI Response"),
46
- title="Auric AI Model Test"
 
47
  )
48
 
49
  demo.launch()
 
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
+ # Faster small model for CPU
6
+ model_name = "Qwen/Qwen2-0.5B-Instruct"
7
 
8
  print("Loading tokenizer...")
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
11
  print("Loading model...")
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_name,
14
+ torch_dtype=torch.float32
 
15
  )
16
 
17
  print("Model loaded successfully!")
 
19
  def chat(message):
20
 
21
  prompt = f"""
22
+ You are a helpful AI assistant.
23
 
24
  User: {message}
25
  Assistant:
 
29
 
30
  output = model.generate(
31
  **inputs,
32
+ max_new_tokens=80, # smaller = faster
33
+ temperature=0.7,
34
+ do_sample=True
35
  )
36
 
37
  response = tokenizer.decode(output[0], skip_special_tokens=True)
38
 
39
+ # clean response (remove prompt part)
40
+ if "Assistant:" in response:
41
+ response = response.split("Assistant:")[-1].strip()
42
+
43
  return response
44
 
45
 
 
47
  fn=chat,
48
  inputs=gr.Textbox(label="Ask something"),
49
  outputs=gr.Textbox(label="AI Response"),
50
+ title="Auric AI Model Test",
51
+ description="Testing Qwen2-0.5B model on Hugging Face Space"
52
  )
53
 
54
  demo.launch()