Valtry committed on
Commit
e9df17f
·
verified ·
1 Parent(s): 9e58690

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -8
app.py CHANGED
@@ -2,8 +2,8 @@ import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
- # Small model for CPU
6
- model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
7
 
8
  print("Loading tokenizer...")
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -11,8 +11,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
11
  print("Loading model...")
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_name,
14
- torch_dtype=torch.float32,
15
- device_map="cpu"
16
  )
17
 
18
  print("Model loaded successfully!")
@@ -20,7 +19,7 @@ print("Model loaded successfully!")
20
  def chat(message):
21
 
22
  prompt = f"""
23
- You are a helpful assistant.
24
 
25
  User: {message}
26
  Assistant:
@@ -30,12 +29,17 @@ Assistant:
30
 
31
  output = model.generate(
32
  **inputs,
33
- max_new_tokens=100,
34
- temperature=0.7
 
35
  )
36
 
37
  response = tokenizer.decode(output[0], skip_special_tokens=True)
38
 
 
 
 
 
39
  return response
40
 
41
 
@@ -43,7 +47,8 @@ demo = gr.Interface(
43
  fn=chat,
44
  inputs=gr.Textbox(label="Ask something"),
45
  outputs=gr.Textbox(label="AI Response"),
46
- title="Auric AI Model Test"
 
47
  )
48
 
49
  demo.launch()
 
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
+ # Faster small model for CPU
6
+ model_name = "Qwen/Qwen2-0.5B-Instruct"
7
 
8
  print("Loading tokenizer...")
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
11
  print("Loading model...")
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_name,
14
+ torch_dtype=torch.float32
 
15
  )
16
 
17
  print("Model loaded successfully!")
 
19
  def chat(message):
20
 
21
  prompt = f"""
22
+ You are a helpful AI assistant.
23
 
24
  User: {message}
25
  Assistant:
 
29
 
30
  output = model.generate(
31
  **inputs,
32
+ max_new_tokens=80, # smaller = faster
33
+ temperature=0.7,
34
+ do_sample=True
35
  )
36
 
37
  response = tokenizer.decode(output[0], skip_special_tokens=True)
38
 
39
+ # clean response (remove prompt part)
40
+ if "Assistant:" in response:
41
+ response = response.split("Assistant:")[-1].strip()
42
+
43
  return response
44
 
45
 
 
47
  fn=chat,
48
  inputs=gr.Textbox(label="Ask something"),
49
  outputs=gr.Textbox(label="AI Response"),
50
+ title="Auric AI Model Test",
51
+ description="Testing Qwen2-0.5B model on Hugging Face Space"
52
  )
53
 
54
  demo.launch()