Goated121 committed on
Commit
d697806
·
verified ·
1 Parent(s): 337fb0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -12
app.py CHANGED
@@ -1,17 +1,23 @@
1
  from llama_cpp import Llama
2
  import gradio as gr
3
 
4
- # Load your quantized GGUF model
5
- llm = Llama(model_path="/app/models/qwen2.5-1.5B-q4.gguf")
6
-
7
- def generate_text(prompt):
8
- output = llm(prompt, max_tokens=200)
9
- return output['choices'][0]['text']
10
-
11
- demo = gr.Interface(
12
- fn=generate_text,
13
- inputs=gr.Textbox(lines=2, placeholder="Type your prompt here..."),
14
- outputs="text"
15
  )
16
 
17
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from llama_cpp import Llama
2
  import gradio as gr
3
 
4
# Load the quantized Qwen2.5 1.5B (Q4 GGUF) model via the llama.cpp bindings.
# NOTE(review): the path is relative — resolved against the process CWD; confirm
# the GGUF file is deployed next to app.py.
model = Llama(
    model_path="qwen2.5-1.5B-q4.gguf",
    n_ctx=2048,     # context window size in tokens
    n_threads=4,    # CPU threads used for inference
)
9
 
10
def chat(prompt, max_tokens=256, temperature=0.7):
    """Generate a text completion for *prompt* with the loaded GGUF model.

    Args:
        prompt: Input text sent to the model.
        max_tokens: Upper bound on the number of generated tokens
            (default 256, matching the previous hard-coded value).
        temperature: Sampling temperature (default 0.7, as before).

    Returns:
        The generated completion text as a string.
    """
    out = model(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # llama-cpp-python returns an OpenAI-style completion dict:
    # {"choices": [{"text": ...}, ...], ...}
    return out["choices"][0]["text"]
17
+
18
# Build the Gradio web UI around the chat function and start serving it.
demo = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Qwen2.5-1.5B Q4 Chatbot",
)
demo.launch()