Fu01978 commited on
Commit
98b52f2
·
verified ·
1 Parent(s): a677a92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -13
app.py CHANGED
@@ -1,32 +1,33 @@
1
- import gradio as gr
2
- from koboldcpp import KoboldCpp
3
  from huggingface_hub import hf_hub_download
 
 
4
 
5
- # Download GGUF model
6
  REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
7
  FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
8
 
9
  model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
10
 
11
- # Load KoboldCpp runner
12
- llm = KoboldCpp(
13
  model_path=model_path,
14
- context_length=2048,
15
- threads=4
 
16
  )
17
 
18
  def chat_fn(message, history):
19
- response = llm.generate(
20
- prompt=message,
21
- max_length=256,
22
- temp=0.7,
23
  top_p=0.95,
 
24
  )
25
- return response
26
 
27
  demo = gr.ChatInterface(
28
  fn=chat_fn,
29
- title="GGUF via KoboldCpp ⚡",
30
  )
31
 
32
  demo.launch()
 
1
import os  # NOTE(review): unused in this file — candidate for removal

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Quantized (Q4_K_M) GGUF build of TinyLlama 1.1B Chat from the Hub.
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

# Fetch the weights; hf_hub_download serves from the local cache on reruns.
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

# llama.cpp runtime: 2048-token context, 4 CPU threads, no GPU offload.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    n_gpu_layers=0,
)
17
 
18
def chat_fn(message, history):
    """Return the model's completion for *message*.

    *history* is the Gradio chat transcript; it is accepted to satisfy the
    ChatInterface callback signature but is not used.
    """
    # NOTE(review): the conversation history is not folded into the prompt,
    # so every turn is answered without prior context — confirm intended.
    completion = llm(
        message,
        max_tokens=256,
        temperature=0.7,
        top_p=0.95,
        stop=["</s>"],
    )
    first_choice = completion["choices"][0]
    return first_choice["text"]
27
 
28
# Wire chat_fn into Gradio's chat widget. The variable MUST be named `demo`
# so that Hugging Face Spaces can discover and serve the app.
demo = gr.ChatInterface(
    fn=chat_fn,
    title="llama-cpp-python GGUF Space 🚀",
)

demo.launch()