Fu01978 committed on
Commit
d19644f
·
verified ·
1 Parent(s): 8dcdd3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -22
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from llama_cpp import Llama
3
  import os
4
 
5
  # Download and load the GGUF model
@@ -14,13 +14,28 @@ if not os.path.exists(model_path):
14
  print("Model downloaded!")
15
 
16
  # Load the model
17
- llm = Llama(
18
- model_path=model_path,
19
- n_ctx=2048, # Context window
20
- n_threads=4, # Number of CPU threads
21
- n_gpu_layers=0 # Set to -1 to offload all layers to GPU if available
22
  )
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def chat(message, history):
25
  """
26
  Process chat messages and generate responses.
@@ -29,28 +44,19 @@ def chat(message, history):
29
  message: Current user message
30
  history: List of [user_msg, bot_msg] pairs
31
  """
32
- # Build conversation with proper Llama format
33
- messages = []
34
-
35
- # Add chat history
36
- for user_msg, bot_msg in history:
37
- messages.append({"role": "user", "content": user_msg})
38
- messages.append({"role": "assistant", "content": bot_msg})
39
-
40
- # Add current message
41
- messages.append({"role": "user", "content": message})
42
 
43
  # Generate response
44
- response = llm.create_chat_completion(
45
- messages=messages,
46
- max_tokens=512,
47
  temperature=0.7,
48
  top_p=0.9,
49
- stream=False
50
  )
51
 
52
- # Extract the assistant's response
53
- return response["choices"][0]["message"]["content"]
54
 
55
  # Create Gradio interface
56
  demo = gr.ChatInterface(
 
1
  import gradio as gr
2
+ from ctransformers import AutoModelForCausalLM
3
  import os
4
 
5
  # Download and load the GGUF model
 
14
  print("Model downloaded!")
15
 
16
  # Load the model
17
# Load the GGUF model with ctransformers. Runs on CPU by default;
# raise gpu_layers if a GPU is available.
_LOAD_OPTS = {
    "model_type": "llama",      # GGUF architecture hint for ctransformers
    "context_length": 2048,     # prompt + generation token budget
    "gpu_layers": 0,            # 0 = pure CPU; set higher to offload layers
}
llm = AutoModelForCausalLM.from_pretrained(model_path, **_LOAD_OPTS)
23
 
24
def format_prompt(message, history):
    """Build a Llama 3.2 chat-format prompt string.

    Args:
        message: Current user message.
        history: Iterable of (user_msg, bot_msg) pairs.

    Returns:
        A single prompt string ending with an OPEN assistant header,
        so generation continues as the assistant's reply.
    """
    def closed_turn(role, text):
        # One complete, <|eot_id|>-terminated turn for the given role.
        return f"<|start_header_id|>{role}<|end_header_id|>\n\n{text}<|eot_id|>"

    pieces = []
    for user_msg, bot_msg in history:
        pieces.append(closed_turn("user", user_msg))
        pieces.append(closed_turn("assistant", bot_msg))
    pieces.append(closed_turn("user", message))
    # Deliberately unterminated: the model writes the assistant turn.
    pieces.append("<|start_header_id|>assistant<|end_header_id|>\n\n")
    return "".join(pieces)
38
+
39
def chat(message, history):
    """
    Process chat messages and generate responses.

    Args:
        message: Current user message
        history: List of [user_msg, bot_msg] pairs

    Returns:
        The model's reply text, stripped of surrounding whitespace.
    """
    prompt = format_prompt(message, history)

    # Stop on Llama 3.2 turn delimiters so generation ends after one reply.
    generation_opts = {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.9,
        "stop": ["<|eot_id|>", "<|start_header_id|>"],
    }
    response = llm(prompt, **generation_opts)
    return response.strip()
 
60
 
61
  # Create Gradio interface
62
  demo = gr.ChatInterface(