Fu01978 committed on
Commit
f3dd530
·
verified ·
1 Parent(s): 6aa1120

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -28
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from ctransformers import AutoModelForCausalLM
3
  import os
4
 
5
  # Download and load the GGUF model
@@ -14,27 +14,15 @@ if not os.path.exists(model_path):
14
  print("Model downloaded!")
15
 
16
  # Load the model
17
- llm = AutoModelForCausalLM.from_pretrained(
18
- model_path,
19
- model_type="llama",
20
- context_length=2048,
21
- gpu_layers=0 # Set higher if GPU available
 
 
22
  )
23
-
24
- def format_prompt(message, history):
25
- """Format the conversation into Llama 3.2 chat format"""
26
- prompt = ""
27
-
28
- # Add chat history
29
- for user_msg, bot_msg in history:
30
- prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
31
- prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{bot_msg}<|eot_id|>"
32
-
33
- # Add current message
34
- prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|>"
35
- prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
36
-
37
- return prompt
38
 
39
  def chat(message, history):
40
  """
@@ -44,19 +32,27 @@ def chat(message, history):
44
  message: Current user message
45
  history: List of [user_msg, bot_msg] pairs
46
  """
47
- # Format the prompt
48
- prompt = format_prompt(message, history)
 
 
 
 
 
 
 
 
49
 
50
  # Generate response
51
- response = llm(
52
- prompt,
53
- max_new_tokens=512,
54
  temperature=0.7,
55
  top_p=0.9,
56
- stop=["<|eot_id|>", "<|start_header_id|>"]
57
  )
58
 
59
- return response.strip()
 
60
 
61
  # Create Gradio interface
62
  demo = gr.ChatInterface(
 
1
  import gradio as gr
2
+ from llama_cpp import Llama
3
  import os
4
 
5
  # Download and load the GGUF model
 
14
  print("Model downloaded!")
15
 
16
  # Load the model
17
+ print("Loading model...")
18
+ llm = Llama(
19
+ model_path=model_path,
20
+ n_ctx=2048,
21
+ n_threads=4,
22
+ n_gpu_layers=0,
23
+ verbose=False
24
  )
25
+ print("Model loaded!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  def chat(message, history):
28
  """
 
32
  message: Current user message
33
  history: List of [user_msg, bot_msg] pairs
34
  """
35
+ # Build conversation with proper Llama format
36
+ messages = []
37
+
38
+ # Add chat history
39
+ for user_msg, bot_msg in history:
40
+ messages.append({"role": "user", "content": user_msg})
41
+ messages.append({"role": "assistant", "content": bot_msg})
42
+
43
+ # Add current message
44
+ messages.append({"role": "user", "content": message})
45
 
46
  # Generate response
47
+ response = llm.create_chat_completion(
48
+ messages=messages,
49
+ max_tokens=512,
50
  temperature=0.7,
51
  top_p=0.9,
 
52
  )
53
 
54
+ # Extract the assistant's response
55
+ return response["choices"][0]["message"]["content"]
56
 
57
  # Create Gradio interface
58
  demo = gr.ChatInterface(