OPtimusPrimeSkibidi committed on
Commit
8d2c89a
·
verified ·
1 Parent(s): c946995

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -12
app.py CHANGED
@@ -1,8 +1,15 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
 
4
- # This model works on free HF Inference API and is uncensored
5
- client = InferenceClient(model="NousResearch/Hermes-3-Llama-3.1-8B")
 
 
 
 
 
 
 
6
 
7
  def chat(message, history):
8
  messages = []
@@ -11,23 +18,18 @@ def chat(message, history):
11
  messages.append({"role": "assistant", "content": assistant})
12
  messages.append({"role": "user", "content": message})
13
 
14
- response = ""
15
  try:
16
- for chunk in client.chat_completion(
17
  messages=messages,
18
  max_tokens=512,
19
  temperature=0.7,
20
- stream=True
21
- ):
22
- if chunk.choices[0].delta.content:
23
- response += chunk.choices[0].delta.content
24
- yield response
25
  except Exception as e:
26
- yield f"Error: {str(e)}"
27
 
28
  demo = gr.ChatInterface(
29
  chat,
30
- type="messages",
31
  title="AI Chat",
32
  description="Context maintained during session, resets on refresh"
33
  )
 
1
  import gradio as gr
2
+ from llama_cpp import Llama
3
 
4
+ # Load model locally - this will take a few minutes on first startup
5
+ llm = Llama.from_pretrained(
6
+ repo_id="bartowski/Llama-3-8B-Lexi-Uncensored-GGUF",
7
+ filename="*Q4_K_M.gguf", # 4-bit quantization for CPU
8
+ n_ctx=4096,
9
+ n_threads=4,
10
+ n_gpu_layers=0,
11
+ verbose=False
12
+ )
13
 
14
  def chat(message, history):
15
  messages = []
 
18
  messages.append({"role": "assistant", "content": assistant})
19
  messages.append({"role": "user", "content": message})
20
 
 
21
  try:
22
+ response = llm.create_chat_completion(
23
  messages=messages,
24
  max_tokens=512,
25
  temperature=0.7,
26
+ )
27
+ return response["choices"][0]["message"]["content"]
 
 
 
28
  except Exception as e:
29
+ return f"Error: {str(e)}"
30
 
31
  demo = gr.ChatInterface(
32
  chat,
 
33
  title="AI Chat",
34
  description="Context maintained during session, resets on refresh"
35
  )