zsolnai committed on
Commit
959abf9
·
1 Parent(s): 671e524

Retry gguf and llamacpp

Browse files
Files changed (2) hide show
  1. app.py +19 -21
  2. requirements.txt +1 -0
app.py CHANGED
@@ -5,7 +5,7 @@ import gradio as gr
5
  import numpy as np
6
  import soundfile as sf
7
  import torch
8
- from huggingface_hub import InferenceClient
9
  from transformers import pipeline
10
  from TTS.api import TTS
11
 
@@ -28,15 +28,20 @@ print("Loading Whisper...")
28
  STT_MODEL_NAME = "openai/whisper-tiny.en"
29
  stt_pipe = pipeline("automatic-speech-recognition", model=STT_MODEL_NAME, device=device)
30
 
31
- # --- 2. LLM Setup (Transformers Pipeline) ---
32
- print("Setting up LLM...")
33
  HF_API_TOKEN = os.getenv("HF_TOKEN")
34
- if HF_API_TOKEN:
35
- from huggingface_hub import login
36
- login(token=HF_API_TOKEN)
37
 
38
- print("Loading gzsol/model_1b...")
39
- llm_pipe = pipeline("text-generation", model="gzsol/model_1b", device=device, torch_dtype=torch.float32)
 
 
 
 
 
 
 
 
40
 
41
  # --- 3. TTS Setup (Coqui) ---
42
  print("Loading TTS...")
@@ -68,21 +73,17 @@ def chat_with_bot(message, history):
68
  # Create prompt with context
69
  prompt = context + f"User: {message}\nAssistant:"
70
 
71
- print(f"Generating response...")
72
 
73
- # Generate response using the pipeline
74
- outputs = llm_pipe(
75
  prompt,
76
- max_new_tokens=256,
77
  temperature=0.7,
78
- do_sample=True,
79
  top_p=0.95,
80
- num_return_sequences=1,
81
  )
82
 
83
- response = outputs[0]["generated_text"]
84
- # Extract only the new part (remove the prompt)
85
- response_str = response[len(prompt):].strip()
86
 
87
  if not response_str:
88
  response_str = "I received an empty response. Please try again."
@@ -100,10 +101,7 @@ def chat_with_bot(message, history):
100
  print(f"LLM Error: {e}")
101
  print(f"Full traceback:\n{error_trace}")
102
 
103
- if "StopIteration" in error_trace or "not found" in str(e).lower():
104
- error_msg = f"Model not found or not accessible. Please check if HF_TOKEN is valid."
105
- else:
106
- error_msg = f"Error generating response: {str(e) if str(e) else 'Unknown error occurred'}"
107
 
108
  history.append({"role": "user", "content": message})
109
  history.append({"role": "assistant", "content": error_msg})
 
5
  import numpy as np
6
  import soundfile as sf
7
  import torch
8
+ from huggingface_hub import hf_hub_download
9
  from transformers import pipeline
10
  from TTS.api import TTS
11
 
 
28
  STT_MODEL_NAME = "openai/whisper-tiny.en"
29
  stt_pipe = pipeline("automatic-speech-recognition", model=STT_MODEL_NAME, device=device)
30
 
31
+ # --- 2. LLM Setup (Llama.cpp) ---
32
+ print("Setting up Llama.cpp...")
33
  HF_API_TOKEN = os.getenv("HF_TOKEN")
 
 
 
34
 
35
+ print("Downloading gzsol/model_1b GGUF...")
36
+ model_path = hf_hub_download(
37
+ repo_id="gzsol/model_1b",
38
+ filename="model.gguf",
39
+ token=HF_API_TOKEN,
40
+ )
41
+
42
+ print(f"Loading model from {model_path}...")
43
+ from llama_cpp import Llama
44
+ llm = Llama(model_path=model_path, n_gpu_layers=-1, n_ctx=2048)
45
 
46
  # --- 3. TTS Setup (Coqui) ---
47
  print("Loading TTS...")
 
73
  # Create prompt with context
74
  prompt = context + f"User: {message}\nAssistant:"
75
 
76
+ print(f"Generating response with Llama...")
77
 
78
+ # Generate response using llama.cpp
79
+ response = llm(
80
  prompt,
81
+ max_tokens=256,
82
  temperature=0.7,
 
83
  top_p=0.95,
 
84
  )
85
 
86
+ response_str = response["choices"][0]["text"].strip()
 
 
87
 
88
  if not response_str:
89
  response_str = "I received an empty response. Please try again."
 
101
  print(f"LLM Error: {e}")
102
  print(f"Full traceback:\n{error_trace}")
103
 
104
+ error_msg = f"Error generating response: {str(e) if str(e) else 'Unknown error occurred'}"
 
 
 
105
 
106
  history.append({"role": "user", "content": message})
107
  history.append({"role": "assistant", "content": error_msg})
requirements.txt CHANGED
@@ -6,3 +6,4 @@ soundfile
6
  numpy
7
  huggingface-hub
8
  python-dotenv
 
 
6
  numpy
7
  huggingface-hub
8
  python-dotenv
9
+ llama-cpp-python