Spaces:

gzsol
/

lab2

Sleeping

App Files Community

zsolnai committed on Dec 4, 2025

Commit

959abf9

1 Parent(s): 671e524

Retry gguf and llamacpp

Browse files

Files changed (2) hide show

app.py +19 -21
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import gradio as gr
 import numpy as np
 import soundfile as sf
 import torch
-from huggingface_hub import InferenceClient
 from transformers import pipeline
 from TTS.api import TTS
@@ -28,15 +28,20 @@ print("Loading Whisper...")
 STT_MODEL_NAME = "openai/whisper-tiny.en"
 stt_pipe = pipeline("automatic-speech-recognition", model=STT_MODEL_NAME, device=device)
-# --- 2. LLM Setup (Transformers Pipeline) ---
-print("Setting up LLM...")
 HF_API_TOKEN = os.getenv("HF_TOKEN")
-if HF_API_TOKEN:
-    from huggingface_hub import login
-    login(token=HF_API_TOKEN)
-print("Loading gzsol/model_1b...")
-llm_pipe = pipeline("text-generation", model="gzsol/model_1b", device=device, torch_dtype=torch.float32)
 # --- 3. TTS Setup (Coqui) ---
 print("Loading TTS...")
@@ -68,21 +73,17 @@ def chat_with_bot(message, history):
         # Create prompt with context
         prompt = context + f"User: {message}\nAssistant:"
-        print(f"Generating response...")
-        # Generate response using the pipeline
-        outputs = llm_pipe(
             prompt,
-            max_new_tokens=256,
             temperature=0.7,
-            do_sample=True,
             top_p=0.95,
-            num_return_sequences=1,
         )
-        response = outputs[0]["generated_text"]
-        # Extract only the new part (remove the prompt)
-        response_str = response[len(prompt):].strip()
         if not response_str:
             response_str = "I received an empty response. Please try again."
@@ -100,10 +101,7 @@ def chat_with_bot(message, history):
         print(f"LLM Error: {e}")
         print(f"Full traceback:\n{error_trace}")
-        if "StopIteration" in error_trace or "not found" in str(e).lower():
-            error_msg = f"Model not found or not accessible. Please check if HF_TOKEN is valid."
-        else:
-            error_msg = f"Error generating response: {str(e) if str(e) else 'Unknown error occurred'}"
         history.append({"role": "user", "content": message})
         history.append({"role": "assistant", "content": error_msg})

 import numpy as np
 import soundfile as sf
 import torch
+from huggingface_hub import hf_hub_download
 from transformers import pipeline
 from TTS.api import TTS
 STT_MODEL_NAME = "openai/whisper-tiny.en"
 stt_pipe = pipeline("automatic-speech-recognition", model=STT_MODEL_NAME, device=device)
+# --- 2. LLM Setup (Llama.cpp) ---
+print("Setting up Llama.cpp...")
 HF_API_TOKEN = os.getenv("HF_TOKEN")
+print("Downloading gzsol/model_1b GGUF...")
+model_path = hf_hub_download(
+    repo_id="gzsol/model_1b",
+    filename="model.gguf",
+    token=HF_API_TOKEN,
+)
+print(f"Loading model from {model_path}...")
+from llama_cpp import Llama
+llm = Llama(model_path=model_path, n_gpu_layers=-1, n_ctx=2048)
 # --- 3. TTS Setup (Coqui) ---
 print("Loading TTS...")
         # Create prompt with context
         prompt = context + f"User: {message}\nAssistant:"
+        print(f"Generating response with Llama...")
+        # Generate response using llama.cpp
+        response = llm(
             prompt,
+            max_tokens=256,
             temperature=0.7,
             top_p=0.95,
         )
+        response_str = response["choices"][0]["text"].strip()
         if not response_str:
             response_str = "I received an empty response. Please try again."
         print(f"LLM Error: {e}")
         print(f"Full traceback:\n{error_trace}")
+        error_msg = f"Error generating response: {str(e) if str(e) else 'Unknown error occurred'}"
         history.append({"role": "user", "content": message})
         history.append({"role": "assistant", "content": error_msg})

requirements.txt CHANGED Viewed

@@ -6,3 +6,4 @@ soundfile
 numpy
 huggingface-hub
 python-dotenv

 numpy
 huggingface-hub
 python-dotenv
+llama-cpp-python