bricksandbot committed
Commit 8c04ab8 · verified · 1 Parent(s): ece3728

Deploy Buildsnpper chatbot Gradio interface


- Add Gradio chat interface for the Buildsnpper platform
- Serves the bricksandbotltd/buildsnpper-chatbot-merged model (4-bit NF4 quantization on ZeroGPU), replacing the earlier Q4_K_M GGUF build
- Includes 8 example questions
- Supports conversation history
- Clean, simple UI for customer support (a minimal wiring sketch follows below)
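
The diff below shows only fragments of the Gradio layout (the gr.Blocks header and two gr.Markdown calls); the widget that actually invokes chat(), and the list of example questions, fall outside the visible hunks. As a rough, hedged illustration of how the pieces described above typically fit together, a minimal wiring might look like the sketch below; the example questions and layout here are placeholders, not the Space's actual code.

```python
import gradio as gr


def chat(message, history):
    """Stand-in for the chat() function defined in app.py."""
    return "…"


# Placeholder example questions -- the real Space ships 8 of them, not shown in the diff.
EXAMPLES = [
    "How do I get started with Buildsnpper?",
    "How do I contact support?",
]

# Plausible wiring only: gr.ChatInterface manages the conversation history
# and passes it back into chat() on every turn.
with gr.Blocks(title="Buildsnpper Chatbot", theme=gr.themes.Soft()) as demo:
    gr.Markdown("Buildsnpper customer support chatbot")
    gr.ChatInterface(fn=chat, examples=EXAMPLES)

if __name__ == "__main__":
    demo.launch()
```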

Files changed (2)
  1. app.py +57 -25
  2. requirements.txt +6 -3
app.py CHANGED

```diff
@@ -1,31 +1,41 @@
 #!/usr/bin/env python3
 """
 Gradio interface for Buildsnpper Chatbot.
-Deployed as a HuggingFace Space.
+Deployed as a HuggingFace Space with ZeroGPU and 4-bit quantization.
 """
 import gradio as gr
-from llama_cpp import Llama
-import os
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+import spaces

 # Configuration
-MODEL_REPO = "bricksandbotltd/buildsnpper-chatbot-Q4_K_M"
-MODEL_FILE = "buildsnpper-chatbot-Q4_K_M.gguf"
-
-# Initialize model (loaded once at startup)
-print("Loading model...")
-llm = Llama.from_pretrained(
-    repo_id=MODEL_REPO,
-    filename=MODEL_FILE,
-    n_ctx=2048,  # Context window
-    n_threads=4,  # CPU threads
-    verbose=False
+MODEL_REPO = "bricksandbotltd/buildsnpper-chatbot-merged"
+
+# 4-bit quantization config
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4"
 )
+
+# Initialize model and tokenizer
+print("Loading model and tokenizer with 4-bit quantization...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_REPO,
+    quantization_config=quantization_config,
+    device_map="auto",
+    trust_remote_code=True
+)
+model.eval()
 print("Model loaded successfully!")


+@spaces.GPU
 def chat(message, history):
     """
-    Process user message and generate response.
+    Process user message and generate response using ZeroGPU.

     Args:
         message: User's input message
@@ -43,16 +53,35 @@ def chat(message, history):
     # Add current message
     messages.append({"role": "user", "content": message})

-    # Generate response
-    response = llm.create_chat_completion(
-        messages=messages,
-        temperature=0.1,
-        top_p=0.9,
-        max_tokens=300,
-        stop=["<|endoftext|>", "<|end|>"]
+    # Format with chat template
+    prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
     )

-    return response['choices'][0]['message']['content']
+    # Tokenize
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+    # Generate response
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=300,
+            temperature=0.1,
+            do_sample=True,
+            top_p=0.9,
+            pad_token_id=tokenizer.eos_token_id,
+        )
+
+    # Decode response
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Extract just the assistant's response
+    if "<|assistant|>" in response:
+        response = response.split("<|assistant|>")[-1].strip()
+
+    return response


 # Example questions
@@ -81,6 +110,8 @@ with gr.Blocks(title="Buildsnpper Chatbot", theme=gr.themes.Soft()) as demo:
         - Technical issues

         **Note**: This chatbot is specialized for Buildsnpper platform questions only.
+
+        **Powered by**: ZeroGPU for fast inference
         """
     )

@@ -97,10 +128,11 @@ with gr.Blocks(title="Buildsnpper Chatbot", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
         ---
-        **Model**: [bricksandbotltd/buildsnpper-chatbot-Q4_K_M](https://huggingface.co/bricksandbotltd/buildsnpper-chatbot-Q4_K_M)
+        **Model**: [bricksandbotltd/buildsnpper-chatbot-merged](https://huggingface.co/bricksandbotltd/buildsnpper-chatbot-merged)
         **Base Model**: microsoft/Phi-4-mini-instruct (3.8B parameters)
         **Fine-tuned**: LoRA on 89 Buildsnpper Q&A pairs
-        **Format**: GGUF Q4_K_M quantized
+        **Quantization**: 4-bit (NF4) with bitsandbytes
+        **Acceleration**: ZeroGPU
         """
     )

```
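
The second hunk's context skips the top of chat() (new lines 42-52), where the messages list is built from the Gradio conversation history. A rough reconstruction is sketched below, assuming Gradio's pair-style history format; the real code may differ, and may also prepend a system prompt that the diff does not show.

```python
# Hedged reconstruction of the elided start of chat(); not taken from the commit.
# Assumes Gradio's pair-style history: a list of [user_text, assistant_text] pairs.
def build_messages(message, history):
    messages = []
    # The real app may prepend a system prompt here; it is not visible in the diff.
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    # This final step is visible in the diff ("Add current message"):
    messages.append({"role": "user", "content": message})
    return messages
```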
 
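
A note on the new @spaces.GPU decorator: on ZeroGPU hardware it attaches a GPU to the process for the duration of each decorated call and releases it afterwards, and it has no effect when the Space is not running on ZeroGPU. The spaces package also accepts a per-call duration hint, shown below with an assumed value that is not part of this commit.

```python
import spaces


@spaces.GPU(duration=120)  # assumed duration hint in seconds; the commit uses the bare decorator
def chat(message, history):
    # generation code as in app.py; the GPU is held only while this call runs
    ...
```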
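One caveat on the response-extraction step: model.generate returns the prompt tokens followed by the newly generated ones, and because the decode uses skip_special_tokens=True, the <|assistant|> marker may already have been stripped from the decoded text, in which case the split never triggers and the whole transcript is returned. A common, template-agnostic alternative (a sketch, not what this commit does) is to decode only the tokens that come after the prompt:

```python
def extract_reply(tokenizer, inputs, outputs):
    """Decode only the newly generated tokens (sketch; `inputs` and `outputs`
    are the tensors produced inside app.py's chat())."""
    prompt_len = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
```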
requirements.txt CHANGED

```diff
@@ -1,3 +1,6 @@
-gradio==4.44.0
-huggingface-hub==0.20.0
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.90-cpu/llama_cpp_python-0.2.90-cp310-cp310-manylinux_2_17_x86_64.whl
+gradio
+transformers
+torch
+accelerate
+bitsandbytes
+spaces
```
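
For orientation, the role each dependency plays in the new, unpinned requirements file (the annotations are editorial, not part of the commit):

```
gradio        # Blocks UI and chat widgets
transformers  # AutoTokenizer / AutoModelForCausalLM
torch         # tensor backend for inference
accelerate    # needed by transformers for device_map="auto"
bitsandbytes  # 4-bit NF4 quantization (expects a CUDA GPU)
spaces        # @spaces.GPU decorator for ZeroGPU
```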