from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import gradio as gr
import torch


# Set quantization config (4-bit NF4 to cut memory use and speed up inference)
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,  # 4-bit precision
    bnb_4bit_quant_type="nf4",  # NF4 quantization for better accuracy
    bnb_4bit_compute_dtype=torch.float16,  # use float16 as the compute dtype
)
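# Note: bitsandbytes 4-bit quantization typically requires a CUDA-capable GPU.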
# Load Phi-2 (smaller model with high-quality responses)
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,  # apply the 4-bit config defined above
    device_map="auto",  # device placement belongs here, not in BitsAndBytesConfig
)
# Compile the model for faster inference
model = torch.compile(model)
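# Note (assumption): torch.compile gains with 4-bit bitsandbytes layers vary by
# PyTorch/bitsandbytes version, and .generate() may bypass the compiled forward;
# if compilation errors out, this line can simply be removed.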

def respond(message, history):  # history is unused; each turn is answered independently
    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    # do_sample=True is required for temperature/top_p to take effect
    outputs = model.generate(**inputs, max_new_tokens=50, do_sample=True, temperature=0.7, top_p=0.9)
    # Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
    return response

# Gradio Chat Interface
gr.ChatInterface(
    respond,
    title="🤖 Phi-2 Chatbot",
    description="Ask me anything! Powered by Phi-2.",
    examples=["What's your favorite book?", "Tell me a fun fact about space!"],
    theme="soft"
).launch()
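# launch() starts a local Gradio server (default: http://127.0.0.1:7860).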