pidgin-spaces / app.py
import os
import gradio as gr
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
model_name = "Guavacoderepo/gclm-3b-pidgin"
# Get token from Hugging Face Secrets
hf_token = os.environ.get("HF_TOKEN")
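# (On a Hugging Face Space, add HF_TOKEN under Settings -> Variables and secrets.)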
# 4-bit (FP4) quantization config
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="fp4",             # explicit; fp4 is also the bitsandbytes default
    bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
    bnb_4bit_use_double_quant=True,        # quantize the quantization constants too
    llm_int8_enable_fp32_cpu_offload=True  # allow CPU offload if the model doesn't fit on GPU
)
# `use_auth_token` is deprecated in recent transformers; use `token=` instead
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

# Load model with the BitsAndBytesConfig; device_map="auto" lets accelerate
# place layers across the available GPU(s) and CPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
    token=hf_token
)
# Create the pipeline once at startup (not inside the request handler);
# the model is already dispatched by accelerate, so no device_map is needed here
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def chat_fn(user_input):
    # Generate a response; return_full_text=False strips the echoed prompt
    output = pipe(user_input, max_new_tokens=150, do_sample=True,
                  temperature=0.7, top_p=0.9, return_full_text=False)
    return output[0]["generated_text"]
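# Example call (illustrative): chat_fn("How you dey?") returns only the model's reply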
iface = gr.Interface(fn=chat_fn, inputs="text", outputs="text", title="GCLM-3B-Pidgin Chat")
iface.launch()
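# Optional: call iface.queue() before launch() to serialize requests on shared hardware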