Spaces:

Vijayrathank
/

sample-space

Sleeping

File size: 2,095 Bytes

f2ad658
caf0818
f2ad658
6b97fe3
 
bc16332
 
ecb20f9
 
f2ad658
59ec3ea
ecb20f9
 
f2ad658
 
6b97fe3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2ad658
6b97fe3
f2ad658
6b97fe3
f2ad658
6b97fe3
f2ad658
6b97fe3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2ad658
 
 
 
 
6b97fe3
f2ad658
 
 
 
 
 
 
 
 
6b97fe3

import gradio as gr
from huggingface_hub import login

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

# Hugging Face access token, read from the HF_TOKEN environment variable
# (None when the variable is not set).
hf_token = os.getenv("HF_TOKEN")
# Only log in when a token is actually configured: login() without a token
# falls back to an interactive prompt, which cannot be answered when the app
# runs unattended.
if hf_token:
    login(token=hf_token)

model_id = "meta-llama/Llama-3.2-1B"  # small enough to run locally on CPU
# The tokenizer and model are loaded once at import time and reused by every
# call to chat(); the token is also passed explicitly to both downloads.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)


def chat(prompt):
    """Generate text for *prompt* with the module-level model.

    The prompt is tokenized into PyTorch tensors, passed to
    ``model.generate`` with sampling enabled, and the first returned
    sequence is decoded back to a string with special tokens removed.

    Args:
        prompt: Text entered by the user.

    Returns:
        The decoded text of the first generated sequence.
    """
    # Sampling configuration: at most 100 newly generated tokens, sampled
    # with temperature 0.7 and nucleus (top-p) cutoff 0.9.
    generation_options = {
        "max_new_tokens": 100,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
    }
    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model.generate(**encoded, **generation_options)
    # NOTE(review): for causal LMs the generated sequence normally starts
    # with the prompt tokens, so the returned text likely echoes the prompt
    # before the continuation -- confirm this is the intended output.
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
# def respond(
#     message,
#     history: list[dict[str, str]],
#     system_message,
#     max_tokens,
#     temperature,
#     top_p,
#     hf_token: gr.OAuthToken,
# ):
#     """
#     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
#     """
#     client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

#     messages = [{"role": "system", "content": system_message}]

#     messages.extend(history)

#     messages.append({"role": "user", "content": message})

#     response = ""

#     for message in client.chat_completion(
#         messages,
#         max_tokens=max_tokens,
#         stream=True,
#         temperature=temperature,
#         top_p=top_p,
#     ):
#         choices = message.choices
#         token = ""
#         if len(choices) and choices[0].delta.content:
#             token = choices[0].delta.content

#         response += token
#         yield response


"""
For information on how to customize the Interface used below, peruse the gradio docs: https://www.gradio.app/docs/gradio/interface
"""
# Single text-in / text-out interface backed by chat().
chatbot = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Local HF Model Chatbot",
)

# Top-level layout: a sidebar holding the login button, followed by the
# interface rendered inside the same Blocks context.
demo = gr.Blocks()
with demo:
    sidebar = gr.Sidebar()
    with sidebar:
        gr.LoginButton()
    chatbot.render()


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()