import gradio as gr
from datetime import datetime
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


def get_conversation():
    # Build the system message that frames the model as a question-classification assistant.
    conversation = [{
        "role": "system",
        "content": f"""Today is {datetime.now().strftime('%d/%m/%Y')}. You are a chatbot that supports users in answering general questions.\nThere is a function to search for the type of question.\nYou are given a user's query and you need to classify the function to get the desired output.\nThis is the question that the user has asked:"""
    }]
    return conversation


def generate_response(prompt):
    # Load the fine-tuned model and tokenizer (reloaded on every request; this could be
    # hoisted to module level to avoid repeated loading).
    model = AutoModelForCausalLM.from_pretrained(
        "dohuyen/general-function-call",
        torch_dtype=torch.bfloat16,
        # load_in_8bit=True,
        device_map="auto",
        offload_folder="offload_folder",
        # attn_implementation="flash_attention_2"
    )
    tokenizer = AutoTokenizer.from_pretrained("dohuyen/general-function-call")

    # Tokenize the prompt and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Pass input_ids and attention_mask together; do_sample=True is required for
    # the temperature setting to take effect.
    outputs = model.generate(**inputs, max_length=256, do_sample=True, temperature=0.7)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Gradio interface: a single text box in, generated text out.
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Causal Language Model",
    description="A model that generates text based on your prompt."
)

interface.launch()
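
# get_conversation() above is defined but never wired into generate_response().
# A minimal sketch of how the system message could be combined with the user's query
# via the tokenizer's chat template (this assumes the dohuyen/general-function-call
# tokenizer ships a chat template; the build_prompt name is hypothetical):
#
# def build_prompt(tokenizer, user_query):
#     conversation = get_conversation()
#     conversation.append({"role": "user", "content": user_query})
#     return tokenizer.apply_chat_template(
#         conversation, tokenize=False, add_generation_prompt=True
#     )
#
# generate_response() could then call build_prompt(tokenizer, prompt) before tokenizing.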