Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
| import json | |
| from globe import title, description, customtool , presentation1, presentation2, joinus | |
| import spaces | |
# Hugging Face Hub id of the checkpoint served by this demo.
model_path = "nvidia/Nemotron-Mini-4B-Instruct"

# Load the tokenizer and the weights once; both generation paths share them.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Build the pipeline around the objects that are already loaded. Passing the
# repo id here instead makes `pipeline` load a second copy of the weights,
# doubling memory use for no benefit.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def create_prompt(system_message, user_message, tool_definition="", context=""):
    """Build the raw single-turn prompt string fed to the model.

    The prompt uses the ``<extra_id_0>System`` / ``<extra_id_1>User`` /
    ``<extra_id_1>Assistant`` turn markers and always ends with an open
    assistant turn so the model continues from there.
    NOTE(review): presumably this mirrors the template the checkpoint was
    fine-tuned on -- confirm against the model card.

    Args:
        system_message: Text placed in the system turn.
        user_message: Text placed in the user turn.
        tool_definition: Optional tool schema; when non-empty it is wrapped
            in ``<tool>`` tags and a ``<context>`` section is always emitted
            alongside it (even if ``context`` is empty).
        context: Optional extra context, wrapped in ``<context>`` tags. It is
            now also emitted when no tool definition is given; previously it
            was silently dropped in that case.

    Returns:
        The assembled prompt as a single string.
    """
    if tool_definition:
        return (
            f"<extra_id_0>System\n{system_message}\n"
            f"<tool>\n{tool_definition}\n</tool>\n"
            f"<context>\n{context}\n</context>\n"
            f"<extra_id_1>User\n{user_message}\n"
            "<extra_id_1>Assistant\n"
        )
    if context:
        # Same layout as the tool prompt, minus the <tool> section.
        return (
            f"<extra_id_0>System\n{system_message}\n"
            f"<context>\n{context}\n</context>\n"
            f"<extra_id_1>User\n{user_message}\n"
            "<extra_id_1>Assistant\n"
        )
    return f"<extra_id_0>System\n{system_message}\n\n<extra_id_1>User\n{user_message}\n<extra_id_1>Assistant\n"
def generate_response(message, history, system_message, max_tokens, temperature, top_p, use_pipeline=False, tool_definition="", context=""):
    """Generate one assistant reply for ``message``.

    The prompt is built by ``create_prompt`` so that the tool definition and
    context actually reach the model, then run either through the shared
    ``pipe`` or directly through ``model.generate``.

    Args:
        message: The user's latest message.
        history: Chat history; accepted for interface compatibility but not
            used -- each call is a single-turn exchange.
        system_message: System prompt text.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        use_pipeline: If true, generate via ``pipe``; otherwise call
            ``model.generate`` directly.
        tool_definition: Optional tool schema forwarded to ``create_prompt``.
        context: Optional context forwarded to ``create_prompt``.

    Returns:
        The assistant's reply as a string. When a tool definition was given
        and the reply contains a ``<toolcall>`` section, the extracted call
        and an explanatory note are appended.
    """
    full_prompt = create_prompt(system_message, message, tool_definition, context)

    # Identical sampling settings for both paths so the checkbox only changes
    # the code path, not the decoding behaviour.
    generation_kwargs = {
        "max_new_tokens": int(max_tokens),
        "temperature": temperature,
        "top_p": top_p,
        "do_sample": True,
    }

    if use_pipeline:
        # A plain string prompt yields a string in `generated_text`;
        # return_full_text=False keeps the prompt out of the result.
        outputs = pipe(full_prompt, return_full_text=False, **generation_kwargs)
        response = outputs[0]["generated_text"]
    else:
        encoded = tokenizer(full_prompt, return_tensors="pt").to(model.device)
        input_ids = encoded["input_ids"]
        with torch.no_grad():
            output_ids = model.generate(
                input_ids=input_ids,
                attention_mask=encoded.get("attention_mask"),
                **generation_kwargs,
            )
        # `generate` returns prompt + completion; decode only the new tokens
        # so the prompt is not echoed back to the user.
        prompt_length = input_ids.shape[1]
        response = tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)

    # Defensive: if an assistant marker still appears in the text, keep only
    # what follows the last one.
    assistant_response = response.split("<extra_id_1>Assistant\n")[-1].strip()

    if tool_definition and "<toolcall>" in assistant_response:
        tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
        assistant_response += (
            f"\n\nTool Call: {tool_call}"
            "\n\nNote: This is a simulated tool call. In a real scenario, the tool would be executed and its output would be used to generate a final response."
        )
    return assistant_response
# Gradio UI: static header rows, a chat column with advanced settings, and the
# event wiring that connects the inputs to `generate_response`.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(title)
    with gr.Row():
        gr.Markdown(description)
    with gr.Row():
        with gr.Group():
            gr.Markdown(presentation1)
        with gr.Group():
            gr.Markdown(presentation2)
    with gr.Row():
        gr.Markdown(joinus)
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="π€Nemotron-Mini", height=400)
            msg = gr.Textbox(label="User Input", placeholder="Ask a question or request a task...")
            with gr.Accordion(label="π§ͺAdvanced Settings", open=False):
                system_message = gr.Textbox(
                    label="System Message",
                    value="You are a helpful AI assistant.",
                    lines=2,
                    placeholder="Set the AI's behavior and context..."
                )
                context = gr.Textbox(
                    label="Context",
                    lines=2,
                    placeholder="Enter additional context information..."
                )
                max_tokens = gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens")
                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
                use_pipeline = gr.Checkbox(label="Useπ€·π»ββοΈPipeline", value=False)
                use_tool = gr.Checkbox(label="Use Functionπ€Calling", value=False)
                # Hidden until "Use Function Calling" is ticked (see use_tool.change below).
                with gr.Column(visible=False) as tool_options:
                    tool_definition = gr.Code(
                        label="π€Tool Definition (JSON)",
                        value=customtool,
                        lines=15,
                        language="json"
                    )
            with gr.Row():
                clear = gr.Button("Clear")
                send = gr.Button("Send")

    def user(user_message, history):
        """Append the user's message to the history and clear the input box."""
        return "", history + [[user_message, None]]

    def bot(history, system_message, max_tokens, temperature, top_p, use_pipeline, use_tool, tool_definition, context):
        """Fill in the assistant reply for the last history entry."""
        user_message = history[-1][0]
        # The hidden editor always holds the default tool JSON, so only pass
        # it on when function calling is actually enabled.
        active_tool_definition = tool_definition if use_tool else ""
        bot_message = generate_response(
            user_message,
            history,
            system_message,
            max_tokens,
            temperature,
            top_p,
            use_pipeline,
            active_tool_definition,
            context,
        )
        history[-1][1] = bot_message
        return history

    # Both triggers (Enter in the textbox, Send button) run the same chain.
    bot_inputs = [
        chatbot,
        system_message,
        max_tokens,
        temperature,
        top_p,
        use_pipeline,
        use_tool,
        tool_definition,
        context,
    ]
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, bot_inputs, chatbot
    )
    send.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, bot_inputs, chatbot
    )
    # Returning None resets the chat window.
    clear.click(lambda: None, None, chatbot, queue=False)
    use_tool.change(
        fn=lambda x: gr.update(visible=x),
        inputs=[use_tool],
        outputs=[tool_options]
    )

if __name__ == "__main__":
    demo.launch()