"""Gradio chat UI that forwards each user message to a model loaded via ``gr.load``.

The script builds a ``gr.ChatInterface`` around :func:`generate` and launches
it at import time.
"""

#from huggingface_hub import InferenceClient
# Use a pipeline as a high-level helper
from transformers import pipeline
import gradio as gr
import random

# Earlier experiments with hosted inference clients, kept for reference.
#client = InferenceClient("LargeWorldModel/LWM-Text-Chat-1M")
#client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
#client = InferenceClient("Trelis/Mistral-7B-Instruct-v0.1-Summarize-16k")
#client = InferenceClient("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T")
from prompts import GAME_MASTER


def format_prompt(message, history):
    """Render a chat history plus a new message as an ``[INST]``-tagged prompt.

    Args:
        message: The new user message, appended as the final ``[INST]`` turn.
        history: Iterable of ``(user_prompt, bot_response)`` pairs.

    Returns:
        One string in which every past turn is written as
        ``"[INST] {user} [/INST] {bot} "`` followed by
        ``"[INST] {message} [/INST]"``.
    """
    turns = [
        f"[INST] {user_prompt} [/INST] {bot_response} "
        for user_prompt, bot_response in history
    ]
    turns.append(f"[INST] {message} [/INST]")
    return "".join(turns)


# Load model directly
#from transformers import AutoTokenizer, AutoModelForCausalLM
#tokenizer = AutoTokenizer.from_pretrained("LargeWorldModel/LWM-Text-Chat-1M")
#model = AutoModelForCausalLM.from_pretrained("LargeWorldModel/LWM-Text-Chat-1M")
#model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
#tokenizer = AutoTokenizer.from_pretrained(model_id)
#model = AutoModelForCausalLM.from_pretrained(model_id)
#pipe = pipeline("text-generation", model="LargeWorldModel/LWM-Text-Chat-1M")
model = gr.load("models/LargeWorldModel/LWM-Text-Chat-1M")


def generate(inp, history, tokens):
    """Chat callback: send ``inp`` to the loaded model and return its output.

    Args:
        inp: The latest user message.
        history: Chat history passed in by ``gr.ChatInterface``; not used here.
        tokens: Value of the "Max new tokens" slider; not used here, because
            the loaded model is called with the message only.

    Returns:
        Whatever ``model(inp)`` returns.
    """
    #inputs = tokenizer(inp, return_tensors="pt")
    print(model)  # debug output, emitted on every request
    outputs = model(inp)
    print(outputs)
    #outputs = model.generate(**inputs, max_new_tokens=tokens)
    return outputs


# Extra controls shown under the chat box; their values are passed to
# generate() after (message, history).
additional_inputs = [
    gr.Slider(
        label="Max new tokens",
        value=1048,
        minimum=0,
        maximum=1000000,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
]

# Each row is [message, *values for additional inputs].
# NOTE(review): rows carry five None placeholders although only one additional
# input is declared above — presumably left over from a UI with more
# controls; confirm and trim if so.
examples = [
    ["I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?", None, None, None, None, None],
    ["Can you write a short story about a time-traveling detective who solves historical mysteries?", None, None, None, None, None],
    ["I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?", None, None, None, None, None],
    ["I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?", None, None, None, None, None],
    ["Can you explain how the QuickSort algorithm works and provide a Python implementation?", None, None, None, None, None],
    ["What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?", None, None, None, None, None],
]

# NOTE(review): the title says "Mixtral 46.7B" but the model loaded above is
# LargeWorldModel/LWM-Text-Chat-1M — confirm which label is intended.
gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(
        show_label=False,
        show_share_button=False,
        show_copy_button=True,
        likeable=True,
        layout="panel",
    ),
    additional_inputs=additional_inputs,
    title="Mixtral 46.7B",
    examples=examples,
    concurrency_limit=20,
).launch(share=True, show_api=True)