#from huggingface_hub import InferenceClient
# Use a pipeline as a high-level helper
from transformers import pipeline
import gradio as gr
import random
#client = InferenceClient("LargeWorldModel/LWM-Text-Chat-1M")
#client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
#client = InferenceClient("Trelis/Mistral-7B-Instruct-v0.1-Summarize-16k")
#client = InferenceClient("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T")
from prompts import GAME_MASTER
def format_prompt(message, history):
    """Build a Mistral-instruct style prompt string from the chat history.

    Each past (user, bot) turn becomes "[INST] user [/INST] bot</s> ", the
    whole prompt opens with "<s>", and the new message is appended as a
    final "[INST] ... [/INST]" awaiting completion.
    """
    pieces = ["<s>"]
    for user_turn, bot_turn in history:
        pieces.append(f"[INST] {user_turn} [/INST]")
        pieces.append(f" {bot_turn}</s> ")
    pieces.append(f"[INST] {message} [/INST]")
    return "".join(pieces)
# Load model directly
#from transformers import AutoTokenizer, AutoModelForCausalLM
#tokenizer = AutoTokenizer.from_pretrained("LargeWorldModel/LWM-Text-Chat-1M")
#model = AutoModelForCausalLM.from_pretrained("LargeWorldModel/LWM-Text-Chat-1M")
#model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
#tokenizer = AutoTokenizer.from_pretrained(model_id)
#model = AutoModelForCausalLM.from_pretrained(model_id)
#pipe = pipeline("text-generation", model="LargeWorldModel/LWM-Text-Chat-1M")
# Wrap the hosted Hugging Face model via Gradio's model loader instead of
# loading weights locally (the commented attempts above). The returned
# object is callable with a text input — see generate() below.
# NOTE(review): this relies on the remote Inference endpoint being
# available; there is no fallback if the load fails.
model = gr.load("models/LargeWorldModel/LWM-Text-Chat-1M")
def generate(inp, history, tokens):
    """Produce a model response for a ChatInterface turn.

    Parameters
    ----------
    inp : str
        The user's latest message.
    history : list
        Chat history supplied by gr.ChatInterface; currently unused because
        the remotely loaded model is called with the raw message only.
    tokens : int
        "Max new tokens" slider value; currently unused — the remote
        endpoint is not passed a generation-length parameter.

    Returns
    -------
    The remote model's output for `inp` (passed through unchanged).
    """
    # Removed leftover debug print()s and dead commented-out local-inference
    # code; the call goes straight to the gr.load()-wrapped remote model.
    return model(inp)
# Extra widgets appended below the chat box by gr.ChatInterface; their
# values are passed positionally to generate() after (message, history).
additional_inputs=[
    gr.Slider(
        label="Max new tokens",
        value=1048,  # default; NOTE(review): generate() currently ignores this value
        minimum=0,
        maximum=1000000,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
]
# Sample prompts shown under the chat box. Each row pads with None so its
# length matches the (message, history?, ...) example shape expected here;
# values are byte-identical to the original literal.
_example_prompts = [
    "I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?",
    "Can you write a short story about a time-traveling detective who solves historical mysteries?",
    "I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?",
    "I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?",
    "Can you explain how the QuickSort algorithm works and provide a Python implementation?",
    "What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?",
]
examples = [[prompt, None, None, None, None, None] for prompt in _example_prompts]
# Build and launch the chat UI. Fix: the title previously read
# "Mixtral 46.7B", but the model actually loaded above is
# LargeWorldModel/LWM-Text-Chat-1M — the title now matches reality.
gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    title="LWM-Text-Chat-1M",
    examples=examples,
    concurrency_limit=20,
).launch(share=True, show_api=True)