File size: 3,007 Bytes
c84874b
624d1eb
 
 
9c9ed59
8e89b30
af856f3
620e698
dceaa32
07e046b
4928cb7
727bcb3
9c9ed59
 
 
 
 
 
 
f8f2483
af856f3
624d1eb
a01ac6f
624d1eb
 
9c9ed59
af856f3
 
9c9ed59
af856f3
9c9ed59
5a8b9a7
 
ef3814d
9c9ed59
77eda6d
624d1eb
5a8b9a7
 
77eda6d
624d1eb
77eda6d
af856f3
9c9ed59
 
 
af856f3
9c9ed59
 
e4af97b
9c9ed59
b76abf0
9c9ed59
 
 
 
 
 
1afe06d
 
 
 
 
 
 
9c9ed59
e95e8e1
 
 
2891dae
1afe06d
 
 
197e40a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#from huggingface_hub import InferenceClient
# Use a pipeline as a high-level helper
from transformers import pipeline

import gradio as gr
import random
#client = InferenceClient("LargeWorldModel/LWM-Text-Chat-1M")
#client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
#client = InferenceClient("Trelis/Mistral-7B-Instruct-v0.1-Summarize-16k")
#client = InferenceClient("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T")
     
from prompts import GAME_MASTER
def format_prompt(message, history):
  """Build a Mistral-instruct style prompt string from the chat history.

  Each past (user, bot) turn is wrapped as "[INST] user [/INST] bot</s> ",
  the whole prompt opens with "<s>", and the new message is appended as a
  final unanswered "[INST] ... [/INST]" block.
  """
  past_turns = [
      f"[INST] {user_turn} [/INST] {bot_turn}</s> "
      for user_turn, bot_turn in history
  ]
  return "<s>" + "".join(past_turns) + f"[INST] {message} [/INST]"

# Earlier experiments loaded the model locally with transformers
# (AutoTokenizer/AutoModelForCausalLM or a text-generation pipeline);
# the current approach instead proxies the hosted Hub model through
# gr.load, which returns a callable inference wrapper.
_MODEL_REPO = "models/LargeWorldModel/LWM-Text-Chat-1M"

model = gr.load(_MODEL_REPO)

def generate(inp, history, tokens):
    """Generate a model response for one chat turn.

    Parameters
    ----------
    inp : str
        The user's latest message.
    history : list
        Chat history supplied by gr.ChatInterface. Currently unused —
        each turn is answered without conversational context.
    tokens : int
        Value of the "Max new tokens" slider. Currently unused; the
        gr.load wrapper is called with the raw message only.
        TODO: wire this into the model call so the slider has an effect.

    Returns
    -------
    The wrapper's output for *inp* — presumably the generated text;
    exact type depends on the gr.load interface (confirm).
    """
    # NOTE(review): history/tokens are accepted only to match the
    # ChatInterface + additional_inputs signature; consider passing the
    # history through format_prompt once context is supported.
    return model(inp)



# Extra widgets appended to the chat UI; their values are passed to
# generate() after (message, history).
_max_tokens_slider = gr.Slider(
    label="Max new tokens",
    value=1048,
    minimum=0,
    maximum=1000000,
    step=64,
    interactive=True,
    info="The maximum numbers of new tokens",
)

additional_inputs = [_max_tokens_slider]

# Example prompts shown under the chat box. Each row pads the message
# with None placeholders for the remaining example fields.
_example_messages = [
    "I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?",
    "Can you write a short story about a time-traveling detective who solves historical mysteries?",
    "I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?",
    "I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?",
    "Can you explain how the QuickSort algorithm works and provide a Python implementation?",
    "What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?",
]

examples = [[msg, None, None, None, None, None] for msg in _example_messages]

# Assemble the chat UI around generate() and serve it publicly.
# NOTE(review): the title says "Mixtral 46.7B" but the loaded model is
# LWM-Text-Chat-1M — looks stale; confirm and update the title string.
_chatbot = gr.Chatbot(
    show_label=False,
    show_share_button=False,
    show_copy_button=True,
    likeable=True,
    layout="panel",
)

_chat_app = gr.ChatInterface(
    fn=generate,
    chatbot=_chatbot,
    additional_inputs=additional_inputs,
    title="Mixtral 46.7B",
    examples=examples,
    concurrency_limit=20,
)

_chat_app.launch(share=True, show_api=True)