Spaces:

FarReelAILab
/

Machine_Mindset

Runtime error

App Files Files Community

wangrongsheng committed on Jan 5, 2024

Commit

16a199e

1 Parent(s): fe7fe2a

del two models

Browse files

Files changed (1) hide show

app.py +35 -79

app.py CHANGED Viewed

@@ -13,17 +13,13 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 DESCRIPTION = """\
 # Machine Mindset
 MM (Machine_Mindset) series models are developed through a collaboration between FarReel AI Lab(formerly known as the ChatLaw project) and Peking University's Deep Research Institute. These models are large-scale language models for various MBTI types in both Chinese and English, built on the Baichuan and LLaMA2 platforms.
 """
 LICENSE = """
 ---
 * Our code adheres to the Apache 2.0 open-source license. Please refer to the [LICENSE](https://github.com/PKU-YuanGroup/Machine-Mindset/blob/main/LICENSE) for specific details of the open-source agreement.
 * Our model weights are subject to an open-source agreement based on the original weights, with specific details provided in the Chinese version under the baichuan open-source license. For commercial use, please refer to [model_LICENSE](https://huggingface.co/JessyTsu1/Machine_Mindset_zh_INTP/resolve/main/Machine_Mindset%E5%9F%BA%E4%BA%8Ebaichuan%E7%9A%84%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) for further information.
 * The English version follows the open-source agreement under the [llama2 license](https://ai.meta.com/resources/models-and-libraries/llama-downloads/).
 """
@@ -36,16 +32,10 @@ if torch.cuda.is_available():
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
-    model_id_zh = "FarReelAILab/Machine_Mindset_zh_INTJ"
-    model_zh = AutoModelForCausalLM.from_pretrained(model_id_zh, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)
-    tokenizer_zh = AutoTokenizer.from_pretrained(model_id_zh, trust_remote_code=True)
-    tokenizer_zh.use_default_system_prompt = False
 @spaces.GPU
 def generate(
-    select_model: str,
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
@@ -55,78 +45,43 @@ def generate(
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
-    if select_model=="INTJ-en":
-        conversation = []
-        if system_prompt:
-            conversation.append({"role": "system", "content": system_prompt})
-        for user, assistant in chat_history:
-            conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
-        conversation.append({"role": "user", "content": message})
-        input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
-        if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
-            input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
-            gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
-        input_ids = input_ids.to(model.device)
-        streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
-        generate_kwargs = dict(
-            {"input_ids": input_ids},
-            streamer=streamer,
-            max_new_tokens=max_new_tokens,
-            do_sample=True,
-            top_p=top_p,
-            top_k=top_k,
-            temperature=temperature,
-            num_beams=1,
-            repetition_penalty=repetition_penalty,
-        )
-        t = Thread(target=model.generate, kwargs=generate_kwargs)
-        t.start()
-        outputs = []
-        for text in streamer:
-            outputs.append(text)
-            yield "".join(outputs)
-    if select_model=="INTJ-zh":
-        conversation = []
-        if system_prompt:
-            conversation.append({"role": "system", "content": system_prompt})
-        for user, assistant in chat_history:
-            conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
-        conversation.append({"role": "user", "content": message})
-        input_ids = tokenizer_zh.apply_chat_template(conversation, return_tensors="pt")
-        if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
-            input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
-            gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
-        input_ids = input_ids.to(model_zh.device)
-        streamer = TextIteratorStreamer(tokenizer_zh, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
-        generate_kwargs = dict(
-            {"input_ids": input_ids},
-            streamer=streamer,
-            max_new_tokens=max_new_tokens,
-            do_sample=True,
-            top_p=top_p,
-            top_k=top_k,
-            temperature=temperature,
-            num_beams=1,
-            repetition_penalty=repetition_penalty,
-        )
-        t = Thread(target=model_zh.generate, kwargs=generate_kwargs)
-        t.start()
-        outputs = []
-        for text in streamer:
-            outputs.append(text)
-            yield "".join(outputs)
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
-        gr.Dropdown(choices=["INTJ-en", "INTJ-zh"], value="INTJ-en", label="Select Model"),
         gr.Textbox(label="System prompt", lines=6),
         gr.Slider(
             label="Max new tokens",
@@ -170,6 +125,7 @@ chat_interface = gr.ChatInterface(
         ["Can you explain briefly to me what is the Python programming language?"],
         ["Explain the plot of Cinderella in a sentence."],
         ["How many hours does it take a man to eat a Helicopter?"],
     ],
 )
@@ -180,4 +136,4 @@ with gr.Blocks(css="style.css") as demo:
     gr.Markdown(LICENSE)
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch()

 DESCRIPTION = """\
 # Machine Mindset
 MM (Machine_Mindset) series models are developed through a collaboration between FarReel AI Lab(formerly known as the ChatLaw project) and Peking University's Deep Research Institute. These models are large-scale language models for various MBTI types in both Chinese and English, built on the Baichuan and LLaMA2 platforms.
 """
 LICENSE = """
 ---
 * Our code adheres to the Apache 2.0 open-source license. Please refer to the [LICENSE](https://github.com/PKU-YuanGroup/Machine-Mindset/blob/main/LICENSE) for specific details of the open-source agreement.
 * Our model weights are subject to an open-source agreement based on the original weights, with specific details provided in the Chinese version under the baichuan open-source license. For commercial use, please refer to [model_LICENSE](https://huggingface.co/JessyTsu1/Machine_Mindset_zh_INTP/resolve/main/Machine_Mindset%E5%9F%BA%E4%BA%8Ebaichuan%E7%9A%84%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) for further information.
 * The English version follows the open-source agreement under the [llama2 license](https://ai.meta.com/resources/models-and-libraries/llama-downloads/).
 """
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
 @spaces.GPU
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
+    conversation = []
+    if system_prompt:
+        conversation.append({"role": "system", "content": system_prompt})
+    for user, assistant in chat_history:
+        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+    conversation.append({"role": "user", "content": message})
+    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
+    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+    input_ids = input_ids.to(model.device)
+    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+    generate_kwargs = dict(
+        {"input_ids": input_ids},
+        streamer=streamer,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
+        num_beams=1,
+        repetition_penalty=repetition_penalty,
+    )
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
+    outputs = []
+    for text in streamer:
+        outputs.append(text)
+        yield "".join(outputs)
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
         gr.Textbox(label="System prompt", lines=6),
         gr.Slider(
             label="Max new tokens",
         ["Can you explain briefly to me what is the Python programming language?"],
         ["Explain the plot of Cinderella in a sentence."],
         ["How many hours does it take a man to eat a Helicopter?"],
+        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
     ],
 )
     gr.Markdown(LICENSE)
 if __name__ == "__main__":
+    demo.queue(max_size=20).launch()