TheBloke-starchat-beta-GPTQ

Runtime error

App Files Files Community

binqiangliu committed on Oct 24, 2023

Commit

b8ba1e0

1 Parent(s): 0f80c72

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -22

app.py CHANGED Viewed

@@ -1,34 +1,47 @@
 from transformers import pipeline, Conversation
 import gradio as gr
-#这个Space主要是演示了可以直接使用Huggingface的pipeline构建AIApp，然后还刚好可以和Gradio的ChatInterface对应上！
-chatbot = pipeline(model="facebook/blenderbot-400M-distill") #Working!
-#https://huggingface.co/facebook/blenderbot-400M-distill/tree/main
-#这个模型文件大小：730MB或1.46GB
-#https://huggingface.co/facebook/blenderbot-400M-distill/tree/main?library=true
-# Use a pipeline as a high-level helper
-#from transformers import pipeline
-#pipe = pipeline("conversational", model="facebook/blenderbot-400M-distill")
-#chatbot = pipeline(model="HuggingFaceH4/starchat-beta")
-#https://huggingface.co/HuggingFaceH4/starchat-beta/tree/main
-#由于这个模型太大了（9.96+9.86+9.86+1.36GB），会导致如下错误：
-#Runtime error
-#Memory limit exceeded (16Gi)
-#chatbot = pipeline(model="...")
-message_list = []
-response_list = []
-def vanilla_chatbot(message, history):
-    conversation = Conversation(text=message, past_user_inputs=message_list, generated_responses=response_list)
-    conversation = chatbot(conversation)
-    return conversation.generated_responses[-1]
-demo_chatbot = gr.ChatInterface(vanilla_chatbot, title="Vanilla Chatbot", description="Enter text to start chatting.")
-demo_chatbot.launch()

 from transformers import pipeline, Conversation
 import gradio as gr
+#https://huggingface.co/TheBloke/starchat-beta-GPTQ
+from transformers import AutoTokenizer, pipeline, logging
+from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
+import argparse
+model_name_or_path = "TheBloke/starchat-beta-GPTQ"
+# Or to load it locally, pass the local download path
+# model_name_or_path = "/path/to/models/The_Bloke_starchat-beta-GPTQ"
+use_triton = False
+tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
+        use_safetensors=True,
+        #device="cuda:0",
+        use_triton=use_triton,
+        quantize_config=None)
+# Prevent printing spurious transformers error when using pipeline with AutoGPTQ
+logging.set_verbosity(logging.CRITICAL)
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+prompt_template = "<|system|>\n<|end|>\n<|user|>\n{query}<|end|>\n<|assistant|>"
+prompt = prompt_template.format(query="How do I sort a list in Python?")
+# We use a special <|end|> token with ID 49155 to denote ends of a turn
+outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.2, top_k=50, top_p=0.95, eos_token_id=49155)
+# Example output: You can sort a list in Python by using the sort() method. Here's an example:\n\n```\nnumbers = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]\nnumbers.sort()\nprint(numbers)\n```\n\nThis will sort the list in place and print the sorted list.
+print(outputs[0]['generated_text'])
+#message_list = []
+#response_list = []
+#def vanilla_chatbot(message, history):
+#    conversation = Conversation(text=message, past_user_inputs=message_list, generated_responses=response_list)
+#    conversation = chatbot(conversation)
+#    return conversation.generated_responses[-1]
+#demo_chatbot = gr.ChatInterface(vanilla_chatbot, title="Vanilla Chatbot", description="Enter text to start chatting.")
+#demo_chatbot.launch()