jvnickerson committed on
Commit
6417feb
·
verified ·
1 Parent(s): 23a63ce

Upload folder using huggingface_hub

Browse files
Files changed (10) hide show
  1. .DS_Store +0 -0
  2. README.md +3 -9
  3. inf_chat.py +56 -0
  4. old/chat.py +24 -0
  5. old/cstream.py +23 -0
  6. old/ct.py +97 -0
  7. old/from openai import OpenAI.py +25 -0
  8. old/inf.py +52 -0
  9. old/inf2.py +52 -0
  10. old/temp.py +3 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Chat
3
- emoji: 👁
4
- colorFrom: purple
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.44.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: chat
3
+ app_file: inf_chat.py
 
 
4
  sdk: gradio
5
+ sdk_version: 4.39.0
 
 
6
  ---
 
 
inf_chat.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient()

# Display name -> Hugging Face Hub repo id for the selectable models.
MODEL_OPTIONS = {
    "Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
    "Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Mixtral-8x22B-v0.1": "mistralai/Mixtral-8x22B-v0.1"
}


def _format_history(history):
    """Render prior [user, assistant] turns as Human:/AI: lines.

    BUG FIX: the original interpolated ``str(history)`` — a Python list
    repr like ``[['hi', 'hello']]`` — straight into the prompt. Formatting
    each turn explicitly gives the model a clean transcript instead.
    """
    lines = []
    for user_turn, ai_turn in history:
        if user_turn:
            lines.append(f"Human: {user_turn}")
        if ai_turn:
            lines.append(f"AI: {ai_turn}")
    return "\n".join(lines)


def generate_text(message, history, model_choice):
    """Stream a completion for *message* given prior *history*.

    Parameters
    ----------
    message : str
        The newest user message.
    history : list[list[str]]
        Prior [user, assistant] pairs (may be empty).
    model_choice : str
        A key of MODEL_OPTIONS.

    Yields
    ------
    str
        Successive text chunks from the inference endpoint.
    """
    model = MODEL_OPTIONS[model_choice]
    prompt = f"{_format_history(history)}\nHuman: {message}\nAI:"
    output = client.text_generation(
        prompt,
        model=model,
        max_new_tokens=1000,
        temperature=0.8,
        stream=True
    )
    for chunk in output:
        yield chunk


with gr.Blocks() as iface:
    gr.Markdown("# Chat with LLM Models")
    gr.Markdown("Select a model and start chatting!")

    model_dropdown = gr.Dropdown(
        choices=list(MODEL_OPTIONS.keys()),
        value="Meta-Llama-3-8B-Instruct",
        label="Select Model"
    )

    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        # Clear the textbox and append the pending turn to the history.
        return "", history + [[user_message, None]]

    def bot(history, model_choice):
        # Stream the reply into the last (pending) history slot so the
        # Chatbot component updates incrementally.
        user_message = history[-1][0]
        bot_message = generate_text(user_message, history[:-1], model_choice)
        history[-1][1] = ""
        for chunk in bot_message:
            history[-1][1] += chunk
            yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, model_dropdown], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

iface.launch(share=True)
old/chat.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from openai import OpenAI
import gradio as gr
import os

# Key is read from the environment; never hard-code credentials.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
client = openai_client


def predict(message, history):
    """Stream a gpt-3.5-turbo reply for *message*, given chat *history*.

    *history* is a list of (user, assistant) pairs; yields the cumulative
    partial reply so the Gradio ChatInterface renders it incrementally.
    """
    # Rebuild the transcript in the OpenAI chat-completions format.
    history_openai_format = []
    for user_text, assistant_text in history:
        history_openai_format.append({"role": "user", "content": user_text})
        history_openai_format.append({"role": "assistant", "content": assistant_text})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model='gpt-3.5-turbo',
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message += delta
            yield partial_message


gr.ChatInterface(predict).launch()
old/cstream.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NOTE(review): scratch/notebook snippet — `model_fintuned`, `device` and
# `prompts` are not defined in this file (presumably bound in an earlier
# session); it does not run standalone. TODO confirm before reuse.
# BUG FIX: removed the duplicate `from transformers import AutoTokenizer`
# line — AutoTokenizer is already imported below.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, pipeline

tokenizer = AutoTokenizer.from_pretrained("your_model_name")  # placeholder model id — replace before use
streamer = TextStreamer(tokenizer, skip_prompt=True)  # prints tokens as they are generated

pipe = pipeline(
    "text-generation",
    model=model_fintuned,
    tokenizer=tokenizer,
    max_length=2048,
    temperature=0.6,
    pad_token_id=tokenizer.eos_token_id,
    top_p=0.95,
    repetition_penalty=1.2,
    device=device,
    streamer=streamer
)
pipe(prompts[0])

# Second variant: call .generate() directly instead of going through a pipeline.
inputs = tokenizer(prompts[0], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = model_fintuned.generate(**inputs, streamer=streamer, pad_token_id=tokenizer.eos_token_id,
                            max_length=248, temperature=0.8, top_p=0.8,
                            repetition_penalty=1.25)
old/ct.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# https://raw.githubusercontent.com/rohan-paul/LLM-FineTuning-Large-Language-Models/refs/heads/main/Mixtral_Chatbot_with_Gradio/Mixtral_Chatbot_with_Gradio.py
from transformers import AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig

from threading import Thread
import gradio as gr
import transformers
import torch

# Run the entire app with `python run_mixtral.py`

""" The messages list should be of the following format:

messages =

[
    {"role": "user", "content": "User's first message"},
    {"role": "assistant", "content": "Assistant's first response"},
    {"role": "user", "content": "User's second message"},
    {"role": "assistant", "content": "Assistant's second response"},
    {"role": "user", "content": "User's third message"}
]

"""


def format_chat_history(history) -> str:
    """Format [user, assistant] dialogue pairs into a model-ready prompt.

    *history* is a list of [user_message, assistant_message] pairs as kept
    by the Gradio Chatbot component. Returns the prompt rendered through
    the tokenizer's chat template.

    NOTE(review): `pipeline` referenced below is the module-level global
    bound in __main__ (the loaded pipeline object), not
    transformers.pipeline — this function only works after
    model_loading_pipeline() has run.
    """
    messages = []

    # Add a system message to set the context.
    messages.append({"role": "system", "content": "You are a helpful assistant."})

    for i, dialog in enumerate(history):
        if i == 0:
            # For the first interaction, only add the user message.
            messages.append({"role": "user", "content": dialog[0]})
        else:
            # For subsequent interactions, add both user and assistant messages.
            if dialog[0]:  # user message
                messages.append({"role": "user", "content": dialog[0]})
            if dialog[1]:  # assistant message
                messages.append({"role": "assistant", "content": dialog[1]})

    return pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False,
        add_generation_prompt=True)


def model_loading_pipeline():
    """Load the Mixtral text-generation pipeline and its token streamer."""
    model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # BUG FIX: the parameter is `timeout`, not `Timeout`. The capitalized
    # keyword was silently collected into **decode_kwargs (and then passed
    # to tokenizer.decode), so the intended 5 s queue timeout never applied.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, timeout=5.0)

    pipeline = transformers.pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        # NOTE(review): passing load_in_4bit directly is deprecated in newer
        # transformers; prefer model_kwargs={"quantization_config":
        # BitsAndBytesConfig(load_in_4bit=True)} — confirm library version.
        load_in_4bit=True,
        device_map="auto",  # automatically determine the best device setup
        streamer=streamer
    )
    return pipeline, streamer


def launch_gradio_app(pipeline, streamer):
    """Build and launch the Gradio chat UI wired to *pipeline*/*streamer*."""
    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        clear = gr.Button("Clear")

        def user(user_message, history):
            # Clear the textbox and append the pending turn to the history.
            return "", history + [[user_message, None]]

        def bot(history):
            prompt = format_chat_history(history)

            history[-1][1] = ""
            kwargs = dict(text_inputs=prompt, max_new_tokens=2048, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
            # Run generation on a worker thread; the streamer yields tokens
            # back to this generator as they are produced.
            thread = Thread(target=pipeline, kwargs=kwargs)
            thread.start()

            for token in streamer:
                history[-1][1] += token
                yield history

        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
        clear.click(lambda: None, None, chatbot, queue=False)

    demo.queue()
    demo.launch(share=True, debug=True)


if __name__ == '__main__':
    pipeline, streamer = model_loading_pipeline()
    launch_gradio_app(pipeline, streamer)

# Run the entire app with `python run_mixtral.py`
old/from openai import OpenAI.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from openai import OpenAI
import gradio as gr
import os

# SECURITY FIX: the original hard-coded an API key literal ("sk-...") in
# source. Never commit credentials — read the key from the environment.
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)


def predict(message, history):
    """Stream a gpt-3.5-turbo reply for *message*, given chat *history*.

    *history* is a list of (user, assistant) pairs; yields the cumulative
    partial reply so the Gradio ChatInterface renders it incrementally.
    """
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model='gpt-3.5-turbo',
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        if chunk.choices[0].delta.content is not None:
            partial_message = partial_message + chunk.choices[0].delta.content
            yield partial_message


gr.ChatInterface(predict).launch()
old/inf.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient()

# Display name -> Hugging Face Hub repo id for the selectable models.
MODEL_OPTIONS = {
    "Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
    "Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Mixtral-8x22B-v0.1": "mistralai/Mixtral-8x22B-v0.1"
}


def generate_text(prompt, model_choice):
    """Stream generated text for *prompt*, wrapped in a markdown code fence.

    Yields the cumulative response, re-closing the fence on every chunk so
    the Markdown output component always renders a well-formed block.
    """
    model = MODEL_OPTIONS[model_choice]
    output = client.text_generation(
        prompt,
        model=model,
        max_new_tokens=1000,
        temperature=0.8,
        stream=True
    )

    full_response = "```markdown\n"  # Start with a markdown code block
    for chunk in output:
        full_response += chunk
        yield full_response + "\n```"  # Close the markdown code block


def clear_text():
    """Reset the prompt, the output, and the model selection.

    BUG FIX: the Dropdown's choices are the *keys* of MODEL_OPTIONS, but
    the original returned MODEL_OPTIONS["Meta-Llama-3-8B-Instruct"] (the
    repo id), which is not a valid choice, so the reset left the dropdown
    in an invalid state. Return the key itself.
    """
    return "", "", "Meta-Llama-3-8B-Instruct"


with gr.Blocks() as iface:
    gr.Markdown("# Text Generation with LLM Models")
    gr.Markdown("Select a model, enter a prompt, and click 'Submit' to get generated text.")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(MODEL_OPTIONS.keys()),
            value="Meta-Llama-3-8B-Instruct",
            label="Select Model"
        )

    with gr.Row():
        input_text = gr.Textbox(lines=2, placeholder="Enter your prompt here...")
        output_markdown = gr.Markdown()

    with gr.Row():
        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear")

    submit_btn.click(generate_text, inputs=[input_text, model_dropdown], outputs=output_markdown)
    clear_btn.click(clear_text, outputs=[input_text, output_markdown, model_dropdown])

iface.launch()
old/inf2.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient()

# Display name -> Hugging Face Hub repo id for the selectable models.
MODEL_OPTIONS = {
    "Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
    "Mixtral-8x7B-v0.1": "mistralai/Mixtral-8x7B-v0.1",
    "Mixtral-8x22B-v0.1": "mistralai/Mixtral-8x22B-v0.1"
}


def generate_text(prompt, model_choice):
    """Stream generated text for *prompt*, wrapped in a markdown code fence.

    Yields the cumulative response, re-closing the fence on every chunk so
    the Markdown output component always renders a well-formed block.
    """
    model = MODEL_OPTIONS[model_choice]
    output = client.text_generation(
        prompt,
        model=model,
        max_new_tokens=1000,
        temperature=0.3,
        stream=True
    )

    full_response = "```markdown\n"  # Start with a markdown code block
    for chunk in output:
        full_response += chunk
        yield full_response + "\n```"  # Close the markdown code block


def clear_text():
    """Reset the prompt, the output, and the model selection.

    BUG FIX: the Dropdown's choices are the *keys* of MODEL_OPTIONS, but
    the original returned MODEL_OPTIONS["Meta-Llama-3-8B-Instruct"] (the
    repo id), which is not a valid choice, so the reset left the dropdown
    in an invalid state. Return the key itself.
    """
    return "", "", "Meta-Llama-3-8B-Instruct"


with gr.Blocks() as iface:
    gr.Markdown("# Text Generation with LLM Models")
    gr.Markdown("Select a model, enter a prompt, and click 'Submit' to get generated text.")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(MODEL_OPTIONS.keys()),
            value="Meta-Llama-3-8B-Instruct",
            label="Select Model"
        )

    with gr.Row():
        input_text = gr.Textbox(lines=2, placeholder="Enter your prompt here...")
        output_markdown = gr.Markdown()  # Markdown component renders the fenced output

    with gr.Row():
        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear")

    submit_btn.click(generate_text, inputs=[input_text, model_dropdown], outputs=output_markdown)
    clear_btn.click(clear_text, outputs=[input_text, output_markdown, model_dropdown])

iface.launch()
old/temp.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# List the models currently deployed on the hosted text-generation
# Inference API and print them.
from text_generation.inference_api import deployed_models

models = deployed_models()
print(models)