jvnickerson committed on
Commit
6417feb
·
verified ·
1 Parent(s): 23a63ce

Upload folder using huggingface_hub

Browse files
Files changed (10) hide show
  1. .DS_Store +0 -0
  2. README.md +3 -9
  3. inf_chat.py +56 -0
  4. old/chat.py +24 -0
  5. old/cstream.py +23 -0
  6. old/ct.py +97 -0
  7. old/from openai import OpenAI.py +25 -0
  8. old/inf.py +52 -0
  9. old/inf2.py +52 -0
  10. old/temp.py +3 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Chat
3
- emoji: 👁
4
- colorFrom: purple
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.44.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: chat
3
+ app_file: inf_chat.py
 
 
4
  sdk: gradio
5
+ sdk_version: 4.39.0
 
 
6
  ---
 
 
inf_chat.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient()

# Display name -> Hugging Face Hub repo id for the selectable models.
MODEL_OPTIONS = {
    "Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
    "Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Mixtral-8x22B-v0.1": "mistralai/Mixtral-8x22B-v0.1"
}


def _format_history(history):
    """Render prior [user, assistant] turns as Human:/AI: lines.

    BUG FIX: the original interpolated ``str(history)`` — a Python list
    repr like ``[['hi', 'hello']]`` — straight into the prompt. Formatting
    each turn explicitly gives the model a clean transcript instead.
    """
    lines = []
    for user_turn, ai_turn in history:
        if user_turn:
            lines.append(f"Human: {user_turn}")
        if ai_turn:
            lines.append(f"AI: {ai_turn}")
    return "\n".join(lines)


def generate_text(message, history, model_choice):
    """Stream a completion for *message* given prior *history*.

    Parameters
    ----------
    message : str
        The newest user message.
    history : list[list[str]]
        Prior [user, assistant] pairs (may be empty).
    model_choice : str
        A key of MODEL_OPTIONS.

    Yields
    ------
    str
        Successive text chunks from the inference endpoint.
    """
    model = MODEL_OPTIONS[model_choice]
    prompt = f"{_format_history(history)}\nHuman: {message}\nAI:"
    output = client.text_generation(
        prompt,
        model=model,
        max_new_tokens=1000,
        temperature=0.8,
        stream=True
    )
    for chunk in output:
        yield chunk


with gr.Blocks() as iface:
    gr.Markdown("# Chat with LLM Models")
    gr.Markdown("Select a model and start chatting!")

    model_dropdown = gr.Dropdown(
        choices=list(MODEL_OPTIONS.keys()),
        value="Meta-Llama-3-8B-Instruct",
        label="Select Model"
    )

    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        # Clear the textbox and append the pending turn to the history.
        return "", history + [[user_message, None]]

    def bot(history, model_choice):
        # Stream the reply into the last (pending) history slot so the
        # Chatbot component updates incrementally.
        user_message = history[-1][0]
        bot_message = generate_text(user_message, history[:-1], model_choice)
        history[-1][1] = ""
        for chunk in bot_message:
            history[-1][1] += chunk
            yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, model_dropdown], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

iface.launch(share=True)
old/chat.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from openai import OpenAI
import gradio as gr
import os

# Key is read from the environment; never hard-code credentials.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
client = openai_client


def predict(message, history):
    """Stream a gpt-3.5-turbo reply for *message*, given chat *history*.

    *history* is a list of (user, assistant) pairs; yields the cumulative
    partial reply so the Gradio ChatInterface renders it incrementally.
    """
    # Rebuild the transcript in the OpenAI chat-completions format.
    history_openai_format = []
    for user_text, assistant_text in history:
        history_openai_format.append({"role": "user", "content": user_text})
        history_openai_format.append({"role": "assistant", "content": assistant_text})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model='gpt-3.5-turbo',
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message += delta
            yield partial_message


gr.ChatInterface(predict).launch()
old/cstream.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NOTE(review): scratch/notebook snippet — `model_fintuned`, `device` and
# `prompts` are not defined in this file (presumably bound in an earlier
# session); it does not run standalone. TODO confirm before reuse.
# BUG FIX: removed the duplicate `from transformers import AutoTokenizer`
# line — AutoTokenizer is already imported below.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, pipeline

tokenizer = AutoTokenizer.from_pretrained("your_model_name")  # placeholder model id — replace before use
streamer = TextStreamer(tokenizer, skip_prompt=True)  # prints tokens as they are generated

pipe = pipeline(
    "text-generation",
    model=model_fintuned,
    tokenizer=tokenizer,
    max_length=2048,
    temperature=0.6,
    pad_token_id=tokenizer.eos_token_id,
    top_p=0.95,
    repetition_penalty=1.2,
    device=device,
    streamer=streamer
)
pipe(prompts[0])

# Second variant: call .generate() directly instead of going through a pipeline.
inputs = tokenizer(prompts[0], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = model_fintuned.generate(**inputs, streamer=streamer, pad_token_id=tokenizer.eos_token_id,
                            max_length=248, temperature=0.8, top_p=0.8,
                            repetition_penalty=1.25)
old/ct.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# https://raw.githubusercontent.com/rohan-paul/LLM-FineTuning-Large-Language-Models/refs/heads/main/Mixtral_Chatbot_with_Gradio/Mixtral_Chatbot_with_Gradio.py
from transformers import AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig

from threading import Thread
import gradio as gr
import transformers
import torch

# Run the entire app with `python run_mixtral.py`

""" The messages list should be of the following format:

messages =

[
    {"role": "user", "content": "User's first message"},
    {"role": "assistant", "content": "Assistant's first response"},
    {"role": "user", "content": "User's second message"},
    {"role": "assistant", "content": "Assistant's second response"},
    {"role": "user", "content": "User's third message"}
]

"""


def format_chat_history(history) -> str:
    """Format [user, assistant] dialogue pairs into a model-ready prompt.

    *history* is a list of [user_message, assistant_message] pairs as kept
    by the Gradio Chatbot component. Returns the prompt rendered through
    the tokenizer's chat template.

    NOTE(review): `pipeline` referenced below is the module-level global
    bound in __main__ (the loaded pipeline object), not
    transformers.pipeline — this function only works after
    model_loading_pipeline() has run.
    """
    messages = []

    # Add a system message to set the context.
    messages.append({"role": "system", "content": "You are a helpful assistant."})

    for i, dialog in enumerate(history):
        if i == 0:
            # For the first interaction, only add the user message.
            messages.append({"role": "user", "content": dialog[0]})
        else:
            # For subsequent interactions, add both user and assistant messages.
            if dialog[0]:  # user message
                messages.append({"role": "user", "content": dialog[0]})
            if dialog[1]:  # assistant message
                messages.append({"role": "assistant", "content": dialog[1]})

    return pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False,
        add_generation_prompt=True)


def model_loading_pipeline():
    """Load the Mixtral text-generation pipeline and its token streamer."""
    model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # BUG FIX: the parameter is `timeout`, not `Timeout`. The capitalized
    # keyword was silently collected into **decode_kwargs (and then passed
    # to tokenizer.decode), so the intended 5 s queue timeout never applied.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, timeout=5.0)

    pipeline = transformers.pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        # NOTE(review): passing load_in_4bit directly is deprecated in newer
        # transformers; prefer model_kwargs={"quantization_config":
        # BitsAndBytesConfig(load_in_4bit=True)} — confirm library version.
        load_in_4bit=True,
        device_map="auto",  # automatically determine the best device setup
        streamer=streamer
    )
    return pipeline, streamer


def launch_gradio_app(pipeline, streamer):
    """Build and launch the Gradio chat UI wired to *pipeline*/*streamer*."""
    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        clear = gr.Button("Clear")

        def user(user_message, history):
            # Clear the textbox and append the pending turn to the history.
            return "", history + [[user_message, None]]

        def bot(history):
            prompt = format_chat_history(history)

            history[-1][1] = ""
            kwargs = dict(text_inputs=prompt, max_new_tokens=2048, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
            # Run generation on a worker thread; the streamer yields tokens
            # back to this generator as they are produced.
            thread = Thread(target=pipeline, kwargs=kwargs)
            thread.start()

            for token in streamer:
                history[-1][1] += token
                yield history

        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
        clear.click(lambda: None, None, chatbot, queue=False)

    demo.queue()
    demo.launch(share=True, debug=True)


if __name__ == '__main__':
    pipeline, streamer = model_loading_pipeline()
    launch_gradio_app(pipeline, streamer)

# Run the entire app with `python run_mixtral.py`
old/from openai import OpenAI.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from openai import OpenAI
import gradio as gr
import os

# SECURITY FIX: the original hard-coded an API key literal ("sk-...") in
# source. Never commit credentials — read the key from the environment.
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)


def predict(message, history):
    """Stream a gpt-3.5-turbo reply for *message*, given chat *history*.

    *history* is a list of (user, assistant) pairs; yields the cumulative
    partial reply so the Gradio ChatInterface renders it incrementally.
    """
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model='gpt-3.5-turbo',
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        if chunk.choices[0].delta.content is not None:
            partial_message = partial_message + chunk.choices[0].delta.content
            yield partial_message


gr.ChatInterface(predict).launch()
old/inf.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient()

# Display name -> Hugging Face Hub repo id for the selectable models.
MODEL_OPTIONS = {
    "Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
    "Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Mixtral-8x22B-v0.1": "mistralai/Mixtral-8x22B-v0.1"
}


def generate_text(prompt, model_choice):
    """Stream generated text for *prompt*, wrapped in a markdown code fence.

    Yields the cumulative response, re-closing the fence on every chunk so
    the Markdown output component always renders a well-formed block.
    """
    model = MODEL_OPTIONS[model_choice]
    output = client.text_generation(
        prompt,
        model=model,
        max_new_tokens=1000,
        temperature=0.8,
        stream=True
    )

    full_response = "```markdown\n"  # Start with a markdown code block
    for chunk in output:
        full_response += chunk
        yield full_response + "\n```"  # Close the markdown code block


def clear_text():
    """Reset the prompt, the output, and the model selection.

    BUG FIX: the Dropdown's choices are the *keys* of MODEL_OPTIONS, but
    the original returned MODEL_OPTIONS["Meta-Llama-3-8B-Instruct"] (the
    repo id), which is not a valid choice, so the reset left the dropdown
    in an invalid state. Return the key itself.
    """
    return "", "", "Meta-Llama-3-8B-Instruct"


with gr.Blocks() as iface:
    gr.Markdown("# Text Generation with LLM Models")
    gr.Markdown("Select a model, enter a prompt, and click 'Submit' to get generated text.")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(MODEL_OPTIONS.keys()),
            value="Meta-Llama-3-8B-Instruct",
            label="Select Model"
        )

    with gr.Row():
        input_text = gr.Textbox(lines=2, placeholder="Enter your prompt here...")
        output_markdown = gr.Markdown()

    with gr.Row():
        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear")

    submit_btn.click(generate_text, inputs=[input_text, model_dropdown], outputs=output_markdown)
    clear_btn.click(clear_text, outputs=[input_text, output_markdown, model_dropdown])

iface.launch()
old/inf2.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient()

# Display name -> Hugging Face Hub repo id for the selectable models.
MODEL_OPTIONS = {
    "Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
    "Mixtral-8x7B-v0.1": "mistralai/Mixtral-8x7B-v0.1",
    "Mixtral-8x22B-v0.1": "mistralai/Mixtral-8x22B-v0.1"
}


def generate_text(prompt, model_choice):
    """Stream generated text for *prompt*, wrapped in a markdown code fence.

    Yields the cumulative response, re-closing the fence on every chunk so
    the Markdown output component always renders a well-formed block.
    """
    model = MODEL_OPTIONS[model_choice]
    output = client.text_generation(
        prompt,
        model=model,
        max_new_tokens=1000,
        temperature=0.3,
        stream=True
    )

    full_response = "```markdown\n"  # Start with a markdown code block
    for chunk in output:
        full_response += chunk
        yield full_response + "\n```"  # Close the markdown code block


def clear_text():
    """Reset the prompt, the output, and the model selection.

    BUG FIX: the Dropdown's choices are the *keys* of MODEL_OPTIONS, but
    the original returned MODEL_OPTIONS["Meta-Llama-3-8B-Instruct"] (the
    repo id), which is not a valid choice, so the reset left the dropdown
    in an invalid state. Return the key itself.
    """
    return "", "", "Meta-Llama-3-8B-Instruct"


with gr.Blocks() as iface:
    gr.Markdown("# Text Generation with LLM Models")
    gr.Markdown("Select a model, enter a prompt, and click 'Submit' to get generated text.")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(MODEL_OPTIONS.keys()),
            value="Meta-Llama-3-8B-Instruct",
            label="Select Model"
        )

    with gr.Row():
        input_text = gr.Textbox(lines=2, placeholder="Enter your prompt here...")
        output_markdown = gr.Markdown()  # Markdown component renders the fenced output

    with gr.Row():
        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear")

    submit_btn.click(generate_text, inputs=[input_text, model_dropdown], outputs=output_markdown)
    clear_btn.click(clear_text, outputs=[input_text, output_markdown, model_dropdown])

iface.launch()
old/temp.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# List the models currently deployed on the hosted text-generation
# Inference API and print them.
from text_generation.inference_api import deployed_models

models = deployed_models()
print(models)