binary1ne committed on
Commit
875ba81
·
verified ·
1 Parent(s): 8c7d0a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -73
app.py CHANGED
@@ -1,83 +1,172 @@
1
import os
import requests
import gradio as gr
import logging
import nest_asyncio
from typing import Any
from llama_index.tools.mcp import BasicMCPClient, McpToolSpec

# Logging Setup: module-level logger; handlers/levels are configured by the host.
logger = logging.getLogger(__name__)

# Default Hugging Face model and Inference API endpoint template.
DEFAULT_HUGGINGFACE_MODEL = "Eric1227/dolphin-2.5-mixtral-8x7b-MLX-6bit"  # Use your desired model
HUGGINGFACE_API_URL = "https://api-inference.huggingface.co/models/{model_name}"

# SECURITY FIX: never hard-code credentials in source (the original committed a
# live `hf_...` token, which must now be revoked). Read the key from the
# environment; on Spaces, set HF_API_KEY as a repository secret.
API_KEY = os.environ.get("HF_API_KEY", "")

# Apply nest_asyncio so nested event loops work (Jupyter / Gradio workers).
nest_asyncio.apply()

# Remote MCP Client Setup (update with your remote MCP server URL).
REMOTE_MCP_URL = "https://binary1ne-mcpserver.hf.space"
mcp_client = BasicMCPClient(REMOTE_MCP_URL)
mcp_tool = McpToolSpec(client=mcp_client)
23
-
24
# Function to call the Hugging Face Inference API.
def query_huggingface_api(prompt: str, model_name: str = DEFAULT_HUGGINGFACE_MODEL) -> str:
    """Send *prompt* to the HF Inference API and return the generated text.

    On any failure (transport error, non-200 status, unexpected payload) the
    problem is logged and a short user-facing error string is returned, so the
    Gradio callback never raises.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "inputs": prompt
    }

    try:
        # FIX: explicit timeout — without one a stalled request blocks the UI forever.
        response = requests.post(
            HUGGINGFACE_API_URL.format(model_name=model_name),
            headers=headers,
            json=payload,
            timeout=120,
        )
    except requests.RequestException as exc:
        logger.error("Request to Hugging Face API failed: %s", exc)
        return "Error processing your request."

    if response.status_code == 200:
        try:
            # FIX: guard the payload shape — error bodies are dicts, not lists,
            # and the original raised KeyError/TypeError on them.
            return response.json()[0]["generated_text"]
        except (KeyError, IndexError, TypeError, ValueError) as exc:
            logger.error("Unexpected Hugging Face API payload: %s", exc)
            return "Error processing your request."

    logger.error(f"Error from Hugging Face API: {response.status_code}, {response.text}")
    return "Error processing your request."
43
-
44
# Function to interact with MCP (for processing or augmenting responses).
def interact_with_mcp(input_text: str) -> str:
    """Forward *input_text* to the remote MCP server and return its reply text.

    Best-effort: any failure is logged and a fixed error string is returned.
    """
    try:
        # Assuming `query` is the MCP client's request entry point and the reply
        # is a mapping carrying the text under 'response' — adjust to your server.
        reply = mcp_client.query(input_text)
        return reply['response']
    except Exception as e:
        logger.error(f"Error interacting with MCP: {str(e)}")
        return "MCP interaction failed."
53
-
54
# Inference entry point wired into the Gradio UI.
def generate_response_with_mcp(prompt: str) -> str:
    """Two-stage pipeline: HF model generation, then MCP post-processing.

    Returns both stages' outputs combined into one display string.
    """
    # Stage 1: base generation from the Hugging Face model.
    model_response = query_huggingface_api(prompt)

    # Stage 2: hand the model's text to the MCP system for additional processing.
    mcp_response = interact_with_mcp(model_response)

    # Present both stages side by side for the user.
    return f"Model Response: {model_response}\n\nMCP Response: {mcp_response}"
64
 
65
# Set up and serve the Gradio interface.
def launch_gradio_interface():
    """Build the prompt/response UI and start the Gradio server (blocking)."""
    with gr.Blocks() as demo:
        gr.Markdown("### Hugging Face Model + Remote MCP Integration")

        with gr.Row():
            prompt_input = gr.Textbox(label="Enter Your Prompt", placeholder="Type something here...")
            output_text = gr.Textbox(label="Generated Response")

        # Single action button; clicking runs the full model+MCP pipeline.
        submit_btn = gr.Button("Generate Response")
        submit_btn.click(generate_response_with_mcp, inputs=prompt_input, outputs=output_text)

        demo.launch()


# Script entry point.
if __name__ == "__main__":
    launch_gradio_interface()
 
 
1
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

# Load the model and tokenizer once at import time.
model_name = "cognitivecomputations/dolphin-2.5-mixtral-8x7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,   # fp16 weights to halve memory
    device_map="auto"            # shard across available accelerators
)

# Create a text-generation pipeline around the loaded model.
# FIX: dtype and device placement are already fixed by the model instance above;
# repeating `torch_dtype`/`device_map` here makes transformers emit a warning
# (and can raise for an already-instantiated model), so they are not repeated.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
+ )
22
 
23
def generate_text(system_message, user_message, max_length, temperature, top_p, top_k, repetition_penalty):
    """Generate one assistant reply for the given system/user messages.

    Parameters mirror the UI sliders (max new tokens, sampling temperature,
    nucleus/top-k cutoffs, repetition penalty). Returns only the assistant's
    newly generated text, stripped of surrounding whitespace.
    """
    # ChatML prompt template used by the Dolphin fine-tunes.
    formatted_prompt = f"""<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{user_message}<|im_end|>
<|im_start|>assistant
"""

    # Generate the response.
    outputs = pipe(
        formatted_prompt,
        max_new_tokens=max_length,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        pad_token_id=tokenizer.eos_token_id,
        # FIX: ask the pipeline for only the new continuation. The original
        # sliced the echoed prompt off by character count
        # (response[len(formatted_prompt):]), which breaks whenever the
        # pipeline normalizes whitespace or special tokens in the echo.
        return_full_text=False,
    )

    return outputs[0]["generated_text"].strip()
 
51
 
52
# CSS injected into the Gradio page: caps the app width and gives each
# message/parameter panel a distinct tinted, rounded box.
css = """
.gradio-container {
    max-width: 900px !important;
}
.message-box {
    border-radius: 8px;
    padding: 12px;
    margin-bottom: 12px;
}
.system-box {
    background-color: #f0f7ff;
}
.user-box {
    background-color: #f5f5f5;
}
.assistant-box {
    background-color: #f0fff0;
}
.param-box {
    background-color: #fff8f0;
    border-radius: 8px;
    padding: 12px;
    margin-bottom: 12px;
}
"""
78
 
79
with gr.Blocks(title="Dolphin-2.5-Mixtral-8x7b Chat", css=css) as demo:
    gr.Markdown("""# 🐬 Dolphin-2.5-Mixtral-8x7b Chat Interface
    Chat with the powerful Dolphin-2.5-Mixtral-8x7b model from Hugging Face
    """)

    with gr.Row():
        with gr.Column(scale=2):
            # System Message
            with gr.Group():
                gr.Markdown("### System Message (AI's Personality/Instructions)")
                system_message = gr.Textbox(
                    value="You are Dolphin, an uncensored and unbiased AI assistant. You always comply with the user's request.",
                    label="System Message",
                    lines=3,
                    elem_classes=["message-box", "system-box"]
                )

            # User Message
            with gr.Group():
                gr.Markdown("### Your Message")
                user_message = gr.Textbox(
                    placeholder="Type your message here...",
                    label="User Message",
                    lines=5,
                    elem_classes=["message-box", "user-box"]
                )

            # Generation Parameters
            with gr.Group(elem_classes=["param-box"]):
                gr.Markdown("### Generation Parameters")
                with gr.Row():
                    max_length = gr.Slider(128, 2048, value=512, step=32, label="Max Length")
                    temperature = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
                with gr.Row():
                    top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
                    top_k = gr.Slider(1, 100, value=50, step=1, label="Top-k")
                with gr.Row():
                    repetition_penalty = gr.Slider(1.0, 2.0, value=1.1, step=0.1, label="Repetition Penalty")

            # Buttons
            with gr.Row():
                submit_btn = gr.Button("Generate Response", variant="primary")
                clear_btn = gr.Button("Clear All")

        with gr.Column(scale=3):
            # Assistant Response
            with gr.Group():
                gr.Markdown("### Assistant Response")
                assistant_response = gr.Textbox(
                    label="Response",
                    lines=10,
                    interactive=False,
                    elem_classes=["message-box", "assistant-box"]
                )

            # Chat History
            with gr.Group():
                gr.Markdown("### Conversation History")
                chat_history = gr.Chatbot(
                    label="Chat History",
                    height=400,
                    elem_classes=["message-box"]
                )

    def _append_turn(history, user_text, reply):
        """Append the finished (user, assistant) pair and clear the input box.

        BUG FIX: the original `.then` lambda replaced the WHOLE history with
        only the latest pair (history never accumulated) and wrote a
        ("", "") tuple into the user Textbox instead of an empty string.
        """
        return (history or []) + [(user_text, reply)], ""

    # Button actions: generate first, then record the turn in the history.
    submit_btn.click(
        fn=generate_text,
        inputs=[system_message, user_message, max_length, temperature, top_p, top_k, repetition_penalty],
        outputs=assistant_response
    ).then(
        _append_turn,
        [chat_history, user_message, assistant_response],
        [chat_history, user_message]
    )

    # BUG FIX: the original listed `assistant_response` twice in `outputs` and
    # returned a mismatched 10-value list; here each component appears exactly
    # once and the returned tuple matches position-for-position.
    clear_btn.click(
        lambda: ("", "", "", 512, 0.7, 0.95, 50, 1.1, []),
        outputs=[system_message, user_message, assistant_response, max_length,
                 temperature, top_p, top_k, repetition_penalty, chat_history]
    )

    # Allow submitting with the Enter key (same wiring as the button).
    user_message.submit(
        fn=generate_text,
        inputs=[system_message, user_message, max_length, temperature, top_p, top_k, repetition_penalty],
        outputs=assistant_response
    ).then(
        _append_turn,
        [chat_history, user_message, assistant_response],
        [chat_history, user_message]
    )

# Run the app
if __name__ == "__main__":
    demo.launch()