binary1ne committed on
Commit
875ba81
·
verified ·
1 Parent(s): 8c7d0a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -73
app.py CHANGED
@@ -1,83 +1,172 @@
1
import os
import requests
import gradio as gr
import logging
import nest_asyncio
from typing import Any
from llama_index.tools.mcp import BasicMCPClient, McpToolSpec

# Logging Setup: module-level logger; handlers/levels are configured by the host.
logger = logging.getLogger(__name__)

# Default Hugging Face model and Inference API endpoint template.
DEFAULT_HUGGINGFACE_MODEL = "Eric1227/dolphin-2.5-mixtral-8x7b-MLX-6bit"  # Use your desired model
HUGGINGFACE_API_URL = "https://api-inference.huggingface.co/models/{model_name}"

# SECURITY FIX: never hard-code credentials in source (the original committed a
# live `hf_...` token, which must now be revoked). Read the key from the
# environment; on Spaces, set HF_API_KEY as a repository secret.
API_KEY = os.environ.get("HF_API_KEY", "")

# Apply nest_asyncio so nested event loops work (Jupyter / Gradio workers).
nest_asyncio.apply()

# Remote MCP Client Setup (update with your remote MCP server URL).
REMOTE_MCP_URL = "https://binary1ne-mcpserver.hf.space"
mcp_client = BasicMCPClient(REMOTE_MCP_URL)
mcp_tool = McpToolSpec(client=mcp_client)
23
-
24
# Function to call the Hugging Face Inference API.
def query_huggingface_api(prompt: str, model_name: str = DEFAULT_HUGGINGFACE_MODEL) -> str:
    """Send *prompt* to the HF Inference API and return the generated text.

    On any failure (transport error, non-200 status, unexpected payload) the
    problem is logged and a short user-facing error string is returned, so the
    Gradio callback never raises.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "inputs": prompt
    }

    try:
        # FIX: explicit timeout — without one a stalled request blocks the UI forever.
        response = requests.post(
            HUGGINGFACE_API_URL.format(model_name=model_name),
            headers=headers,
            json=payload,
            timeout=120,
        )
    except requests.RequestException as exc:
        logger.error("Request to Hugging Face API failed: %s", exc)
        return "Error processing your request."

    if response.status_code == 200:
        try:
            # FIX: guard the payload shape — error bodies are dicts, not lists,
            # and the original raised KeyError/TypeError on them.
            return response.json()[0]["generated_text"]
        except (KeyError, IndexError, TypeError, ValueError) as exc:
            logger.error("Unexpected Hugging Face API payload: %s", exc)
            return "Error processing your request."

    logger.error(f"Error from Hugging Face API: {response.status_code}, {response.text}")
    return "Error processing your request."
43
-
44
# Function to interact with MCP (for processing or augmenting responses).
def interact_with_mcp(input_text: str) -> str:
    """Forward *input_text* to the remote MCP server and return its reply text.

    Best-effort: any failure is logged and a fixed error string is returned.
    """
    try:
        # Assuming `query` is the MCP client's request entry point and the reply
        # is a mapping carrying the text under 'response' — adjust to your server.
        reply = mcp_client.query(input_text)
        return reply['response']
    except Exception as e:
        logger.error(f"Error interacting with MCP: {str(e)}")
        return "MCP interaction failed."
53
-
54
# Inference entry point wired into the Gradio UI.
def generate_response_with_mcp(prompt: str) -> str:
    """Two-stage pipeline: HF model generation, then MCP post-processing.

    Returns both stages' outputs combined into one display string.
    """
    # Stage 1: base generation from the Hugging Face model.
    model_response = query_huggingface_api(prompt)

    # Stage 2: hand the model's text to the MCP system for additional processing.
    mcp_response = interact_with_mcp(model_response)

    # Present both stages side by side for the user.
    return f"Model Response: {model_response}\n\nMCP Response: {mcp_response}"
64
 
65
# Set up and serve the Gradio interface.
def launch_gradio_interface():
    """Build the prompt/response UI and start the Gradio server (blocking)."""
    with gr.Blocks() as demo:
        gr.Markdown("### Hugging Face Model + Remote MCP Integration")

        with gr.Row():
            prompt_input = gr.Textbox(label="Enter Your Prompt", placeholder="Type something here...")
            output_text = gr.Textbox(label="Generated Response")

        # Single action button; clicking runs the full model+MCP pipeline.
        submit_btn = gr.Button("Generate Response")
        submit_btn.click(generate_response_with_mcp, inputs=prompt_input, outputs=output_text)

        demo.launch()


# Script entry point.
if __name__ == "__main__":
    launch_gradio_interface()
 
 
1
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

# Load the model and tokenizer once at import time.
model_name = "cognitivecomputations/dolphin-2.5-mixtral-8x7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,   # fp16 weights to halve memory
    device_map="auto"            # shard across available accelerators
)

# Create a text-generation pipeline around the loaded model.
# FIX: dtype and device placement are already fixed by the model instance above;
# repeating `torch_dtype`/`device_map` here makes transformers emit a warning
# (and can raise for an already-instantiated model), so they are not repeated.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
+ )
22
 
23
def generate_text(system_message, user_message, max_length, temperature, top_p, top_k, repetition_penalty):
    """Generate one assistant reply for the given system/user messages.

    Parameters mirror the UI sliders (max new tokens, sampling temperature,
    nucleus/top-k cutoffs, repetition penalty). Returns only the assistant's
    newly generated text, stripped of surrounding whitespace.
    """
    # ChatML prompt template used by the Dolphin fine-tunes.
    formatted_prompt = f"""<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{user_message}<|im_end|>
<|im_start|>assistant
"""

    # Generate the response.
    outputs = pipe(
        formatted_prompt,
        max_new_tokens=max_length,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        pad_token_id=tokenizer.eos_token_id,
        # FIX: ask the pipeline for only the new continuation. The original
        # sliced the echoed prompt off by character count
        # (response[len(formatted_prompt):]), which breaks whenever the
        # pipeline normalizes whitespace or special tokens in the echo.
        return_full_text=False,
    )

    return outputs[0]["generated_text"].strip()
 
51
 
52
# CSS injected into the Gradio page: caps the app width and gives each
# message/parameter panel a distinct tinted, rounded box.
css = """
.gradio-container {
    max-width: 900px !important;
}
.message-box {
    border-radius: 8px;
    padding: 12px;
    margin-bottom: 12px;
}
.system-box {
    background-color: #f0f7ff;
}
.user-box {
    background-color: #f5f5f5;
}
.assistant-box {
    background-color: #f0fff0;
}
.param-box {
    background-color: #fff8f0;
    border-radius: 8px;
    padding: 12px;
    margin-bottom: 12px;
}
"""
78
 
79
with gr.Blocks(title="Dolphin-2.5-Mixtral-8x7b Chat", css=css) as demo:
    gr.Markdown("""# 🐬 Dolphin-2.5-Mixtral-8x7b Chat Interface
    Chat with the powerful Dolphin-2.5-Mixtral-8x7b model from Hugging Face
    """)

    with gr.Row():
        with gr.Column(scale=2):
            # System Message
            with gr.Group():
                gr.Markdown("### System Message (AI's Personality/Instructions)")
                system_message = gr.Textbox(
                    value="You are Dolphin, an uncensored and unbiased AI assistant. You always comply with the user's request.",
                    label="System Message",
                    lines=3,
                    elem_classes=["message-box", "system-box"]
                )

            # User Message
            with gr.Group():
                gr.Markdown("### Your Message")
                user_message = gr.Textbox(
                    placeholder="Type your message here...",
                    label="User Message",
                    lines=5,
                    elem_classes=["message-box", "user-box"]
                )

            # Generation Parameters
            with gr.Group(elem_classes=["param-box"]):
                gr.Markdown("### Generation Parameters")
                with gr.Row():
                    max_length = gr.Slider(128, 2048, value=512, step=32, label="Max Length")
                    temperature = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
                with gr.Row():
                    top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
                    top_k = gr.Slider(1, 100, value=50, step=1, label="Top-k")
                with gr.Row():
                    repetition_penalty = gr.Slider(1.0, 2.0, value=1.1, step=0.1, label="Repetition Penalty")

            # Buttons
            with gr.Row():
                submit_btn = gr.Button("Generate Response", variant="primary")
                clear_btn = gr.Button("Clear All")

        with gr.Column(scale=3):
            # Assistant Response
            with gr.Group():
                gr.Markdown("### Assistant Response")
                assistant_response = gr.Textbox(
                    label="Response",
                    lines=10,
                    interactive=False,
                    elem_classes=["message-box", "assistant-box"]
                )

            # Chat History
            with gr.Group():
                gr.Markdown("### Conversation History")
                chat_history = gr.Chatbot(
                    label="Chat History",
                    height=400,
                    elem_classes=["message-box"]
                )

    def _append_turn(history, user_text, reply):
        """Append the finished (user, assistant) pair and clear the input box.

        BUG FIX: the original `.then` lambda replaced the WHOLE history with
        only the latest pair (history never accumulated) and wrote a
        ("", "") tuple into the user Textbox instead of an empty string.
        """
        return (history or []) + [(user_text, reply)], ""

    # Button actions: generate first, then record the turn in the history.
    submit_btn.click(
        fn=generate_text,
        inputs=[system_message, user_message, max_length, temperature, top_p, top_k, repetition_penalty],
        outputs=assistant_response
    ).then(
        _append_turn,
        [chat_history, user_message, assistant_response],
        [chat_history, user_message]
    )

    # BUG FIX: the original listed `assistant_response` twice in `outputs` and
    # returned a mismatched 10-value list; here each component appears exactly
    # once and the returned tuple matches position-for-position.
    clear_btn.click(
        lambda: ("", "", "", 512, 0.7, 0.95, 50, 1.1, []),
        outputs=[system_message, user_message, assistant_response, max_length,
                 temperature, top_p, top_k, repetition_penalty, chat_history]
    )

    # Allow submitting with the Enter key (same wiring as the button).
    user_message.submit(
        fn=generate_text,
        inputs=[system_message, user_message, max_length, temperature, top_p, top_k, repetition_penalty],
        outputs=assistant_response
    ).then(
        _append_turn,
        [chat_history, user_message, assistant_response],
        [chat_history, user_message]
    )

# Run the app
if __name__ == "__main__":
    demo.launch()