Spaces:

d221
/

Everyday_Assistant

Sleeping

App Files Files Community

d221 committed on Feb 1, 2025

Commit

d7e41fd

verified ·

1 Parent(s): 4db1955

Create app.py

Browse files

Files changed (1) hide show

app.py +140 -0

app.py ADDED Viewed

	@@ -0,0 +1,140 @@

+import os
+import gradio as gr
+from huggingface_hub import InferenceClient
+# Hugging Face model ids offered in the model dropdown.
+# The first entry is used as the dropdown's default selection.
+AVAILABLE_MODELS = [
+    # BLOOM: the smaller, faster 560M variant, then the original
+    # 176B-parameter model (heavy).
+    "bigscience/bloom-560m",
+    "bigscience/bloom",
+    # LLaMA-like models from openlm-research: 3B and 7B.
+    "openlm-research/open_llama_3b",
+    "openlm-research/open_llama_7b",
+    # Falcon 7B instruct.
+    "tiiuae/falcon-7b-instruct",
+    # OpenAssistant 12B.
+    "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
+    # Append any other open-source Hugging Face model ids here.
+]
+def _build_prompt(system_message, history, user_message):
+    """
+    Flatten the system message, the earlier turns and the new user
+    message into one plain-text prompt that ends with "Assistant:".
+    With an empty history the result is exactly
+    "<system>\\n\\nUser: <message>\\nAssistant:".
+    """
+    role_labels = {"user": "User", "assistant": "Assistant"}
+    # Tolerate a missing system message instead of failing on None.strip().
+    parts = [f"{(system_message or '').strip()}\n\n"]
+    for turn in history or []:
+        # Expects "messages"-style entries: dicts with "role" and "content".
+        # Anything else (other roles, non-text content) is skipped.
+        if not isinstance(turn, dict):
+            continue
+        label = role_labels.get(turn.get("role"))
+        content = turn.get("content")
+        if label is None or not isinstance(content, str):
+            continue
+        parts.append(f"{label}: {content}\n")
+    parts.append(f"User: {user_message}\n")
+    parts.append("Assistant:")
+    return "".join(parts)
+def chat_with_model(
+    user_message,       # The user’s message
+    history,            # Earlier turns of the conversation (supplied by gr.ChatInterface)
+    system_message,     # The system message/instructions from the left panel
+    user_api_key,       # Optional user-provided HF API key
+    model_choice,       # The model chosen from the dropdown
+    max_tokens,         # Max new tokens
+    temperature,        # Temperature
+    top_p               # Top-p
+):
+    """
+    Stream a reply to the user's message from the selected model.
+    Called every time a user sends a new message. The prompt is built
+    from the system message, the earlier turns in `history`, and the
+    new message, so the model sees the whole conversation.
+    If `user_api_key` is blank, no token is passed to InferenceClient.
+    NOTE(review): with token=None the client may still pick up a
+    locally configured token; confirm if strictly anonymous use matters.
+    This is a generator: after every non-special streamed token it
+    yields the response text accumulated so far.
+    """
+    # A blank or whitespace-only key must become None, not "".
+    final_api_key = (user_api_key or "").strip() or None
+    client = InferenceClient(token=final_api_key)
+    # Naive plain-text chat format: "User: ..." / "Assistant: ..." lines.
+    prompt = _build_prompt(system_message, history, user_message)
+    # Set generation parameters
+    generation_params = dict(
+        temperature=temperature,
+        # The slider may deliver a float; the API expects an integer count.
+        max_new_tokens=int(max_tokens),
+        top_p=top_p,
+        # Some open-source models do better with a smaller
+        # repetition_penalty or none at all:
+        repetition_penalty=1.0,
+    )
+    # Perform streaming text generation. details=True is what makes each
+    # streamed chunk carry a `.token` object rather than a bare string.
+    partial_response = ""
+    stream = client.text_generation(
+        prompt=prompt,
+        model=model_choice,    # The user's chosen model
+        stream=True,
+        details=True,
+        **generation_params
+    )
+    for chunk in stream:
+        # Special tokens (e.g. end-of-sequence markers) are not shown.
+        if chunk.token.special:
+            continue
+        partial_response += chunk.token.text
+        yield partial_response
+# Build the UI: a settings column on the left and the chat on the right.
+with gr.Blocks(theme="soft") as demo:
+    # Title
+    gr.Markdown(
+        """
+        <h1 style="text-align:center; margin-bottom: 5px;">
+            <b>Open-Source GPT Chatbot</b>
+        </h1>
+        """,
+        elem_id="title"
+    )
+    with gr.Row():
+        # Left Column: system msg, HF API key, model dropdown, sliders, etc.
+        with gr.Column(scale=1, min_width=270):
+            system_message = gr.Textbox(
+                label="System Message",
+                value="You are a helpful open-source AI assistant."
+            )
+            # type="password" masks the key as it is typed.
+            user_api_key = gr.Textbox(
+                label="Hugging Face API Key (optional)",
+                type="password",
+                placeholder="Leave blank for public/anonymous usage"
+            )
+            model_choice = gr.Dropdown(
+                label="Select Open-Source Model",
+                choices=AVAILABLE_MODELS,
+                value=AVAILABLE_MODELS[0],  # Default to first in list
+            )
+            max_tokens = gr.Slider(
+                minimum=1,
+                maximum=2000,
+                step=1,
+                value=512,
+                label="Max new tokens"
+            )
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=2.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature"
+            )
+            top_p = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.9,
+                step=0.01,
+                label="Top-p (nucleus sampling)"
+            )
+        # Right Column: The chat interface
+        with gr.Column(scale=3):
+            chatbot = gr.ChatInterface(
+                fn=chat_with_model,
+                # Passed to chat_with_model after (message, history),
+                # in this order:
+                additional_inputs=[system_message, user_api_key, model_choice,
+                                   max_tokens, temperature, top_p],
+                type="messages",  # Use newer messages format
+                # ChatInterface itself takes no `height` argument; the chat
+                # window is sized through the Chatbot component it wraps.
+                chatbot=gr.Chatbot(type="messages", height=550),
+                title="Open-Source Chatbot"
+            )
+# Launch the Gradio app once the Blocks context has been closed,
+# so the layout is fully built before the server starts.
+demo.launch()