Nullpointer-KK committed
Commit 3af1594 · verified · 1 Parent(s): 63b7a2a

Update app.py

Files changed (1)
  app.py +64 -40
app.py CHANGED
@@ -1,58 +1,82 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 
+# Available open-source base models (completion style)
+MODEL_CHOICES = {
+    "Mistral 7B Instruct (mistralai/Mistral-7B-Instruct-v0.2)": "mistralai/Mistral-7B-Instruct-v0.2",
+    "Falcon 7B Instruct (tiiuae/falcon-7b-instruct)": "tiiuae/falcon-7b-instruct",
+    "LLaMA-2 7B Chat (meta-llama/Llama-2-7b-chat-hf)": "meta-llama/Llama-2-7b-chat-hf",
+}
 
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
+
+def complete_text(prompt, max_tokens, temperature, top_p, model_choice, hf_token: gr.OAuthToken):
     """
-    Chat with a base LLM hosted on Hugging Face Hub.
-    Uses streaming to show tokens as they arrive.
+    Get a plain text completion from a Hugging Face-hosted open-source LLM.
+    Streams output token-by-token.
     """
-    # Replace with a model you have access to, e.g. "meta-llama/Llama-2-7b-chat-hf"
-    client = InferenceClient(model="openai/gpt-oss-20b", token=hf_token.token)
+    if not hf_token or not hf_token.token:
+        yield "⚠️ Please log in with your Hugging Face account (for gated models like LLaMA-2)."
+        return
 
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
+    model_id = MODEL_CHOICES[model_choice]
+    client = InferenceClient(model=model_id, token=hf_token.token)
 
-    response = ""
-    for chunk in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
+    response_text = ""
+    stream = client.text_generation(
+        prompt,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
         stream=True,
-    ):
-        if len(chunk.choices) and chunk.choices[0].delta.content:
-            token = chunk.choices[0].delta.content
-            response += token
-            yield response
-
+        repetition_penalty=1.0,
+    )
 
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
-    ],
-)
+    for event in stream:
+        # Each event is a string chunk
+        response_text += event
+        yield response_text
 
 
 with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
+    gr.Markdown("## ✍️ Text Completion Demo with Open-Source Base LLMs")
+    gr.Markdown(
+        "Pick a model hosted on Hugging Face, enter a prompt, adjust decoding parameters, "
+        "and watch the model complete your text."
+    )
+
+    with gr.Row():
+        with gr.Column(scale=2):
+            prompt = gr.Textbox(
+                label="Prompt",
+                placeholder="Type the beginning of your text...",
+                lines=4,
+            )
+            max_tokens = gr.Slider(
+                minimum=1, maximum=1024, value=100, step=1, label="Max tokens"
+            )
+            temperature = gr.Slider(
+                minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature"
+            )
+            top_p = gr.Slider(
+                minimum=0.1, maximum=1.0, value=1.0, step=0.05, label="Top-p"
+            )
+            model_choice = gr.Dropdown(
+                choices=list(MODEL_CHOICES.keys()),
+                value=list(MODEL_CHOICES.keys())[0],
+                label="Choose a model",
+            )
+            submit = gr.Button("Generate Completion")
+        with gr.Column(scale=3):
+            output = gr.Textbox(
+                label="Generated Completion",
+                lines=15,
+            )
 
+    submit.click(
+        fn=complete_text,
+        inputs=[prompt, max_tokens, temperature, top_p, model_choice],  # hf_token is injected via the gr.OAuthToken type hint, not passed as a component
+        outputs=output,
+    )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
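
The core pattern behind the new handler may be easier to see outside the diff: Gradio fills any event-handler parameter annotated with gr.OAuthToken (or gr.OAuthToken | None) from the logged-in session, which is why the token never appears among the click inputs, and InferenceClient.text_generation(..., stream=True) yields plain string chunks that can be concatenated as they arrive. The sketch below is illustrative rather than part of the commit: the model id, the component labels, and the assumption that the Space enables OAuth (hf_oauth: true in its README metadata, with a gr.LoginButton as the sign-in entry point) are stand-ins.

# Minimal sketch (not from the commit) of OAuth-token injection plus streaming.
# Assumes the Space sets hf_oauth: true in its README metadata so login works.
import gradio as gr
from huggingface_hub import InferenceClient


def complete(prompt: str, hf_token: gr.OAuthToken | None):
    # Gradio injects hf_token from the session because of the annotation;
    # note that it is absent from the `inputs` list below.
    if hf_token is None:
        yield "Please log in first."
        return
    client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.2", token=hf_token.token)
    text = ""
    # With stream=True (and default details=False), text_generation yields
    # plain string chunks, so they concatenate directly.
    for chunk in client.text_generation(prompt, max_new_tokens=64, stream=True):
        text += chunk
        yield text


with gr.Blocks() as sketch:
    gr.LoginButton()  # entry point that creates the OAuth session
    box = gr.Textbox(label="Prompt")
    out = gr.Textbox(label="Completion")
    box.submit(complete, inputs=[box], outputs=out)

if __name__ == "__main__":
    sketch.launch()

Because the token arrives through the annotation, only user-visible components belong in inputs; listing anything else there would be treated as a missing component.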