MultiModelCoder

Sleeping

App Files Files Community

w1r4 committed on Jan 5

Commit

146c824

verified ·

1 Parent(s): 92a045a

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -11

app.py CHANGED Viewed

@@ -1,30 +1,33 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-# We use the 32B Coder model which is generally available on the free API
 model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
-def respond(message, history):
-    # Initialize the client inside the function to handle sessions correctly
     client = InferenceClient(model_id)
-    # Build the message history for the API
-    messages = []
     for user_msg, bot_msg in history:
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})
-    # Generate the response
     response_text = ""
     try:
-        # Stream the response
         stream = client.chat_completion(
             messages,
             max_tokens=2048,
             stream=True,
-            temperature=0.7
         )
         for chunk in stream:
             content = chunk.choices[0].delta.content
@@ -32,16 +35,33 @@ def respond(message, history):
                 response_text += content
                 yield response_text
     except Exception as e:
-        yield f"Error: {str(e)}. The model might be busy or too large for the current free tier."
 # Build the UI
 with gr.Blocks(fill_height=True) as demo:
     with gr.Sidebar():
         gr.Markdown("# AI Coding Assistant")
         gr.Markdown(f"Running **{model_id}**")
-        gr.Markdown("If you see an error, the free API might be overloaded. Try again in a minute.")
         gr.LoginButton("Sign in")
-    gr.ChatInterface(respond)
 demo.launch()

 import gradio as gr
 from huggingface_hub import InferenceClient
+# Using Qwen 2.5 Coder
 model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
+def respond(message, history, system_message, temperature):
+    # Initialize the client
     client = InferenceClient(model_id)
+    # 1. Start with the System Message
+    messages = [{"role": "system", "content": system_message}]
+    # 2. Add the history
     for user_msg, bot_msg in history:
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "assistant", "content": bot_msg})
+    # 3. Add the current user message
     messages.append({"role": "user", "content": message})
+    # Generate response
     response_text = ""
     try:
         stream = client.chat_completion(
             messages,
             max_tokens=2048,
             stream=True,
+            temperature=temperature,
+            top_p=0.9
         )
         for chunk in stream:
             content = chunk.choices[0].delta.content
                 response_text += content
                 yield response_text
     except Exception as e:
+        yield f"Error: {str(e)}. The model might be busy."
 # Build the UI
 with gr.Blocks(fill_height=True) as demo:
     with gr.Sidebar():
         gr.Markdown("# AI Coding Assistant")
         gr.Markdown(f"Running **{model_id}**")
         gr.LoginButton("Sign in")
+    gr.ChatInterface(
+        respond,
+        additional_inputs=[
+            # System Message Input
+            gr.Textbox(
+                value="You are a helpful assistant.",
+                label="System Instruction",
+                lines=2
+            ),
+            # Temperature Slider
+            gr.Slider(
+                minimum=0.1,
+                maximum=2.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature"
+            )
+        ]
+    )
 demo.launch()