prash616 committed on
Commit
d6eda72
·
verified ·
1 Parent(s): 6d846f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -28
app.py CHANGED
@@ -2,50 +2,49 @@ import os
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
 
 
5
  token = os.environ.get("HF_TOKEN")
6
 
7
- # 🚀 THE BREAKTHROUGH: Bypassing the "Provider Router"
8
- # By passing the explicit URL instead of the model name, we force Hugging Face
9
- # to use your free Serverless API, eliminating the 'model_not_supported' error.
10
- model_url = "https://api-inference.huggingface.co/models/prash616/Gemma-2b-TARS-SFT"
11
- client = InferenceClient(model=model_url, token=token)
12
 
13
  def respond(message, history, system_message, max_tokens, temperature, top_p):
14
- messages = [{"role": "system", "content": system_message}]
 
15
 
16
- # 🛡️ DYNAMIC HISTORY PARSER
17
- # This automatically adapts whether your Space is running Gradio 4 (lists) or Gradio 5+ (dicts)
18
  for item in history:
19
  if isinstance(item, dict):
20
- messages.append(item)
21
- elif isinstance(item, (list, tuple)) and len(item) == 2:
22
- if item[0]: messages.append({"role": "user", "content": item[0]})
23
- if item[1]: messages.append({"role": "assistant", "content": item[1]})
24
-
25
- messages.append({"role": "user", "content": message})
 
26
 
27
  response = ""
28
  try:
29
- # We return to the much safer, natively-formatted chat_completion engine
30
- for chunk in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
  stream=True,
34
  temperature=temperature,
35
  top_p=top_p,
 
36
  ):
37
- # Extract the generated text chunk safely
38
- if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
39
- response += chunk.choices[0].delta.content
40
- yield response
41
-
42
  except Exception as e:
43
- # Using repr(e) guarantees we will NEVER get a blank error message again.
44
- yield f"⚠️ TARS API Error: {type(e).__name__} - {repr(e)}"
45
 
46
- # The clean UI initialization
47
  demo = gr.ChatInterface(
48
- respond,
49
  additional_inputs=[
50
  gr.Textbox(
51
  value="You are TARS, a creative and technical assistant created by Prashant.",
@@ -56,7 +55,7 @@ demo = gr.ChatInterface(
56
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
57
  ],
58
  title="🌌 TARS 1.1 Interface",
59
- description="Fine-tuned Gemma-2b-TARS-SFT | Creative Intelligence",
60
  )
61
 
62
  if __name__ == "__main__":
 
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
 
5
# Hugging Face token injected via the Space's secret store (HF_TOKEN).
token = os.getenv("HF_TOKEN")

# Serverless Inference client bound directly to the fine-tuned model repo.
client = InferenceClient(model="prash616/Gemma-2b-TARS-SFT", token=token)
11
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a reply from the TARS model for a Gradio ChatInterface.

    Builds a Gemma-style turn-delimited prompt from the system message, the
    chat history, and the current user message, then streams tokens from the
    Serverless Inference API, yielding the growing reply string.

    Parameters:
        message: Current user message (str).
        history: Prior turns; either role/content dicts (Gradio 5+) or
            (user, assistant) pairs (Gradio 4) — both are supported.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Yields:
        The accumulated response text after each streamed chunk, or a single
        human-readable error message if the API call fails.
    """
    # 1. Manually construct the Gemma-2 instruction prompt.
    # NOTE(review): Gemma has no native "system" role — assumes the fine-tune
    # was trained with this turn format; confirm against the SFT data.
    prompt = f"<start_of_turn>system\n{system_message}<end_of_turn>\n"

    # 2. Dynamic history parser. Accept both the modern Gradio dict format
    # and the legacy (user, assistant) pair format, so history is not
    # silently dropped when the Space runs on an older Gradio runtime.
    for item in history:
        if isinstance(item, dict):
            # Gemma labels the assistant turn "model", not "assistant".
            role = "model" if item.get("role") == "assistant" else "user"
            content = item.get("content", "")
            prompt += f"<start_of_turn>{role}\n{content}<end_of_turn>\n"
        elif isinstance(item, (list, tuple)) and len(item) == 2:
            user_msg, bot_msg = item
            if user_msg:
                prompt += f"<start_of_turn>user\n{user_msg}<end_of_turn>\n"
            if bot_msg:
                prompt += f"<start_of_turn>model\n{bot_msg}<end_of_turn>\n"

    # 3. Append the current user message and open the model turn so the
    # endpoint continues generation from here.
    prompt += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"

    response = ""
    try:
        # 4. Raw text_generation bypasses the chat-completion router, which
        # rejects models that have no provider/chat-template mapping.
        for token_text in client.text_generation(
            prompt=prompt,
            max_new_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            stop=["<end_of_turn>", "<start_of_turn>"],  # replaces deprecated stop_sequences
        ):
            response += token_text
            # Gradio streaming contract: yield the full string built so far.
            yield response
    except Exception as e:
        # Surface a readable message (e.g. while a cold model spins up).
        yield f"⚠️ Connection Error: {str(e)}\n\n(If the model is cold, please wait 60 seconds and try again.)"
 
45
+ # Initialize the UI components
46
  demo = gr.ChatInterface(
47
+ fn=respond,
48
  additional_inputs=[
49
  gr.Textbox(
50
  value="You are TARS, a creative and technical assistant created by Prashant.",
 
55
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
56
  ],
57
  title="🌌 TARS 1.1 Interface",
58
+ description="Fine-tuned Gemma-2b-TARS-SFT | Running on Serverless API",
59
  )
60
 
61
  if __name__ == "__main__":