prash616 committed on
Commit
d6eda72
·
verified ·
1 Parent(s): 6d846f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -28
app.py CHANGED
@@ -2,50 +2,49 @@ import os
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
 
 
5
  token = os.environ.get("HF_TOKEN")
6
 
7
- # 🚀 THE BREAKTHROUGH: Bypassing the "Provider Router"
8
- # By passing the explicit URL instead of the model name, we force Hugging Face
9
- # to use your free Serverless API, eliminating the 'model_not_supported' error.
10
- model_url = "https://api-inference.huggingface.co/models/prash616/Gemma-2b-TARS-SFT"
11
- client = InferenceClient(model=model_url, token=token)
12
 
13
  def respond(message, history, system_message, max_tokens, temperature, top_p):
14
- messages = [{"role": "system", "content": system_message}]
 
15
 
16
- # 🛡️ DYNAMIC HISTORY PARSER
17
- # This automatically adapts whether your Space is running Gradio 4 (lists) or Gradio 5+ (dicts)
18
  for item in history:
19
  if isinstance(item, dict):
20
- messages.append(item)
21
- elif isinstance(item, (list, tuple)) and len(item) == 2:
22
- if item[0]: messages.append({"role": "user", "content": item[0]})
23
- if item[1]: messages.append({"role": "assistant", "content": item[1]})
24
-
25
- messages.append({"role": "user", "content": message})
 
26
 
27
  response = ""
28
  try:
29
- # We return to the much safer, natively-formatted chat_completion engine
30
- for chunk in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
  stream=True,
34
  temperature=temperature,
35
  top_p=top_p,
 
36
  ):
37
- # Extract the generated text chunk safely
38
- if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
39
- response += chunk.choices[0].delta.content
40
- yield response
41
-
42
  except Exception as e:
43
- # Using repr(e) guarantees we will NEVER get a blank error message again.
44
- yield f"⚠️ TARS API Error: {type(e).__name__} - {repr(e)}"
45
 
46
- # The clean UI initialization
47
  demo = gr.ChatInterface(
48
- respond,
49
  additional_inputs=[
50
  gr.Textbox(
51
  value="You are TARS, a creative and technical assistant created by Prashant.",
@@ -56,7 +55,7 @@ demo = gr.ChatInterface(
56
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
57
  ],
58
  title="🌌 TARS 1.1 Interface",
59
- description="Fine-tuned Gemma-2b-TARS-SFT | Creative Intelligence",
60
  )
61
 
62
  if __name__ == "__main__":
 
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
 
5
# Hugging Face token injected via the Space's secret store (HF_TOKEN).
token = os.getenv("HF_TOKEN")

# Serverless Inference client bound directly to the fine-tuned model repo.
client = InferenceClient(model="prash616/Gemma-2b-TARS-SFT", token=token)
11
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a reply from the TARS model for a Gradio ChatInterface.

    Builds a Gemma-style turn-delimited prompt from the system message, the
    chat history, and the current user message, then streams tokens from the
    Serverless Inference API, yielding the growing reply string.

    Parameters:
        message: Current user message (str).
        history: Prior turns; either role/content dicts (Gradio 5+) or
            (user, assistant) pairs (Gradio 4) — both are supported.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Yields:
        The accumulated response text after each streamed chunk, or a single
        human-readable error message if the API call fails.
    """
    # 1. Manually construct the Gemma-2 instruction prompt.
    # NOTE(review): Gemma has no native "system" role — assumes the fine-tune
    # was trained with this turn format; confirm against the SFT data.
    prompt = f"<start_of_turn>system\n{system_message}<end_of_turn>\n"

    # 2. Dynamic history parser. Accept both the modern Gradio dict format
    # and the legacy (user, assistant) pair format, so history is not
    # silently dropped when the Space runs on an older Gradio runtime.
    for item in history:
        if isinstance(item, dict):
            # Gemma labels the assistant turn "model", not "assistant".
            role = "model" if item.get("role") == "assistant" else "user"
            content = item.get("content", "")
            prompt += f"<start_of_turn>{role}\n{content}<end_of_turn>\n"
        elif isinstance(item, (list, tuple)) and len(item) == 2:
            user_msg, bot_msg = item
            if user_msg:
                prompt += f"<start_of_turn>user\n{user_msg}<end_of_turn>\n"
            if bot_msg:
                prompt += f"<start_of_turn>model\n{bot_msg}<end_of_turn>\n"

    # 3. Append the current user message and open the model turn so the
    # endpoint continues generation from here.
    prompt += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"

    response = ""
    try:
        # 4. Raw text_generation bypasses the chat-completion router, which
        # rejects models that have no provider/chat-template mapping.
        for token_text in client.text_generation(
            prompt=prompt,
            max_new_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            stop=["<end_of_turn>", "<start_of_turn>"],  # replaces deprecated stop_sequences
        ):
            response += token_text
            # Gradio streaming contract: yield the full string built so far.
            yield response
    except Exception as e:
        # Surface a readable message (e.g. while a cold model spins up).
        yield f"⚠️ Connection Error: {str(e)}\n\n(If the model is cold, please wait 60 seconds and try again.)"
 
45
+ # Initialize the UI components
46
  demo = gr.ChatInterface(
47
+ fn=respond,
48
  additional_inputs=[
49
  gr.Textbox(
50
  value="You are TARS, a creative and technical assistant created by Prashant.",
 
55
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
56
  ],
57
  title="🌌 TARS 1.1 Interface",
58
+ description="Fine-tuned Gemma-2b-TARS-SFT | Running on Serverless API",
59
  )
60
 
61
  if __name__ == "__main__":