w1r4 committed on
Commit
146c824
·
verified ·
1 Parent(s): 92a045a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -11
app.py CHANGED
@@ -1,30 +1,33 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
- # We use the 32B Coder model which is generally available on the free API
5
  model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
6
 
7
- def respond(message, history):
8
- # Initialize the client inside the function to handle sessions correctly
9
  client = InferenceClient(model_id)
10
 
11
- # Build the message history for the API
12
- messages = []
 
 
13
  for user_msg, bot_msg in history:
14
  messages.append({"role": "user", "content": user_msg})
15
  messages.append({"role": "assistant", "content": bot_msg})
16
 
 
17
  messages.append({"role": "user", "content": message})
18
 
19
- # Generate the response
20
  response_text = ""
21
  try:
22
- # Stream the response
23
  stream = client.chat_completion(
24
  messages,
25
  max_tokens=2048,
26
  stream=True,
27
- temperature=0.7
 
28
  )
29
  for chunk in stream:
30
  content = chunk.choices[0].delta.content
@@ -32,16 +35,33 @@ def respond(message, history):
32
  response_text += content
33
  yield response_text
34
  except Exception as e:
35
- yield f"Error: {str(e)}. The model might be busy or too large for the current free tier."
36
 
37
  # Build the UI
38
  with gr.Blocks(fill_height=True) as demo:
39
  with gr.Sidebar():
40
  gr.Markdown("# AI Coding Assistant")
41
  gr.Markdown(f"Running **{model_id}**")
42
- gr.Markdown("If you see an error, the free API might be overloaded. Try again in a minute.")
43
  gr.LoginButton("Sign in")
44
 
45
- gr.ChatInterface(respond)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  demo.launch()
 
1
import gradio as gr
from huggingface_hub import InferenceClient

# Qwen 2.5 Coder 32B Instruct, served through the Hugging Face Inference API.
model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
6
 
7
def respond(message, history, system_message, temperature):
    """Stream a chat completion for *message*, given the running *history*.

    Args:
        message: The latest user message (str).
        history: Prior (user, assistant) turn pairs from gr.ChatInterface.
            NOTE(review): assumes the tuple-style history format — confirm
            against the installed Gradio version.
        system_message: System instruction placed first in the prompt.
        temperature: Sampling temperature forwarded to the API.

    Yields:
        The accumulated response text after each streamed chunk, so the UI
        updates incrementally.
    """
    # Initialize the client inside the function so every request is independent.
    client = InferenceClient(model_id)

    # 1. Start with the system message.
    messages = [{"role": "system", "content": system_message}]

    # 2. Add the conversation history.
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})

    # 3. Add the current user message.
    messages.append({"role": "user", "content": message})

    # Generate the response, streaming partial text back to the caller.
    response_text = ""
    try:
        stream = client.chat_completion(
            messages,
            max_tokens=2048,
            stream=True,
            temperature=temperature,
            top_p=0.9,
        )
        for chunk in stream:
            content = chunk.choices[0].delta.content
            # Guard against None deltas (role-only or final chunks);
            # without it `response_text += content` raises TypeError.
            if content:
                response_text += content
                yield response_text
    except Exception as e:
        # Surface API failures in the chat window instead of crashing the app.
        yield f"Error: {str(e)}. The model might be busy."
39
 
40
# Assemble the Gradio UI: a sidebar with app info, plus a chat interface
# wired to `respond`; the two extra widgets are passed to `respond` after
# (message, history), in order.
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# AI Coding Assistant")
        gr.Markdown(f"Running **{model_id}**")
        gr.LoginButton("Sign in")

    gr.ChatInterface(
        respond,
        additional_inputs=[
            # System instruction -> `system_message` parameter.
            gr.Textbox(value="You are a helpful assistant.",
                       label="System Instruction", lines=2),
            # Sampling temperature -> `temperature` parameter.
            gr.Slider(minimum=0.1, maximum=2.0, value=0.7,
                      step=0.1, label="Temperature"),
        ],
    )

demo.launch()