CedricZ commited on
Commit
19df4a4
·
1 Parent(s): b3c7565
Files changed (1) hide show
  1. app.py +18 -16
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import gradio as gr
2
  import os
3
- from huggingface_hub import InferenceClient
 
 
4
 
5
def respond(
    message,
    max_tokens,
    temperature,
    top_p
):
    """Generate a text completion for *message* via the Hugging Face Inference API.

    Args:
        message: The prompt string to complete.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature forwarded to the backend.
        top_p: Nucleus-sampling probability mass forwarded to the backend.

    Returns:
        The generated text, or a user-facing error string if the request fails
        (e.g. rate limiting by the provider).
    """
    # Client is built per call; the token comes from the Space's secret store.
    client = InferenceClient(token=os.getenv('access_token'), model="meta-llama/Meta-Llama-3.1-8B", provider="featherless-ai")

    try:
        output = client.text_generation(
            message,
            max_new_tokens=max_tokens,
            stream=False,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=1.1)
    except Exception:
        # Was a bare `except:`, which would also swallow KeyboardInterrupt and
        # SystemExit; `Exception` keeps the graceful fallback for API errors only.
        output = "Error: Too many requests at the moment. Please try submit again in a few seconds."

    return output
26
 
27
 
28
  demo = gr.Interface(
 
1
  import gradio as gr
2
  import os
3
+ import requests
4
+
5
+ API_KEY = os.getenv("access_token")
6
 
7
def respond(
    message,
    max_tokens,
    temperature,
    top_p
):
    """Generate a text completion for *message* via the Featherless.ai completions API.

    Args:
        message: The prompt string to complete.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling probability mass forwarded to the API.

    Returns:
        The generated completion text, or a user-facing error string if the
        request fails (network error, HTTP error, or malformed response).
    """
    try:
        response = requests.post(
            url="https://api.featherless.ai/v1/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-type": "application/json"
            },
            json={
                "model": "Qwen/Qwen3-8B",
                "prompt": message,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "top_p": top_p
            },
            # Without a timeout, requests waits forever and can hang the
            # Gradio worker on a stalled connection.
            timeout=60,
        )
        # Surface HTTP 4xx/5xx explicitly instead of failing later with a
        # KeyError when the error payload lacks "choices".
        response.raise_for_status()
        return response.json()["choices"][0]["text"]
    except (requests.RequestException, KeyError, IndexError, ValueError):
        # Graceful fallback shown in the UI (ValueError covers JSON decode errors).
        return "Error: Too many requests at the moment. Please try submit again in a few seconds."
28
 
29
 
30
  demo = gr.Interface(