13ilguun committed on
Commit
0333e82
·
verified ·
1 Parent(s): e420de1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -12
app.py CHANGED
@@ -1,10 +1,12 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
3
  import os
4
 
5
- """
6
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
- """
8
  client = InferenceClient(
9
  model="HuggingFaceH4/zephyr-7b-beta",
10
  token=os.getenv("huggingface_token"),
@@ -38,14 +40,36 @@ def respond(
38
  top_p=top_p,
39
  ):
40
  token = message.choices[0].delta.content
41
-
42
  response += token
43
  yield response
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- """
47
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
48
- """
49
  demo = gr.ChatInterface(
50
  respond,
51
  additional_inputs=[
@@ -57,11 +81,9 @@ demo = gr.ChatInterface(
57
  maximum=1.0,
58
  value=0.95,
59
  step=0.05,
60
- label="Top-p (nucleus sampling)",
61
- ),
62
  ],
63
  )
64
 
65
-
66
- if __name__ == "__main__":
67
- demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ from fastapi import FastAPI, Request
4
+ from fastapi.responses import JSONResponse
5
  import os
6
 
7
+ # Gradio app + FastAPI mount
8
+ app = FastAPI()
9
+
10
  client = InferenceClient(
11
  model="HuggingFaceH4/zephyr-7b-beta",
12
  token=os.getenv("huggingface_token"),
 
40
  top_p=top_p,
41
  ):
42
  token = message.choices[0].delta.content
 
43
  response += token
44
  yield response
45
 
46
+ # Define FastAPI POST endpoint
47
@app.post("/chat")
async def chat(request: Request):
    """Answer a single-turn chat request and return the whole reply as JSON.

    The request body must be a JSON object with these fields:

    * ``message`` (required): non-empty string holding the user's prompt.
    * ``persona`` (optional): system prompt; defaults to
      ``"You are a friendly Chatbot."``.
    * ``max_tokens`` (optional, default ``512``), ``temperature`` (optional,
      default ``0.7``) and ``top_p`` (optional, default ``0.95``): forwarded
      unchanged to ``client.chat_completion``.

    Returns:
        ``{"response": <generated text>}`` on success, or
        ``{"error": <reason>}`` with HTTP status 400 when the body is not a
        JSON object or ``message`` is missing/empty.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import asyncio

    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; report a client error instead of an unhandled 500.
        return JSONResponse(
            {"error": "Request body must be valid JSON."},
            status_code=400,
        )

    if not isinstance(data, dict):
        return JSONResponse(
            {"error": "Request body must be a JSON object."},
            status_code=400,
        )

    message = data.get("message")
    if not isinstance(message, str) or not message.strip():
        # Without this guard a missing prompt would be sent to the model
        # as ``None``.
        return JSONResponse(
            {"error": "Field 'message' is required and must be a non-empty string."},
            status_code=400,
        )

    persona = data.get("persona", "You are a friendly Chatbot.")
    max_tokens = data.get("max_tokens", 512)
    temperature = data.get("temperature", 0.7)
    top_p = data.get("top_p", 0.95)

    messages = [
        {"role": "system", "content": persona},
        {"role": "user", "content": message},
    ]

    def _collect_reply() -> str:
        # Streamed chunks may carry ``None`` as delta content; treat those
        # as empty strings while concatenating.
        return "".join(
            chunk.choices[0].delta.content or ""
            for chunk in client.chat_completion(
                messages,
                max_tokens=max_tokens,
                stream=True,
                temperature=temperature,
                top_p=top_p,
            )
        )

    # ``client.chat_completion`` is iterated synchronously; running it in the
    # default executor keeps the event loop free to serve other requests
    # while the model generates.
    loop = asyncio.get_running_loop()
    full_response = await loop.run_in_executor(None, _collect_reply)

    return JSONResponse({"response": full_response})
70
+
71
 
72
+ # Gradio demo for UI access
 
 
73
  demo = gr.ChatInterface(
74
  respond,
75
  additional_inputs=[
 
81
  maximum=1.0,
82
  value=0.95,
83
  step=0.05,
84
+ label="Top-p (nucleus sampling)"),
 
85
  ],
86
  )
87
 
88
+ # Mount Gradio app at "/"
89
+ app = gr.mount_gradio_app(app, demo, path="/")