xx
Browse files
app.py
CHANGED
|
@@ -105,55 +105,57 @@ def model_inference(input_dict, history, *additional_inputs):
|
|
| 105 |
yield "Please login with a Hugging Face account (use the Login button in the sidebar)."
|
| 106 |
return
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
response = ""
|
| 112 |
-
yield progress_bar_html("Processing...")
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
token =
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
|
| 159 |
examples = [
|
|
|
|
| 105 |
yield "Please login with a Hugging Face account (use the Login button in the sidebar)."
|
| 106 |
return
|
| 107 |
|
| 108 |
+
client = InferenceClient(
|
| 109 |
+
token=hf_token.token, model=model_name, provider="hf-inference"
|
| 110 |
+
)
|
|
|
|
|
|
|
| 111 |
|
| 112 |
+
response = ""
|
| 113 |
+
for message in client.chat_completion(
|
| 114 |
+
messages,
|
| 115 |
+
max_tokens=1024,
|
| 116 |
+
stream=True,
|
| 117 |
+
):
|
| 118 |
+
choices = message.choices
|
| 119 |
+
token = ""
|
| 120 |
+
if len(choices) and choices[0].delta.content:
|
| 121 |
+
token = choices[0].delta.content
|
| 122 |
+
|
| 123 |
+
response += token
|
| 124 |
+
yield response
|
| 125 |
+
|
| 126 |
+
# for chunk in stream:
|
| 127 |
+
# # chunk can be an object with attributes or a dict depending on client version
|
| 128 |
+
# token = ""
|
| 129 |
+
# try:
|
| 130 |
+
# # attempt dict-style
|
| 131 |
+
# if isinstance(chunk, dict):
|
| 132 |
+
# choices = chunk.get("choices")
|
| 133 |
+
# if choices and len(choices) > 0:
|
| 134 |
+
# delta = choices[0].get("delta", {})
|
| 135 |
+
# token = delta.get("content") or ""
|
| 136 |
+
# else:
|
| 137 |
+
# # attribute-style
|
| 138 |
+
# choices = getattr(chunk, "choices", None)
|
| 139 |
+
# if choices and len(choices) > 0:
|
| 140 |
+
# delta = getattr(choices[0], "delta", None)
|
| 141 |
+
# if isinstance(delta, dict):
|
| 142 |
+
# token = delta.get("content") or ""
|
| 143 |
+
# else:
|
| 144 |
+
# token = getattr(delta, "content", "")
|
| 145 |
+
# except Exception:
|
| 146 |
+
# token = ""
|
| 147 |
+
|
| 148 |
+
# if token:
|
| 149 |
+
# # escape incremental token to avoid raw HTML breaking the chat box
|
| 150 |
+
# response += html.escape(token)
|
| 151 |
+
# time.sleep(0.001)
|
| 152 |
+
# yield response
|
| 153 |
+
|
| 154 |
+
# # ensure we yield at least one final message so the async iterator doesn't see StopIteration
|
| 155 |
+
# if response:
|
| 156 |
+
# yield response
|
| 157 |
+
# else:
|
| 158 |
+
# yield "(no text was returned by the model)"
|
| 159 |
|
| 160 |
|
| 161 |
examples = [
|