Spaces:
Paused
Paused
Commit ·
41f2a92
1
Parent(s): e0bc5c6
fix + client
Browse files
.vscode/settings.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"python.pythonPath": "/Users/michael/.pyenv/versions/3.12.3/bin/python3.12"
|
| 3 |
+
}
|
app.py
CHANGED
|
@@ -3,7 +3,6 @@ from fastapi.responses import StreamingResponse
|
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
from pydantic import BaseModel
|
| 5 |
from typing import List, Tuple
|
| 6 |
-
import asyncio
|
| 7 |
|
| 8 |
# Initialisation du client Hugging Face
|
| 9 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
@@ -12,6 +11,8 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
| 12 |
app = FastAPI()
|
| 13 |
|
| 14 |
# Modèle pour les données d'entrée
|
|
|
|
|
|
|
| 15 |
class PredictionRequest(BaseModel):
|
| 16 |
message: str
|
| 17 |
history: List[Tuple[str, str]] = []
|
|
@@ -21,20 +22,21 @@ class PredictionRequest(BaseModel):
|
|
| 21 |
top_p: float = 0.95
|
| 22 |
|
| 23 |
|
| 24 |
-
|
| 25 |
"""
|
| 26 |
-
Générateur
|
| 27 |
"""
|
| 28 |
messages = [{"role": "system", "content": request.system_message}]
|
| 29 |
for user_input, assistant_response in request.history:
|
| 30 |
if user_input:
|
| 31 |
messages.append({"role": "user", "content": user_input})
|
| 32 |
if assistant_response:
|
| 33 |
-
messages.append(
|
|
|
|
| 34 |
messages.append({"role": "user", "content": request.message})
|
| 35 |
-
|
| 36 |
try:
|
| 37 |
-
|
| 38 |
messages,
|
| 39 |
max_tokens=request.max_tokens,
|
| 40 |
stream=True,
|
|
@@ -44,7 +46,7 @@ async def generate_stream(request: PredictionRequest):
|
|
| 44 |
token = message.choices[0].delta.content
|
| 45 |
yield token
|
| 46 |
except Exception as e:
|
| 47 |
-
|
| 48 |
|
| 49 |
|
| 50 |
@app.post("/predict")
|
|
|
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
from pydantic import BaseModel
|
| 5 |
from typing import List, Tuple
|
|
|
|
| 6 |
|
| 7 |
# Initialisation du client Hugging Face
|
| 8 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
|
|
| 11 |
app = FastAPI()
|
| 12 |
|
| 13 |
# Modèle pour les données d'entrée
|
| 14 |
+
|
| 15 |
+
|
| 16 |
class PredictionRequest(BaseModel):
|
| 17 |
message: str
|
| 18 |
history: List[Tuple[str, str]] = []
|
|
|
|
| 22 |
top_p: float = 0.95
|
| 23 |
|
| 24 |
|
| 25 |
+
def generate_stream(request: PredictionRequest):
|
| 26 |
"""
|
| 27 |
+
Générateur synchrone pour le streaming de réponse.
|
| 28 |
"""
|
| 29 |
messages = [{"role": "system", "content": request.system_message}]
|
| 30 |
for user_input, assistant_response in request.history:
|
| 31 |
if user_input:
|
| 32 |
messages.append({"role": "user", "content": user_input})
|
| 33 |
if assistant_response:
|
| 34 |
+
messages.append(
|
| 35 |
+
{"role": "assistant", "content": assistant_response})
|
| 36 |
messages.append({"role": "user", "content": request.message})
|
| 37 |
+
|
| 38 |
try:
|
| 39 |
+
for message in client.chat_completion(
|
| 40 |
messages,
|
| 41 |
max_tokens=request.max_tokens,
|
| 42 |
stream=True,
|
|
|
|
| 46 |
token = message.choices[0].delta.content
|
| 47 |
yield token
|
| 48 |
except Exception as e:
|
| 49 |
+
yield f"Error: {str(e)}"
|
| 50 |
|
| 51 |
|
| 52 |
@app.post("/predict")
|
client.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests

# Endpoint of the deployed Space that serves the /predict route.
url = "https://micksoftware-laria-startup.hf.space/predict"

# JSON body of the request: the new user message, the prior conversation
# turns, the system prompt, and the sampling parameters.
payload = {
    "message": "salut",
    "history": [],
    "system_message": "You are a friendly Chatbot.",
    "max_tokens": 512,
    "temperature": 0.7,
    "top_p": 0.95,
}

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever on an unreachable or stalled server. The read timeout bounds the
# wait between two chunks, not the duration of the whole streamed reply.
timeout = (10, 120)

# The context manager releases the connection even if reading the stream
# fails part-way through.
with requests.post(url, json=payload, stream=True, timeout=timeout) as response:
    if response.status_code == 200:
        # When the server declares no charset, requests leaves `encoding`
        # unset and `decode_unicode=True` would hand back raw bytes.
        # NOTE(review): assumes the endpoint emits UTF-8 text - confirm
        # against the server's response headers.
        if response.encoding is None:
            response.encoding = "utf-8"

        print("Streaming response:")
        # iter_content keeps the text exactly as sent; iter_lines would drop
        # the line breaks contained in the generated reply.
        for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
            if chunk:
                # Flush so each piece is shown as soon as it arrives.
                print(chunk, end="", flush=True)
        # Terminate the streamed output with a newline.
        print()
    else:
        print(f"Erreur : {response.status_code} - {response.text}")
|
doc.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hugging Face Space URL
|
| 2 |
+
https://huggingface.co/spaces/MiCkSoftware/laria-startup
|
| 3 |
+
|
| 4 |
+
# Sample call
|
| 5 |
+
curl -X 'POST' \
|
| 6 |
+
'https://micksoftware-laria-startup.hf.space/predict' \
|
| 7 |
+
-H 'accept: application/json' \
|
| 8 |
+
-H 'Content-Type: application/json' \
|
| 9 |
+
-d '{
|
| 10 |
+
"message": "salut",
|
| 11 |
+
"history": [],
|
| 12 |
+
"system_message": "You are a friendly Chatbot.",
|
| 13 |
+
"max_tokens": 512,
|
| 14 |
+
"temperature": 0.7,
|
| 15 |
+
"top_p": 0.95
|
| 16 |
+
}'
|
| 17 |
+
|