MiCkSoftware committed
Commit 41f2a92 · 1 Parent(s): e0bc5c6

fix + client

Files changed (4)
  1. .vscode/settings.json +3 -0
  2. app.py +9 -7
  3. client.py +26 -0
  4. doc.md +17 -0
.vscode/settings.json ADDED
@@ -0,0 +1,3 @@
+{
+    "python.pythonPath": "/Users/michael/.pyenv/versions/3.12.3/bin/python3.12"
+}
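
Note: recent releases of the VS Code Python extension deprecate the python.pythonPath setting in favor of python.defaultInterpreterPath. An equivalent setting, assuming the same pyenv interpreter, would be:

{
    "python.defaultInterpreterPath": "/Users/michael/.pyenv/versions/3.12.3/bin/python3.12"
}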
app.py CHANGED
@@ -3,7 +3,6 @@ from fastapi.responses import StreamingResponse
 from huggingface_hub import InferenceClient
 from pydantic import BaseModel
 from typing import List, Tuple
-import asyncio
 
 # Initialize the Hugging Face client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
@@ -12,6 +11,8 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 app = FastAPI()
 
 # Input data model
+
+
 class PredictionRequest(BaseModel):
     message: str
     history: List[Tuple[str, str]] = []
@@ -21,20 +22,21 @@ class PredictionRequest(BaseModel):
     top_p: float = 0.95
 
 
-async def generate_stream(request: PredictionRequest):
+def generate_stream(request: PredictionRequest):
     """
-    Asynchronous generator for streaming the response.
+    Synchronous generator for streaming the response.
     """
     messages = [{"role": "system", "content": request.system_message}]
     for user_input, assistant_response in request.history:
         if user_input:
             messages.append({"role": "user", "content": user_input})
         if assistant_response:
-            messages.append({"role": "assistant", "content": assistant_response})
+            messages.append(
+                {"role": "assistant", "content": assistant_response})
     messages.append({"role": "user", "content": request.message})
-
+
     try:
-        async for message in client.chat_completion(
+        for message in client.chat_completion(
             messages,
             max_tokens=request.max_tokens,
             stream=True,
@@ -44,7 +46,7 @@ async def generate_stream(request: PredictionRequest):
             token = message.choices[0].delta.content
             yield token
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        yield f"Error: {str(e)}"
 
 
 @app.post("/predict")
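
Note on the fix: InferenceClient.chat_completion with stream=True returns a plain synchronous iterator, which is why the previous async for failed. The generator is now synchronous, and errors are yielded as text rather than raised, since the HTTP status code can no longer change once streaming has begun. The hunk ends at the route decorator, so the handler body is not shown; a minimal sketch of how such an endpoint would typically wrap the generator (the media type is an assumption):

# Hypothetical handler body -- not part of this commit, shown for context only.
@app.post("/predict")
def predict(request: PredictionRequest):
    # StreamingResponse accepts a sync generator; FastAPI iterates it in a
    # threadpool and flushes each yielded token to the client as it arrives.
    return StreamingResponse(generate_stream(request), media_type="text/plain")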
client.py ADDED
@@ -0,0 +1,26 @@
+import requests
+
+# URL of the endpoint
+url = "https://micksoftware-laria-startup.hf.space/predict"
+
+# Request payload
+payload = {
+    "message": "salut",
+    "history": [],
+    "system_message": "You are a friendly Chatbot.",
+    "max_tokens": 512,
+    "temperature": 0.7,
+    "top_p": 0.95,
+}
+
+# Make the request with streaming enabled
+response = requests.post(url, json=payload, stream=True)
+
+# Read the response as a stream
+if response.status_code == 200:
+    print("Streaming response:")
+    for chunk in response.iter_lines(decode_unicode=True):
+        if chunk:
+            print(chunk, end="")
+else:
+    print(f"Error: {response.status_code} - {response.text}")
doc.md ADDED
@@ -0,0 +1,17 @@
+# HF URL
+https://huggingface.co/spaces/MiCkSoftware/laria-startup
+
+# Sample call
+curl -X 'POST' \
+  'https://micksoftware-laria-startup.hf.space/predict' \
+  -H 'accept: application/json' \
+  -H 'Content-Type: application/json' \
+  -d '{
+  "message": "salut",
+  "history": [],
+  "system_message": "You are a friendly Chatbot.",
+  "max_tokens": 512,
+  "temperature": 0.7,
+  "top_p": 0.95
+}'
+
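
Since /predict streams its reply, curl's default output buffering can make the whole response appear at once; the -N (--no-buffer) flag prints data as it arrives. The same call with only that flag added:

curl -N -X 'POST' \
  'https://micksoftware-laria-startup.hf.space/predict' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{"message": "salut", "history": [], "system_message": "You are a friendly Chatbot.", "max_tokens": 512, "temperature": 0.7, "top_p": 0.95}'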