Hivra committed on
Commit
0d2cdcb
·
verified ·
1 Parent(s): a2e7e63

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +20 -32
main.py CHANGED
@@ -1,13 +1,8 @@
1
- import os
2
- import time
3
- import uuid
4
- import json
5
-
6
- from fastapi import FastAPI, HTTPException
7
  from fastapi.responses import StreamingResponse
8
  from pydantic import BaseModel
9
  from typing import List
10
-
11
  from meta_ai_api import MetaAI
12
 
13
  class Message(BaseModel):
@@ -22,51 +17,44 @@ app = FastAPI()
22
  DEFAULT_MODEL = os.getenv("MODEL_NAME", "llama-3-70b")
23
  meta = MetaAI()
24
 
25
@app.get("/")
def read_root():
    """Landing endpoint: points callers at the streaming chat API."""
    hint = "Try POST /v1/chat/completions with stream=True"
    return {"msg": hint}
28
-
29
  @app.post("/v1/chat/completions")
30
  async def chat_completions(req: ChatRequest):
31
- # pick model from body or env
32
  model_name = req.model or DEFAULT_MODEL
33
-
34
- # build one big prompt
35
  prompt = "\n".join(f"{m.role}: {m.content}" for m in req.messages)
36
 
37
- # generator for SSE
38
  def event_stream():
39
- # start streaming from MetaAI
40
  for chunk in meta.prompt(message=prompt, stream=True):
41
- # build a "chunk" JSON like OpenAI does
 
 
 
 
 
42
  data = {
43
  "id": f"meta-{uuid.uuid4()}",
44
  "object": "chat.completion.chunk",
45
  "created": int(time.time()),
46
  "model": model_name,
47
- "choices": [
48
- {
49
- "delta": {"content": chunk["message"]},
50
- "index": 0,
51
- "finish_reason": None
52
- }
53
- ]
54
  }
55
  yield f"data: {json.dumps(data)}\n\n"
56
 
57
- # once done, send the final [DONE]
58
  done = {
59
  "id": f"meta-{uuid.uuid4()}",
60
  "object": "chat.completion.chunk",
61
  "created": int(time.time()),
62
  "model": model_name,
63
- "choices": [
64
- {
65
- "delta": {},
66
- "index": 0,
67
- "finish_reason": "stop"
68
- }
69
- ]
70
  }
71
  yield f"data: {json.dumps(done)}\n\n"
72
 
 
1
+ import os, time, uuid, json
2
+ from fastapi import FastAPI
 
 
 
 
3
  from fastapi.responses import StreamingResponse
4
  from pydantic import BaseModel
5
  from typing import List
 
6
  from meta_ai_api import MetaAI
7
 
8
  class Message(BaseModel):
 
17
# Model name echoed back in responses; override with the MODEL_NAME env var.
DEFAULT_MODEL = os.getenv("MODEL_NAME", "llama-3-70b")
# Single shared MetaAI client, created once at import time and reused by
# every request handler.
meta = MetaAI()
19
 
 
 
 
 
20
  @app.post("/v1/chat/completions")
21
  async def chat_completions(req: ChatRequest):
 
22
  model_name = req.model or DEFAULT_MODEL
 
 
23
  prompt = "\n".join(f"{m.role}: {m.content}" for m in req.messages)
24
 
 
25
def event_stream():
    """Yield OpenAI-style SSE chunks for a streaming MetaAI response.

    MetaAI's stream hands back the *cumulative* text on every chunk
    (``chunk["message"]`` is the entire reply so far), so we remember
    what was already emitted and forward only the newly arrived suffix,
    mirroring OpenAI's delta-based ``chat.completion.chunk`` format.

    Yields:
        str: ``data: {...}\n\n`` SSE frames; a final chunk with
        ``finish_reason="stop"``; then the literal ``data: [DONE]``
        sentinel that OpenAI-compatible clients use to end the stream.
    """
    last_text = ""
    for chunk in meta.prompt(message=prompt, stream=True):
        full = chunk["message"]            # entire text so far
        new_piece = full[len(last_text):]  # what's just arrived
        last_text = full
        if not new_piece:
            # Duplicate/empty update from the upstream stream — skip.
            continue

        data = {
            "id": f"meta-{uuid.uuid4()}",
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": model_name,
            "choices": [{
                "delta": {"content": new_piece},
                "index": 0,
                "finish_reason": None,
            }],
        }
        yield f"data: {json.dumps(data)}\n\n"

    # Final stop chunk, then the literal [DONE] sentinel. The earlier
    # revision's comment promised "[DONE]" but never sent it; OpenAI
    # clients rely on this exact frame to know the stream is finished.
    done = {
        "id": f"meta-{uuid.uuid4()}",
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model_name,
        "choices": [{
            "delta": {},
            "index": 0,
            "finish_reason": "stop",
        }],
    }
    yield f"data: {json.dumps(done)}\n\n"
    yield "data: [DONE]\n\n"
60