Update main.py
main.py (changed)

--- a/main.py
+++ b/main.py
@@ -1,13 +1,8 @@
-import os
-import time
-import uuid
-import json
-
-from fastapi import FastAPI, HTTPException
+import os, time, uuid, json
+from fastapi import FastAPI
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 from typing import List
-
 from meta_ai_api import MetaAI
 
 class Message(BaseModel):
@@ -22,51 +17,44 @@ app = FastAPI()
 DEFAULT_MODEL = os.getenv("MODEL_NAME", "llama-3-70b")
 meta = MetaAI()
 
-@app.get("/")
-def read_root():
-    return {"msg": "Try POST /v1/chat/completions with stream=True"}
-
 @app.post("/v1/chat/completions")
 async def chat_completions(req: ChatRequest):
-    # pick model from body or env
     model_name = req.model or DEFAULT_MODEL
-
-    # build one big prompt
     prompt = "\n".join(f"{m.role}: {m.content}" for m in req.messages)
 
-    # generator for SSE
     def event_stream():
-
+        last_text = ""
        for chunk in meta.prompt(message=prompt, stream=True):
-
+            full = chunk["message"]            # the entire text so far
+            new_piece = full[len(last_text):]  # what's just arrived
+            last_text = full
+            if not new_piece:
+                continue
+
             data = {
                 "id": f"meta-{uuid.uuid4()}",
                 "object": "chat.completion.chunk",
                 "created": int(time.time()),
                 "model": model_name,
-                "choices": [
-                    {
-
-
-
-                    }
-                ]
+                "choices": [{
+                    "delta": {"content": new_piece},
+                    "index": 0,
+                    "finish_reason": None
+                }]
             }
             yield f"data: {json.dumps(data)}\n\n"
 
-        #
+        # final stop signal
         done = {
             "id": f"meta-{uuid.uuid4()}",
             "object": "chat.completion.chunk",
             "created": int(time.time()),
             "model": model_name,
-            "choices": [
-                {
-
-
-
-                }
-            ]
+            "choices": [{
+                "delta": {},
+                "index": 0,
+                "finish_reason": "stop"
+            }]
         }
         yield f"data: {json.dumps(done)}\n\n"
 
|
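The Message and ChatRequest models are collapsed out of this diff as unchanged context. For reference, a minimal sketch of what the handler appears to assume, with field names inferred from req.model, req.messages, m.role, and m.content; the stream flag and the defaults are assumptions, not taken from the file:

# Hypothetical reconstruction of the collapsed Pydantic models.
# Field names are inferred from usage in the diff; `stream` is an assumption.
from typing import List, Optional
from pydantic import BaseModel

class Message(BaseModel):
    role: str        # e.g. "system", "user", "assistant"
    content: str

class ChatRequest(BaseModel):
    model: Optional[str] = None   # handler falls back to DEFAULT_MODEL
    messages: List[Message]
    stream: bool = False          # assumed; this endpoint always streams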
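The substance of the change: meta_ai_api's streaming iterator yields the cumulative text so far in chunk["message"], so event_stream() now tracks last_text and forwards only the new suffix as an OpenAI-style delta chunk. One way to watch those deltas end to end is to read the SSE lines directly. A sketch of such a client using httpx, assuming the app is served locally on port 8000 (e.g. via uvicorn main:app):

# Streaming-client sketch for the endpoint above. The URL, port, and prompt
# are assumptions; the parsing matches the chunks emitted by event_stream().
import json
import httpx

payload = {
    "model": "llama-3-70b",
    "messages": [{"role": "user", "content": "Say hello in five words."}],
    "stream": True,
}

with httpx.stream("POST", "http://localhost:8000/v1/chat/completions",
                  json=payload, timeout=None) as resp:
    for line in resp.iter_lines():
        if not line.startswith("data: "):
            continue  # skip blank SSE separators
        chunk = json.loads(line[len("data: "):])
        delta = chunk["choices"][0]["delta"]
        print(delta.get("content", ""), end="", flush=True)

One caveat: OpenAI's own streams terminate with a data: [DONE] sentinel after the finish_reason "stop" chunk, which this stream does not send, so clients that wait for that sentinel may hang; reading the raw SSE as above is the more reliable smoke test.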