Jadyro committed on
Commit
f1dbca3
·
verified ·
1 Parent(s): 4e9db80

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -41
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from typing import List, Optional
4
  from transformers import AutoTokenizer, pipeline
@@ -11,7 +11,7 @@ pipe = pipeline(
11
  "text-generation",
12
  model=MODEL_ID,
13
  tokenizer=tokenizer,
14
- device_map="auto", # "cpu" if you want to force CPU
15
  max_new_tokens=512,
16
  )
17
 
@@ -24,45 +24,85 @@ class ChatMessage(BaseModel):
24
 
25
 
26
  class ChatRequest(BaseModel):
27
- model: Optional[str] = None # ignored, for OpenAI-compat
28
  messages: List[ChatMessage]
29
  temperature: Optional[float] = 0.0
30
  max_tokens: Optional[int] = 512
31
 
32
 
33
- class ChatChoiceMessage(BaseModel):
34
- role: str
35
- content: str
36
-
37
-
38
- class ChatChoice(BaseModel):
39
- index: int
40
- message: ChatChoiceMessage
41
- finish_reason: str
42
-
43
-
44
- class ChatResponse(BaseModel):
45
- id: str
46
- object: str
47
- choices: List[ChatChoice]
48
-
49
-
50
  @app.get("/")
51
  def root():
52
  return {"status": "ok", "model": MODEL_ID}
53
 
54
 
55
- @app.post("/v1/chat/completions", response_model=ChatResponse)
56
- def chat(request: ChatRequest):
57
- # Convert Pydantic objects to plain dicts
58
- messages = [m.dict() for m in request.messages]
59
-
60
- # Use the model's chat template
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  prompt = tokenizer.apply_chat_template(
62
- messages,
63
  tokenize=False,
64
  add_generation_prompt=True,
65
  )
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  outputs = pipe(
68
  prompt,
@@ -75,18 +115,17 @@ def chat(request: ChatRequest):
75
  full = outputs[0]["generated_text"]
76
  reply = full[len(prompt):].strip()
77
 
78
- response = ChatResponse(
79
- id="chatcmpl-1",
80
- object="chat.completion",
81
- choices=[
82
- ChatChoice(
83
- index=0,
84
- message=ChatChoiceMessage(
85
- role="assistant",
86
- content=reply,
87
- ),
88
- finish_reason="stop",
89
- )
90
  ],
91
- )
92
- return response
 
1
+ from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
  from typing import List, Optional
4
  from transformers import AutoTokenizer, pipeline
 
11
  "text-generation",
12
  model=MODEL_ID,
13
  tokenizer=tokenizer,
14
+ device_map="auto", # "cpu" on HF’s free tier
15
  max_new_tokens=512,
16
  )
17
 
 
24
 
25
 
26
class ChatRequest(BaseModel):
    """Request body for the OpenAI-compatible /v1/chat/completions endpoint."""
    model: Optional[str] = None  # ignored, OpenAI-style compat
    messages: List[ChatMessage]
    temperature: Optional[float] = 0.0
    max_tokens: Optional[int] = 512
31
 
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
@app.get("/")
def root():
    """Health-check endpoint: report service status and the served model id."""
    payload = {"status": "ok", "model": MODEL_ID}
    return payload
36
 
37
 
38
def _normalize_messages(raw_messages: List[dict]) -> List[dict]:
    """Collapse an OpenAI-style message list into strict user/assistant turns.

    - System messages are collected and their text is prepended to the first
      user message.
    - Leading assistant messages are dropped (the template must start with a
      user turn).
    - Consecutive messages with the same role are merged, separated by a
      blank line.

    Args:
        raw_messages: dicts with "role" / "content" keys (OpenAI chat format).

    Returns:
        A list of alternating {"role", "content"} dicts starting with "user".

    Raises:
        ValueError: if no user message survives normalization.
    """
    system_parts: List[str] = []
    ua_messages: List[dict] = []

    # Separate system vs user/assistant turns.
    for m in raw_messages:
        role = m.get("role")
        # OpenAI allows `content: null`; `get(..., "")` would still return
        # None in that case, so coerce explicitly to keep concatenation safe.
        content = m.get("content") or ""
        if role == "system":
            if content:
                system_parts.append(content)
        elif role in ("user", "assistant"):
            ua_messages.append({"role": role, "content": content})
        # Any other role (e.g. "tool") is intentionally ignored.

    # Drop leading assistants (template wants to start with a user turn).
    while ua_messages and ua_messages[0]["role"] != "user":
        ua_messages.pop(0)

    # Merge consecutive messages that share a role.
    normalized: List[dict] = []
    for m in ua_messages:
        if normalized and normalized[-1]["role"] == m["role"]:
            normalized[-1]["content"] += "\n\n" + m["content"]
        else:
            normalized.append(m)

    if not normalized:
        raise ValueError("No user messages found after normalization.")

    # Invariant: normalized[0] is a user turn (all leading assistants were
    # dropped above), so system text can always be folded into it directly.
    if system_parts:
        system_text = "\n\n".join(system_parts)
        normalized[0]["content"] = system_text + "\n\n" + normalized[0]["content"]

    return normalized


def build_prompt(raw_messages: List[dict]) -> str:
    """Render OpenAI-style messages into the model's chat prompt string.

    Args:
        raw_messages: dicts with "role" / "content" keys (OpenAI chat format).

    Returns:
        The untokenized prompt text with the generation cue appended.

    Raises:
        ValueError: if normalization leaves no user message.
    """
    normalized = _normalize_messages(raw_messages)
    # Let tokenizer.apply_chat_template enforce the model's exact format.
    prompt = tokenizer.apply_chat_template(
        normalized,
        tokenize=False,
        add_generation_prompt=True,
    )
    return prompt
96
+
97
+
98
+ @app.post("/v1/chat/completions")
99
+ def chat(request: ChatRequest):
100
+ try:
101
+ messages = [m.dict() for m in request.messages]
102
+ prompt = build_prompt(messages)
103
+ except Exception as e:
104
+ # Don't crash the app – return a 400 with explanation
105
+ raise HTTPException(status_code=400, detail=f"Invalid message history: {e}")
106
 
107
  outputs = pipe(
108
  prompt,
 
115
  full = outputs[0]["generated_text"]
116
  reply = full[len(prompt):].strip()
117
 
118
+ return {
119
+ "id": "chatcmpl-1",
120
+ "object": "chat.completion",
121
+ "choices": [
122
+ {
123
+ "index": 0,
124
+ "message": {
125
+ "role": "assistant",
126
+ "content": reply,
127
+ },
128
+ "finish_reason": "stop",
129
+ }
130
  ],
131
+ }