Create app.py
app.py ADDED
@@ -0,0 +1,57 @@
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
from transformers import AutoTokenizer, pipeline

MODEL_ID = "Equall/Saul-7B-Instruct-v1"

# Load the tokenizer and a text-generation pipeline for the model
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    tokenizer=tokenizer,
    device_map="auto",  # uses GPU if available (requires accelerate), CPU otherwise
)

class Message(BaseModel):
    role: str
    content: str

class ChatRequest(BaseModel):
    messages: List[Message]

class ChatResponse(BaseModel):
    reply: str

app = FastAPI()

@app.get("/")
def root():
    return {"status": "ok", "model": MODEL_ID}

@app.post("/chat", response_model=ChatResponse)
def chat(req: ChatRequest):
    # Convert Pydantic objects into plain dicts for the chat template
    # (use m.dict() instead on Pydantic v1)
    messages = [m.model_dump() for m in req.messages]

    # Use the model's chat template as recommended on the model card
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    outputs = pipe(
        prompt,
        max_new_tokens=512,
        do_sample=False,  # greedy decoding; temperature/top_p only apply when sampling
    )

    # The pipeline returns prompt + completion by default, so strip the prompt
    # (equivalently, pass return_full_text=False to the pipeline call)
    full = outputs[0]["generated_text"]
    reply = full[len(prompt):].strip()
    return ChatResponse(reply=reply)
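For reference, a minimal sketch of calling the /chat endpoint from Python once the app is running. The host, port, and the requests dependency are assumptions, not part of the commit; Spaces conventionally serve on port 7860.

# Hypothetical client for the /chat endpoint defined above.
# Assumes the server was started locally, e.g.:
#   uvicorn app:app --host 0.0.0.0 --port 7860
import requests

resp = requests.post(
    "http://localhost:7860/chat",
    json={"messages": [{"role": "user", "content": "What is consideration in contract law?"}]},
)
resp.raise_for_status()
print(resp.json()["reply"])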