File size: 1,305 Bytes
3bb8132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from huggingface_hub import InferenceClient

# ASGI application object; the route handlers below register themselves on it.
app = FastAPI()

# Shared Hugging Face inference client, created once at import time and bound
# to the Mistral-7B-Instruct-v0.2 model. Every request reuses this instance.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")

class PromptRequest(BaseModel):
    # Request body accepted by the POST /generate/ endpoint.

    # The newest user message; it becomes the final [INST] turn of the prompt.
    prompt: str
    # Earlier conversation turns. format_prompt unpacks every item as a
    # (user_prompt, bot_response) pair, so each element must have exactly two
    # entries. The element type is not validated here (plain `list`).
    history: list

def format_prompt(message, history):
    """Build the instruction-tagged prompt string sent to the model.

    The result starts with ``<s>`` and a system instruction wrapped in
    ``[INST] ... [/INST]``, followed by one ``[INST] user [/INST] reply</s> ``
    segment per past turn, and ends with the new message wrapped in
    ``[INST] ... [/INST]``.

    Args:
        message: The newest user message.
        history: Iterable of two-item ``(user_prompt, bot_response)`` pairs,
            oldest first.

    Returns:
        The fully assembled prompt as a single string.
    """
    system_prompt = "You are Mistral, a gentle and a useful AI assistant. My input is "

    # Collect the pieces in order and join once at the end.
    segments = ["<s>", f"[INST] {system_prompt} [/INST]"]
    for user_prompt, bot_response in history:
        segments.append(f"[INST] {user_prompt} [/INST]")
        segments.append(f" {bot_response}</s> ")
    segments.append(f"[INST] {message} [/INST]")

    return "".join(segments)

def generate(prompt, history):
    """Stream a model reply, yielding the text accumulated so far.

    The prompt and history are formatted with ``format_prompt`` and sent to
    the module-level inference ``client`` in streaming mode. After every
    streamed item, the text of its token is appended to a running string and
    that running string is yielded, so each yielded value extends the previous
    one and the last value is the complete reply.

    Args:
        prompt: The newest user message.
        history: Past turns as ``(user_prompt, bot_response)`` pairs.

    Yields:
        The cumulative reply text after each streamed token.
    """
    token_stream = client.text_generation(
        format_prompt(prompt, history),
        stream=True,
        details=True,
        return_full_text=False,
    )

    accumulated = ""
    for chunk in token_stream:
        accumulated = accumulated + chunk.token.text
        yield accumulated

@app.post("/generate/")
async def generate_response(request: PromptRequest):
    """Run text generation for the request and return every partial output.

    Args:
        request: Body carrying the new ``prompt`` and the prior ``history``.

    Returns:
        ``{"responses": [...]}`` where the list holds each cumulative string
        yielded by ``generate``, in order; the last entry is the full reply.

    Raises:
        HTTPException: With status 500 and the error text as detail when
            generation fails for any reason.
    """
    # Local import keeps this handler self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    def _collect():
        # generate() is a plain synchronous generator, so it must be consumed
        # with ordinary iteration. Using `async for` on it raises TypeError.
        return list(generate(request.prompt, request.history))

    try:
        # Drain the generator in a worker thread so the blocking inference
        # call does not stall the event loop.
        responses = await run_in_threadpool(_collect)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"responses": responses}