from fastapi import FastAPI
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from llama_index.llms.ollama import Ollama
from llama_index.core.llms import ChatMessage
import time
import requests

app = FastAPI()
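
# CORSMiddleware is imported above but never registered. A permissive
# registration would look like the sketch below; the origin, method, and
# header settings are assumptions, not from the original file.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)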


class Generate(BaseModel):
    response: str
    duration: float


chat_history = []
model = Ollama(model="phi2", base_url="http://localhost:11434")


def generate_text(model: Ollama, prompt: str) -> dict:
    if prompt == "":
        return {
            "response": "Please provide a prompt.",
            "duration": 0.0,
        }

    # Record the user's turn so the model sees the whole conversation.
    chat_history.append({
        "role": "user",
        "content": prompt,
    })
    messages = [ChatMessage(role=msg["role"], content=msg["content"]) for msg in chat_history]

    start_time = time.time()
    response_gen = model.stream_chat(messages)
    full_response = ""
    try:
        # Accumulate the streamed deltas into the full response text.
        for response_chunk in response_gen:
            full_response += response_chunk.delta
    except Exception as e:
        return {
            "response": f"Error: {str(e)}",
            "duration": 0.0,
        }
    duration = time.time() - start_time

    # Store the assistant's reply so follow-up prompts keep the context.
    chat_history.append({
        "role": "assistant",
        "content": full_response,
    })
    return {
        "response": full_response,
        "duration": round(duration, 2),
    }
@app.get("/")
async def root():
return {"message": "Hello World"}
@app.get("/health")
async def health_check():
try:
import requests
response = requests.get("http://localhost:11434/api/version")
ollama_status = "OK" if response.status_code == 200 else "Not available"
except:
ollama_status = "Error"
return {
"status": "healthy",
"ollama_status": ollama_status,
"models_loaded": model is not None
}


@app.post("/api/generate", summary="Generate text from prompt", tags=["Generate"], response_model=Generate)
def inference(input_prompt: str):
    return generate_text(model, input_prompt)
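

# Optional local entry point (not in the original file): a minimal sketch for
# running the API directly with uvicorn, which is assumed to be installed
# alongside FastAPI. Host and port are assumptions; adjust them to match your
# Dockerfile or deployment.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request once the server is up (input_prompt is sent as a query
# parameter because the endpoint declares a plain str argument):
#
#   curl -X POST "http://localhost:8000/api/generate?input_prompt=Tell%20me%20a%20joke"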