from fastapi import FastAPI
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from llama_index.llms.ollama import Ollama
from llama_index.core.llms import ChatMessage
import requests
import time

app = FastAPI()
# CORSMiddleware is imported but not registered here; add app.add_middleware(CORSMiddleware, ...)
# if a browser frontend on a different origin needs to call this API.

class Generate(BaseModel):
    response: str
    duration: float


# Module-level state: chat_history is shared across all requests, and the model name
# must match a model already pulled into the local Ollama server.
chat_history = []
model = Ollama(model="phi2", base_url="http://localhost:11434")

def generate_text(model: Ollama, prompt: str) -> dict:
    if not prompt:
        return {
            "response": "Please provide a prompt.",
            "duration": 0.0
        }

    chat_history.append({
        "role": "user",
        "content": prompt
    })

    messages = [ChatMessage(role=msg["role"], content=msg["content"]) for msg in chat_history]

    start_time = time.time()
    response_gen = model.stream_chat(messages)
    full_response = ""

    # Stream the reply and accumulate the chunks into a single string.
    try:
        for response_chunk in response_gen:
            full_response += response_chunk.delta
    except Exception as e:
        return {
            "response": f"Error: {str(e)}",
            "duration": 0.0
        }

    duration = time.time() - start_time

    chat_history.append({
        "role": "assistant",
        "content": full_response
    })

    # Return the duration as a float so it validates against the Generate response model.
    return {
        "response": full_response,
        "duration": round(duration, 2)
    }

| | @app.get("/") |
| | async def root(): |
| | return {"message": "Hello World"} |
| |
|
| | @app.get("/health") |
| | async def health_check(): |
| | try: |
| | import requests |
| | response = requests.get("http://localhost:11434/api/version") |
| | ollama_status = "OK" if response.status_code == 200 else "Not available" |
| | except: |
| | ollama_status = "Error" |
| | |
| | return { |
| | "status": "healthy", |
| | "ollama_status": ollama_status, |
| | "models_loaded": model is not None |
| | } |
| |
|
@app.post("/api/generate", summary="Generate text from prompt", tags=["Generate"], response_model=Generate)
def inference(input_prompt: str):
    # input_prompt is a plain str parameter, so FastAPI reads it from the query string.
    return generate_text(model, input_prompt)
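
# --- Optional smoke test (a minimal sketch, separate from the API code above) ---
# Assumes the app is saved as main.py and served with `uvicorn main:app` on the default
# port 8000; the filename and port are assumptions, not taken from the listing itself.
# From another Python process, while the server and Ollama are running:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8000/api/generate",
#       params={"input_prompt": "Explain what FastAPI is in one sentence."},
#   )
#   resp.raise_for_status()
#   data = resp.json()
#   print(data["response"])
#   print(f"took {data['duration']} seconds")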