Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """OpenAI-compatible Chat Completions API server for the Fourth GPT model.""" | |
| import time | |
| import uuid | |
| import json | |
| import os | |
| from fastapi import FastAPI, HTTPException, Header | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from typing import Optional | |
| from model import FourthModel | |
# Application singleton plus the model it serves.
app = FastAPI(title="Fourth GPT API", version="1.0.0")

# Wide-open CORS so browser clients on any origin can call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Single shared model instance; weights are loaded at startup, not here.
fourth = FourthModel()
class Message(BaseModel):
    """One chat turn: a role ("user"/"assistant"/...) and its text content."""

    role: str
    content: str
class ChatCompletionRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible subset)."""

    model: str = "fourth-gpt"
    messages: list[Message]
    max_tokens: int = 128
    temperature: float = 0.5
    stream: bool = False
class Choice(BaseModel):
    """A single completion candidate; this server always returns exactly one."""

    index: int = 0
    message: Message
    finish_reason: str = "stop"
class Usage(BaseModel):
    """Token accounting mirrored from the OpenAI response schema."""

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
class ChatCompletionResponse(BaseModel):
    """Top-level response body, shaped like OpenAI's chat.completion object."""

    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: list[Choice]
    usage: Usage
class ModelInfo(BaseModel):
    """One entry in the /v1/models listing."""

    id: str
    object: str = "model"
    created: int = 0
    owned_by: str = "fourth"
class ModelList(BaseModel):
    """Envelope for the model listing, matching OpenAI's list shape."""

    object: str = "list"
    data: list[ModelInfo]
@app.on_event("startup")
def startup():
    """Load model weights once when the server starts.

    NOTE(review): the decorator was missing in the reviewed copy, so the
    model was never loaded — restored here. Without it every request would
    hit an unloaded model.
    """
    fourth.load()
@app.get("/")
def root():
    """Liveness/info endpoint for the root path.

    NOTE(review): route decorator restored — without it this handler was
    never registered and GET / returned 404.
    """
    return {"message": "Fourth GPT API is running", "model": "fourth-gpt", "params": 344256}
@app.get("/v1/models")
def list_models() -> ModelList:
    """List available models (OpenAI-compatible /v1/models).

    NOTE(review): route decorator restored — handler was not registered in
    the reviewed copy.
    """
    return ModelList(data=[
        ModelInfo(id="fourth-gpt", created=int(time.time())),
    ])
@app.get("/v1/models/{model_id}")
def get_model(model_id: str) -> ModelInfo:
    """Fetch a single model's metadata; 404 for any id other than fourth-gpt.

    NOTE(review): route decorator restored — handler was not registered in
    the reviewed copy.
    """
    if model_id != "fourth-gpt":
        raise HTTPException(status_code=404, detail="Model not found")
    return ModelInfo(id="fourth-gpt", created=int(time.time()))
@app.post("/v1/chat/completions")
def chat_completions(
    req: ChatCompletionRequest,
    authorization: Optional[str] = Header(None),
) -> ChatCompletionResponse:
    """OpenAI-compatible chat completion.

    Generates a reply to the most recent user message only (earlier turns
    and system messages are ignored by this server).

    Raises:
        HTTPException 400: empty ``messages``, no user message, or
            ``stream=True`` (this server does not implement SSE streaming,
            so silently returning a non-stream body would break streaming
            clients — fail loudly instead).

    NOTE(review): the route decorator was missing in the reviewed copy and
    has been restored.
    """
    if not req.messages:
        raise HTTPException(status_code=400, detail="messages must not be empty")
    if req.stream:
        raise HTTPException(status_code=400, detail="stream=true is not supported by this server")

    # Walk backwards to find the latest user turn.
    user_msg = None
    for msg in reversed(req.messages):
        if msg.role == "user":
            user_msg = msg.content
            break
    if user_msg is None:
        raise HTTPException(status_code=400, detail="No user message found")

    response_text = fourth.generate(
        prompt=user_msg,
        max_tokens=req.max_tokens,
        temperature=req.temperature,
    )

    # Character counts stand in for token counts — a crude proxy; exact
    # usage would require the model's tokenizer. TODO confirm acceptable.
    prompt_tokens = len(user_msg)
    completion_tokens = len(response_text)

    return ChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
        created=int(time.time()),
        model=req.model,
        choices=[
            Choice(
                message=Message(role="assistant", content=response_text),
            )
        ],
        usage=Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        ),
    )
@app.get("/health")
def health():
    """Health probe: reports whether model weights have been loaded.

    NOTE(review): route decorator restored — handler was not registered in
    the reviewed copy.
    """
    return {"status": "ok", "model_loaded": fourth.model is not None}
if __name__ == "__main__":
    # Local/dev entry point; PORT env var overrides the default (7860 is
    # the Hugging Face Spaces convention).
    import uvicorn

    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)