| from llama_cpp.server.app import create_app, Settings | |
| from fastapi.responses import HTMLResponse | |
| import os | |
# Server configuration for llama-cpp-python's OpenAI-compatible FastAPI app:
# load the local GGUF model with embeddings enabled, 4 CPU threads, and
# 33 layers offloaded to the GPU.
settings = Settings(
    model="model/gguf-model.gguf",
    n_threads=4,
    n_gpu_layers=33,
    embedding=True,
)
app = create_app(settings)
@app.get("/")
def custom_index_route():
    """Serve a minimal static HTML page at the application root.

    Fix: the original function was defined but never registered with the
    FastAPI app (no route decorator and no caller), so it was unreachable
    dead code. Registering it on GET / makes it the index page, which its
    name indicates was the intent.

    Returns:
        HTMLResponse: the static HTML document below.
    """
    html_content = """
    <html>
    <body>
    <h1>Test</h1>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)
if __name__ == "__main__":
    # Imported lazily so merely importing this module (e.g. under another
    # ASGI server) does not require uvicorn.
    import uvicorn

    # Listen on all interfaces, port 7860 (the Hugging Face Spaces default
    # is also 7860 — TODO confirm that is why this port was chosen).
    uvicorn.run(app, host="0.0.0.0", port=7860)