import asyncio
import functools
import os
from typing import Any, Dict, List

import requests
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
|
|
# ASGI application object; routes below are registered on it.
app = FastAPI(title="Hyper-Fast Serverless Engine v3.5")

# CORS policy: every origin, method and header is accepted.
# NOTE(review): wildcard origins are combined with allow_credentials=True.
# Confirm against the FastAPI CORS documentation that this combination is
# intended before exposing the service to browsers that send credentials.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
|
|
| |
# Hosted inference endpoint that /chat forwards prompts to.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"

# Access token taken from the environment; empty string when unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Request headers for the inference call: a bearer Authorization header is
# included only when a token is configured, otherwise no headers are sent.
headers = {}
if HF_TOKEN:
    headers["Authorization"] = f"Bearer {HF_TOKEN}"
|
|
class ChatPayload(BaseModel):
    """Request body accepted by the ``POST /chat`` endpoint."""

    # Caller-supplied identifier; not read by the visible handler code.
    user_id: str

    # The new message to send to the model.
    user_message: str

    # Prior turns as dicts; the handler reads the "role" and "content" keys
    # and only feeds the last four turns into the prompt.
    current_chat_history: List[Dict[str, Any]] = []

    # Opaque mapping echoed back unchanged as "updated_files".
    user_files: Dict[str, Any] = {}
|
|
@app.get("/")
def status():
    """Return a fixed payload reporting the service as online."""
    liveness = {"status": "online", "engine": "Serverless GPU Bridge Active"}
    return liveness
|
|
# (connect, read) timeouts in seconds for the upstream inference call, so a
# stalled upstream cannot hang a request forever.
_HF_REQUEST_TIMEOUT = (10, 120)

# Number of most-recent history turns included in the prompt.
_HISTORY_WINDOW = 4


def _build_prompt(history: List[Dict[str, Any]], user_query: str) -> str:
    """Assemble the Llama 3 chat prompt from recent history and the new message."""
    segments = [
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        "You are a fast, responsive AI assistant.<|eot_id|>"
    ]
    for turn in history[-_HISTORY_WINDOW:]:
        # Any role other than "user" is treated as an assistant turn.
        role = "user" if turn.get("role") == "user" else "assistant"
        content = turn.get("content", "")
        segments.append(
            f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
        )
    # End with an open assistant header so the model writes the reply.
    segments.append(
        f"<|start_header_id|>user<|end_header_id|>\n\n{user_query}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
    return "".join(segments)


def _extract_generated_text(result: Any) -> str:
    """Pull the generated text out of the inference response body.

    Handles a non-empty list whose first item is a dict, or a dict carrying
    "generated_text"; any other shape is returned as its string form.
    """
    if isinstance(result, list) and result:
        first = result[0]
        if isinstance(first, dict):
            return str(first.get("generated_text") or "").strip()
    elif isinstance(result, dict) and "generated_text" in result:
        return str(result.get("generated_text") or "").strip()
    return str(result)


@app.post("/chat")
async def chat_endpoint(payload: ChatPayload):
    """Forward a chat message to the inference API and return the new history.

    Args:
        payload: Request body with the new user message, prior chat history
            and a files mapping that is echoed back unchanged.

    Returns:
        A dict with "updated_chat_history" (the incoming history plus the new
        user and assistant turns) and "updated_files" (``payload.user_files``).

    Raises:
        HTTPException: With the upstream status code when the inference API
            answers with a non-200 status, or with status 500 for any other
            failure (network error, timeout, undecodable response).
    """
    try:
        user_query = payload.user_message
        prompt = _build_prompt(payload.current_chat_history, user_query)

        api_payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 256,
                "temperature": 0.7,
                "return_full_text": False,
            },
        }

        # requests is blocking; run it in the default executor so the event
        # loop keeps serving other requests while waiting on the upstream.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            functools.partial(
                requests.post,
                API_URL,
                headers=headers,
                json=api_payload,
                timeout=_HF_REQUEST_TIMEOUT,
            ),
        )

        if response.status_code != 200:
            raise HTTPException(
                status_code=response.status_code,
                detail=f"HF API Error: {response.text}",
            )

        generated_text = _extract_generated_text(response.json())

        updated_history = payload.current_chat_history + [
            {"role": "user", "content": user_query},
            {"role": "assistant", "content": generated_text},
        ]

        return {
            "updated_chat_history": updated_history,
            "updated_files": payload.user_files,
        }

    except HTTPException:
        # Keep the upstream status code instead of collapsing it into a 500.
        raise
    except Exception as exec_err:
        print(f"Exception Triggered: {str(exec_err)}")
        raise HTTPException(
            status_code=500,
            detail=f"Cloud Matrix Failure: {str(exec_err)}",
        ) from exec_err