ag3 / app.py
nulltron's picture
Update app.py
ef9242f verified
Raw
History Blame Contribute Delete
3.3 kB
import asyncio
import os
from typing import List, Dict, Any

import requests
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
# Application instance that the route decorators below attach to.
app = FastAPI(title="Hyper-Fast Serverless Engine v3.5")

# CORS policy: every origin, method and header is accepted, and credentials
# are allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is a very
# permissive combination -- confirm it is intentional before exposing this
# service to browsers that send cookies or auth headers.
_cors_settings = dict(
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(CORSMiddleware, **_cors_settings)
# Hugging Face serverless Inference API endpoint for Meta-Llama-3-8B-Instruct.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "meta-llama/Meta-Llama-3-8B-Instruct"
)

# Access token read from the HF_TOKEN environment variable (per the original
# note, supplied through the Space's secrets). Unset yields the empty string.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Headers sent with every inference request: a bearer Authorization header
# when a token is configured, otherwise no headers at all.
headers = {}
if HF_TOKEN:
    headers["Authorization"] = f"Bearer {HF_TOKEN}"
class ChatPayload(BaseModel):
    """Request body accepted by the ``/chat`` endpoint."""

    # Identifier of the calling user. The chat handler in this file does not
    # read it.
    user_id: str

    # The new message to send to the model.
    user_message: str

    # Earlier conversation turns, oldest first. The chat handler reads each
    # turn's "role" and "content" keys. Defaults to an empty history.
    current_chat_history: List[Dict[str, Any]] = []

    # Caller-supplied file state. The chat handler returns it unchanged.
    # Defaults to an empty mapping.
    user_files: Dict[str, Any] = {}
@app.get("/")
def status():
    """Report that the service is up.

    Returns:
        A dict with a fixed ``"status"`` of ``"online"`` and a fixed
        ``"engine"`` label.
    """
    health_report = dict(
        status="online",
        engine="Serverless GPU Bridge Active",
    )
    return health_report
# Seconds to wait on the upstream inference call before giving up, so a
# stalled upstream cannot hold a request open indefinitely.
_HF_REQUEST_TIMEOUT_SECONDS = 60

# Number of most recent history turns replayed into the prompt.
_HISTORY_WINDOW = 4


def _build_llama3_prompt(history: List[Dict[str, Any]], user_query: str) -> str:
    """Render the system prompt, recent history and new query as one prompt string."""
    segments = [
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        "You are a fast, responsive AI assistant.<|eot_id|>"
    ]
    for turn in history[-_HISTORY_WINDOW:]:
        # Anything that is not explicitly a user turn is replayed as assistant.
        role = "user" if turn.get("role") == "user" else "assistant"
        content = turn.get("content", "")
        segments.append(
            f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
        )
    # End with an open assistant header so the model writes the reply.
    segments.append(
        f"<|start_header_id|>user<|end_header_id|>\n\n{user_query}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
    return "".join(segments)


def _post_inference(api_payload: Dict[str, Any]):
    """Send the generation request to the inference endpoint (blocking call)."""
    return requests.post(
        API_URL,
        headers=headers,
        json=api_payload,
        timeout=_HF_REQUEST_TIMEOUT_SECONDS,
    )


def _extract_generated_text(result: Any) -> str:
    """Pull the generated text out of the decoded inference response."""
    # The handler accepts either a list of result dicts or a single result dict.
    if isinstance(result, list) and result and isinstance(result[0], dict):
        return result[0].get("generated_text", "").strip()
    if isinstance(result, dict) and "generated_text" in result:
        return result.get("generated_text", "").strip()
    # Unknown shape: surface the raw payload rather than failing.
    return str(result)


@app.post("/chat")
async def chat_endpoint(payload: ChatPayload):
    """Generate an assistant reply for the user's message.

    Builds a prompt from a fixed system message, the last four history turns
    and the new user message, sends it to the inference endpoint, and appends
    the exchange to the chat history.

    Args:
        payload: The chat request (message, prior history, file state).

    Returns:
        A dict with ``"updated_chat_history"`` (the incoming history plus the
        new user and assistant turns) and ``"updated_files"`` (the incoming
        ``user_files``, unchanged).

    Raises:
        HTTPException: With the upstream status code when the inference
            endpoint answers with anything other than 200, or with status 500
            for any other failure (network error, timeout, undecodable body).
    """
    try:
        user_query = payload.user_message
        prompt = _build_llama3_prompt(payload.current_chat_history, user_query)

        api_payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 256,
                "temperature": 0.7,
                "return_full_text": False,
            },
        }

        # requests is blocking; run it on a worker thread so the event loop
        # keeps serving other requests while the upstream call is in flight.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, _post_inference, api_payload)

        if response.status_code != 200:
            raise HTTPException(
                status_code=response.status_code,
                detail=f"HF API Error: {response.text}",
            )

        generated_text = _extract_generated_text(response.json())

        updated_history = payload.current_chat_history + [
            {"role": "user", "content": user_query},
            {"role": "assistant", "content": generated_text},
        ]
        return {
            "updated_chat_history": updated_history,
            "updated_files": payload.user_files,
        }
    except HTTPException:
        # Already a well-formed HTTP error (e.g. the upstream status above);
        # let it through instead of re-wrapping it as a generic 500.
        raise
    except Exception as exec_err:
        # Printed so the failure shows up in the hosting console logs.
        print(f"Exception Triggered: {str(exec_err)}")
        raise HTTPException(
            status_code=500,
            detail=f"Cloud Matrix Failure: {str(exec_err)}",
        ) from exec_err