Spaces:

nulltron
/

ag3

Paused

App Files Files Community

ag3 / app.py

nulltron

Update app.py

ef9242f verified 5 days ago

Raw

History Blame Contribute Delete

3.3 kB

	from fastapi import FastAPI, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel
	from typing import List, Dict, Any
	import requests
	import os

	app = FastAPI(title="Hyper-Fast Serverless Engine v3.5")

	# CORS: any origin may call this API. Credentialed requests are disabled
	# because a wildcard origin must not be combined with allow_credentials=True
	# (FastAPI requires explicit origins in that case), and nothing in this
	# service reads cookies or client-supplied auth headers.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Hugging Face serverless Inference API endpoint for Meta-Llama-3-8B-Instruct.
	API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"

	# The token comes from the HF_TOKEN environment variable (set as a Space
	# secret). When it is missing or empty, no Authorization header is sent.
	HF_TOKEN = os.environ.get("HF_TOKEN", "")
	headers = {}
	if HF_TOKEN:
	    headers["Authorization"] = f"Bearer {HF_TOKEN}"

	# Request body accepted by POST /chat.
	class ChatPayload(BaseModel):
	    # Caller-supplied identifier; the /chat handler in this file does not read it.
	    user_id: str
	    # The new message to send to the model.
	    user_message: str
	    # Earlier turns; the handler reads each entry's "role" and "content" keys.
	    current_chat_history: List[Dict[str, Any]] = []
	    # Opaque file state; the handler returns it unchanged as "updated_files".
	    user_files: Dict[str, Any] = {}

	# Liveness probe: returns a fixed payload so callers can confirm the app is up.
	@app.get("/")
	def status():
	    liveness = {
	        "status": "online",
	        "engine": "Serverless GPU Bridge Active",
	    }
	    return liveness

	# Upper bound (seconds) on the upstream call so a stalled request cannot hang forever.
	_HF_REQUEST_TIMEOUT_SECONDS = 60

	# Number of most recent history turns included in the prompt.
	_HISTORY_WINDOW = 4


	def _format_turn(role: str, content: Any) -> str:
	    """Render one chat turn using the Llama 3 instruct header tokens."""
	    return f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"


	def _build_prompt(history: List[Dict[str, Any]], user_query: str) -> str:
	    """Assemble the full prompt: system turn, recent history, new user turn.

	    The special tokens are written without backslashes; an escaped pipe
	    would place a literal backslash in the prompt text.
	    """
	    parts = [
	        "<|begin_of_text|>",
	        _format_turn("system", "You are a fast, responsive AI assistant."),
	    ]
	    for turn in history[-_HISTORY_WINDOW:]:
	        # Any role other than "user" is treated as an assistant turn.
	        role = "user" if turn.get("role") == "user" else "assistant"
	        parts.append(_format_turn(role, turn.get("content", "")))
	    parts.append(_format_turn("user", user_query))
	    # A trailing open assistant header cues the model to produce the reply.
	    parts.append("<|start_header_id|>assistant<|end_header_id|>\n\n")
	    return "".join(parts)


	@app.post("/chat")
	async def chat_endpoint(payload: ChatPayload):
	    """Send the user's message to the Hugging Face Inference API.

	    Builds a prompt from the last few history turns plus the new message,
	    posts it to ``API_URL`` and returns the history extended with the new
	    user/assistant exchange, together with ``payload.user_files`` unchanged.

	    Raises:
	        HTTPException: with the upstream status code when the Inference API
	            answers with anything other than 200, or with status 500 for
	            any other failure (network error, timeout, invalid JSON, ...).
	    """
	    # Local import keeps this block self-contained; run_in_threadpool ships
	    # with FastAPI, which this file already depends on.
	    from fastapi.concurrency import run_in_threadpool

	    try:
	        user_query = payload.user_message

	        api_payload = {
	            "inputs": _build_prompt(payload.current_chat_history, user_query),
	            "parameters": {
	                "max_new_tokens": 256,
	                "temperature": 0.7,
	                "return_full_text": False,
	            },
	        }

	        # requests is blocking: run it in a worker thread so the event loop
	        # keeps serving other requests, and bound the wait with a timeout.
	        response = await run_in_threadpool(
	            requests.post,
	            API_URL,
	            headers=headers,
	            json=api_payload,
	            timeout=_HF_REQUEST_TIMEOUT_SECONDS,
	        )

	        if response.status_code != 200:
	            raise HTTPException(
	                status_code=response.status_code,
	                detail=f"HF API Error: {response.text}",
	            )

	        result = response.json()

	        # The handler accepts a list of dicts or a single dict; any other
	        # shape is returned as its string form.
	        if isinstance(result, list) and result and isinstance(result[0], dict):
	            generated_text = result[0].get("generated_text", "").strip()
	        elif isinstance(result, dict) and "generated_text" in result:
	            generated_text = result.get("generated_text", "").strip()
	        else:
	            generated_text = str(result)

	        updated_history = payload.current_chat_history + [
	            {"role": "user", "content": user_query},
	            {"role": "assistant", "content": generated_text},
	        ]

	        return {
	            "updated_chat_history": updated_history,
	            "updated_files": payload.user_files,
	        }

	    except HTTPException as http_err:
	        # Re-raise as-is so the upstream status code reaches the client
	        # instead of being rewrapped as a generic 500 below.
	        print(f"Exception Triggered: {str(http_err)}")
	        raise
	    except Exception as exec_err:
	        # Printed so the failure is visible in the Space's console logs.
	        print(f"Exception Triggered: {str(exec_err)}")
	        raise HTTPException(
	            status_code=500,
	            detail=f"Cloud Matrix Failure: {str(exec_err)}",
	        ) from exec_err