# (Hugging Face Spaces page-status banner — "Spaces: Sleeping" — captured with the source; not part of the program)
| import os | |
| import requests | |
| from fastapi import FastAPI | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import JSONResponse | |
| from pydantic import BaseModel, Field | |
| import gradio as gr | |
# ---------------- CONFIG ----------------
# Model served via the Hugging Face Inference API; overridable through the environment.
MODEL_ID = os.getenv("MODEL_ID", "HuggingFaceH4/zephyr-7b-beta")
# must be set in Space secrets; if unset, the Authorization header below carries "Bearer None"
HF_TOKEN = os.getenv("HF_TOKEN")
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
# NOTE(review): "π" in the title looks like a mojibake'd emoji — confirm the intended character.
app = FastAPI(title="Edu Assistant API π")
# ---------------- CORS ----------------
# Wide-open CORS so any web frontend can call this API.
# allow_credentials=False is required when allow_origins is the "*" wildcard.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ---------------- MODELS ----------------
class ChatRequest(BaseModel):
    """Request body for the chat endpoint."""

    # The user's message / question.
    user: str
    # Sampling temperature forwarded to the model.
    temperature: float = Field(default=0.7, ge=0.0, le=2.0)
    # Upper bound on generated tokens. Was gt=1, which rejected max_tokens=1
    # even though the Gradio slider's minimum is 1; ge=1 accepts the full
    # UI range (backward compatible: strictly widens the accepted values).
    max_tokens: int = Field(default=512, ge=1, le=1024)
    # Response style; must be one of the keys handled by build_prompt.
    style: str = Field(default="detailed", pattern="^(short|detailed|no_explanation)$")
# ---------------- HELPERS ----------------
def build_prompt(user: str, style: str) -> str:
    """Assemble the tutor prompt sent to the model.

    Raises KeyError if *style* is not short/detailed/no_explanation
    (same contract as the original dict lookup).
    """
    if style == "short":
        note = "Answer briefly."
    elif style == "detailed":
        note = "Explain step by step, with details."
    elif style == "no_explanation":
        note = "Just give the answer, no explanation."
    else:
        raise KeyError(style)
    # Join with single newlines; segment-trailing "\n" yields the blank lines.
    segments = [
        "You are a helpful multilingual tutor. Use emojis to make responses engaging.\n",
        f"User: {user}",
        f"Instruction: {note}\n",
        "Assistant:",
    ]
    return "\n".join(segments)
def query_hf(prompt: str, max_tokens: int, temperature: float) -> str:
    """Call the HF Inference API and return the generated completion text.

    Raises RuntimeError on any non-200 response (the FastAPI endpoint
    surfaces this as a 502).
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "top_p": 0.9,
            "do_sample": True,
            # only the completion, not the echoed prompt
            "return_full_text": False,
        },
    }
    response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
    if response.status_code != 200:
        raise RuntimeError(f"HF API Error {response.status_code}: {response.text}")
    data = response.json()
    # Expected shape: [{"generated_text": "..."}]. BUG FIX: guard against an
    # empty list (previously data[0] raised IndexError) and against a
    # non-dict first element; anything unexpected falls back to str(data).
    if isinstance(data, list) and data and isinstance(data[0], dict) and "generated_text" in data[0]:
        return data[0]["generated_text"].strip()
    return str(data)
# ---------------- FASTAPI ENDPOINT ----------------
# BUG FIX: the route decorator was missing, so despite the CORS setup and the
# "FASTAPI ENDPOINT" label this handler was never registered on the app.
@app.post("/chat")
def chat(req: ChatRequest):
    """Chat endpoint: build the prompt, query the model, return the answer.

    Returns {"answer": ...} on success, or a JSON 502 when the upstream
    HF Inference API call fails.
    """
    try:
        prompt = build_prompt(req.user, req.style)
        text = query_hf(prompt, req.max_tokens, req.temperature)
        # NOTE(review): "π‘ " looks like a mojibake'd emoji prefix — confirm
        # the intended character before shipping.
        return {"answer": "π‘ " + text}
    except RuntimeError as e:
        # query_hf signals upstream failure via RuntimeError -> 502 Bad Gateway.
        return JSONResponse({"error": str(e)}, status_code=502)
# ---------------- GRADIO WRAPPER ----------------
def gradio_chat(user, temperature=0.7, max_tokens=512, style="detailed"):
    """Gradio-facing wrapper: return the model reply, or an "[error] ..." string.

    BUG FIX: build_prompt was previously called outside the try block, so an
    unknown *style* escaped as a KeyError instead of being rendered as text;
    now every failure is caught and shown in the output box.
    """
    try:
        prompt = build_prompt(user, style)
        text = query_hf(prompt, max_tokens, temperature)
        return text
    except Exception as e:
        # Broad catch is deliberate: the UI should display failures, not crash.
        return f"[error] {str(e)}"
# Create Gradio interface
# Inputs mirror ChatRequest's fields (message, temperature, max tokens, style).
iface = gr.Interface(
    fn=gradio_chat,
    inputs=[
        gr.Textbox(label="User Message"),
        gr.Slider(0, 2, value=0.7, label="Temperature"),
        # NOTE(review): keep this slider's minimum consistent with the
        # max_tokens validation bounds declared on ChatRequest — confirm.
        gr.Slider(1, 1024, value=512, label="Max Tokens"),
        gr.Dropdown(["short", "detailed", "no_explanation"], value="detailed", label="Style")
    ],
    outputs=gr.Textbox(label="Assistant Response"),
    # NOTE(review): "π" looks like a mojibake'd emoji in the title — confirm.
    title="Edu Assistant API π"
)
# Launch Gradio on HF Spaces
# 0.0.0.0:7860 is the standard binding a Space expects.
# NOTE(review): share=True creates a public tunnel for *local* runs and is
# ignored on HF Spaces — confirm and consider dropping it.
iface.launch(server_name="0.0.0.0", server_port=7860, share=True)