from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional, Any
import os
import uuid
import time
import re
import httpx
# ============== Pydantic Models ==============
class Validation(BaseModel):
    """Request body carrying a single free-text prompt (used by /llm_on_cpu and /eth_to_units)."""
    prompt: str  # user-supplied text; forwarded to the LLM or scanned for a numeric value
class EthConversionRequest(BaseModel):
    """Request body for /convert_eth_units."""
    value: float  # amount expressed in `from_unit`
    from_unit: str = "eth"  # "eth", "gwei", or "wei" (matched case-insensitively downstream)
class Message(BaseModel):
    """A single chat turn in OpenAI chat format."""
    role: str  # e.g. "system" / "user" / "assistant" — no validation applied here
    content: str
class ChatCompletionRequest(BaseModel):
    """OpenAI-compatible /v1/chat/completions request.

    Fields marked "accept but ignore" exist only so strict clients (OpenCode)
    can send them without a 422; they are never forwarded to the HF Router.
    """
    model: str
    messages: List[Message]
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 8192
    stream: Optional[bool] = False  # streaming is not implemented; always treated as False
    tools: Optional[List[Any]] = None  # accept but ignore — prevents OpenCode hang
    tool_choice: Optional[Any] = None  # accept but ignore
    stop: Optional[Any] = None  # accept but ignore
    frequency_penalty: Optional[float] = None  # accept but ignore
    presence_penalty: Optional[float] = None  # accept but ignore
    top_p: Optional[float] = None  # accept but ignore
class Choice(BaseModel):
    """One completion choice in the OpenAI response shape (this API always emits exactly one)."""
    index: int
    message: Message
    finish_reason: str  # always "stop" in this implementation
class Usage(BaseModel):
    """Token accounting; values here are character-count estimates (len // 4), not real token counts."""
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
class ChatCompletionResponse(BaseModel):
    """Top-level OpenAI-compatible chat completion response envelope."""
    id: str  # "chatcmpl-<8 hex chars>"
    object: str = "chat.completion"
    created: int  # unix timestamp
    model: str  # echoed back from the request
    choices: List[Choice]
    usage: Usage
# ============== FastAPI App ==============
# Application instance; the metadata below feeds the auto-generated /docs page.
app = FastAPI(
    title="Luminous API",
    description="""
## Luminous Coding Assistant API
OpenAI-compatible API powered by Qwen via HuggingFace Router Inference API.
Set Base URL: `https://jeeltcraft-luminous.hf.space/v1`
""",
    version="1.2.0",
    contact={"name": "Jeeltcraft", "url": "https://huggingface.co/jeeltcraft"},
    license_info={"name": "MIT"},
)
# ============== LLM Core ==============
# Model id requested from the HF Router.
HF_MODEL_ID = "Qwen/Qwen3-4B-Instruct-2507"
# Primary hf-inference provider endpoint; FALLBACK_URL is the generic router
# path retried when the primary answers 404/410 (see call_llm).
PRIMARY_URL = "https://router.huggingface.co/hf-inference/v1/chat/completions"
FALLBACK_URL = "https://router.huggingface.co/v1/chat/completions"
async def call_llm(messages: list, max_tokens: int = 8192, temperature: float = 0.7) -> str:
    """Send one chat-completion request to the HF Router and return the reply text.

    All failures are reported as human-readable strings rather than raised,
    so callers can always treat the return value as displayable content.

    Args:
        messages: OpenAI-style list of {"role": ..., "content": ...} dicts.
        max_tokens: Completion length cap forwarded to the router.
        temperature: Sampling temperature forwarded to the router.
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        return "Error: HF_TOKEN not configured in Space secrets."
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    body = {
        "model": HF_MODEL_ID,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": 0.95,
        "stream": False,  # always False — streaming not implemented
    }
    # NOTE: tools/tool_choice intentionally NOT forwarded to HF Router
    # Sending tools causes infinite spin loop in OpenCode with custom providers
    try:
        async with httpx.AsyncClient(timeout=180.0) as client:
            response = await client.post(PRIMARY_URL, headers=request_headers, json=body)
            # Provider-specific route gone? Retry once against the generic router path.
            if response.status_code in (404, 410):
                response = await client.post(FALLBACK_URL, headers=request_headers, json=body)
            if response.status_code == 403:
                return f"Auth Error (403): Enable 'Inference Providers' on your HF token. Detail: {response.text}"
            response.raise_for_status()
            raw = response.text
            if not raw or not raw.strip():
                return "Error: HF Router returned empty response (model may be loading, retry in 10s)"
            try:
                result = response.json()
            except Exception:
                return f"Error: Non-JSON response from HF Router: {raw[:500]}"
            # Normal chat-completion shape.
            if "choices" in result and result["choices"]:
                return result["choices"][0]["message"]["content"]
            # Legacy text-generation shape: a list of {"generated_text": ...}.
            if isinstance(result, list) and result:
                return result[0].get("generated_text", "No response generated")
            return f"Unexpected response format: {str(result)[:300]}"
    except httpx.TimeoutException:
        return "Error: Request timed out after 180s. Try a shorter prompt or reduce max_tokens."
    except httpx.HTTPStatusError as e:
        return f"Error: HTTP {e.response.status_code} — {e.response.text}"
    except Exception as e:
        return f"Error during inference: {str(e)}"
# ============== OpenAI-Compatible Endpoints ==============
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse, tags=["OpenAI Compatible"])
async def chat_completions(request: ChatCompletionRequest):
    """OpenAI-compatible endpoint. Passes full conversation history, strips tool calls."""
    try:
        # Pass ALL messages directly — full multi-turn history preserved for OpenCode context.
        # tools/tool_choice fields are accepted by the model but NOT forwarded to HF Router.
        history = [{"role": msg.role, "content": msg.content} for msg in request.messages]
        completion = await call_llm(
            messages=history,
            max_tokens=8192 if request.max_tokens is None else request.max_tokens,
            temperature=0.7 if request.temperature is None else request.temperature,
        )
        # Rough usage accounting: ~4 characters per token heuristic.
        est_prompt = len(" ".join(m["content"] for m in history)) // 4
        est_completion = len(completion) // 4
        return ChatCompletionResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
            created=int(time.time()),
            model=request.model,
            choices=[
                Choice(
                    index=0,
                    message=Message(role="assistant", content=completion),
                    finish_reason="stop",
                )
            ],
            usage=Usage(
                prompt_tokens=est_prompt,
                completion_tokens=est_completion,
                total_tokens=est_prompt + est_completion,
            ),
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
@app.get("/v1/models", tags=["OpenAI Compatible"])
async def list_models():
    """Minimal OpenAI-style model listing so clients can discover the served model id."""
    model_entry = {
        "id": "qwen",
        "object": "model",
        "created": int(time.time()),
        "owned_by": "jeeltcraft",
        "context_length": 32768,
    }
    return {"object": "list", "data": [model_entry]}
# ============== Utilities ==============
def convert_eth_units(value: float, from_unit: str = "eth") -> dict:
    """Convert a value between Ethereum denominations (wei / gwei / eth).

    Args:
        value: Numeric amount expressed in `from_unit`.
        from_unit: One of "eth", "gwei", or "wei" (case-insensitive).

    Returns:
        dict with the original input, exact conversions, and display strings.
        Wei is returned as a string because it can exceed the JS safe-integer range.

    Raises:
        ValueError: If `from_unit` is not a recognized denomination.
    """
    from decimal import Decimal

    # Power of 10 that converts one unit of `from_unit` into wei.
    wei_exponent = {"eth": 18, "gwei": 9, "wei": 0}
    unit = from_unit.lower()
    if unit not in wei_exponent:
        raise ValueError("Invalid unit. Use 'eth', 'gwei', or 'wei'")
    # Route through Decimal(str(value)) instead of float multiplication:
    # int(1.1 * 10**18) yields 1100000000000000128 due to binary float
    # rounding, while Decimal gives the exact 1100000000000000000 wei.
    wei_value = int(Decimal(str(value)) * 10 ** wei_exponent[unit])
    return {
        "input": {"value": value, "unit": from_unit},
        "conversions": {"wei": str(wei_value), "gwei": wei_value / 10**9, "eth": wei_value / 10**18},
        "formatted": {
            "wei": f"{wei_value:,} wei",
            "gwei": f"{wei_value / 10**9:,.2f} gwei",
            "eth": f"{wei_value / 10**18:.18f} ETH"
        }
    }
@app.post("/llm_on_cpu", tags=["LLM"])
async def llm_direct(item: Validation):
    """One-shot prompt endpoint: wraps the prompt in a generic system message and calls the LLM."""
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": item.prompt},
    ]
    reply = await call_llm(conversation)
    return {"response": reply}
@app.post("/convert_eth_units", tags=["Utilities"])
async def convert_units(request: EthConversionRequest):
    """JSON-body wrapper around convert_eth_units; maps bad units to HTTP 400."""
    try:
        result = convert_eth_units(request.value, request.from_unit)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    return result
@app.post("/eth_to_units", tags=["Utilities"])
async def eth_to_units(item: Validation):
    """Pull the first number out of the prompt text and convert it as an ETH amount."""
    found = re.search(r'\d+\.?\d*', item.prompt)
    if not found:
        raise HTTPException(status_code=400, detail="No numeric value found")
    return convert_eth_units(float(found.group()), "eth")
@app.get("/quick_convert/{value}/{unit}", tags=["Utilities"])
async def quick_convert(value: float, unit: str = "eth"):
    """Path-parameter variant of the unit converter; bad units become HTTP 400."""
    try:
        converted = convert_eth_units(value, unit)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    return converted
@app.get("/", tags=["Utilities"])
async def root():
    """Landing endpoint with a short service summary and a pointer to /docs."""
    info = {
        "message": "Luminous API — OpenAI Compatible Coding Assistant",
        "model": HF_MODEL_ID,
        "status": "active",
        "docs": "/docs",
    }
    return info
@app.get("/health", tags=["Utilities"])
async def health_check():
    """Liveness probe; also reports whether the HF token secret is present."""
    # Note: an HF_TOKEN set to the empty string deliberately reads as "not configured".
    token_present = bool(os.environ.get("HF_TOKEN"))
    return {
        "status": "healthy",
        "model": HF_MODEL_ID,
        "hf_token_configured": token_present,
        "api_version": "1.2.0",
    }
|