File size: 8,774 Bytes
8e2d3ce
5dab16f
8062422
e2b3072
b8f0d37
 
 
97ad689
5dab16f
0047e36
 
6c2e700
 
 
8e2d3ce
 
97ad689
8e2d3ce
b8f0d37
 
 
 
 
 
 
 
8062422
b8f0d37
8062422
 
 
 
 
 
b8f0d37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0047e36
 
8dabf79
 
 
 
12422af
 
8dabf79
8062422
12422af
 
8dabf79
 
12422af
 
 
0047e36
12422af
 
 
 
97ad689
 
12422af
 
07bdd5c
 
 
 
12422af
97ad689
e9f41d5
8062422
e9f41d5
 
 
8062422
97ad689
8062422
 
12422af
0047e36
8062422
12422af
 
e9f41d5
12422af
 
e9f41d5
12422af
 
97ad689
12422af
 
 
 
 
 
 
 
 
 
 
e9f41d5
12422af
97ad689
 
12422af
 
97ad689
12422af
97ad689
12422af
0047e36
 
 
b8f0d37
 
97ad689
b8f0d37
8062422
b8f0d37
8062422
 
12422af
 
 
 
 
 
 
 
 
5e7aadc
12422af
 
 
 
 
b8f0d37
 
 
 
97ad689
 
 
 
 
b8f0d37
 
 
 
 
 
 
 
 
12422af
97ad689
b8f0d37
 
 
 
12422af
 
 
 
 
 
 
b8f0d37
 
 
12422af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97ad689
12422af
 
 
 
 
 
6c2e700
97ad689
8e2d3ce
97ad689
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12422af
 
97ad689
 
8e2d3ce
97ad689
 
 
 
 
12422af
 
8062422
8e2d3ce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
import os
import re
import time
import uuid
from decimal import Decimal
from typing import Any, List, Optional

import httpx
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# ============== Pydantic Models ==============

class Validation(BaseModel):
    """Free-form prompt wrapper used by /llm_on_cpu and /eth_to_units."""
    prompt: str  # raw user prompt text

class EthConversionRequest(BaseModel):
    """Body for /convert_eth_units: a numeric amount plus its source unit."""
    value: float            # amount to convert, expressed in `from_unit`
    from_unit: str = "eth"  # "eth", "gwei", or "wei" (see convert_eth_units)

class Message(BaseModel):
    """Single chat turn in OpenAI message format."""
    role: str     # e.g. "system", "user", "assistant"
    content: str  # text of the turn

class ChatCompletionRequest(BaseModel):
    """OpenAI /v1/chat/completions request body.

    Only model/messages/temperature/max_tokens are honored; the remaining
    fields are accepted for client compatibility but ignored server-side
    (stream is always forced to False when calling the HF Router).
    """
    model: str
    messages: List[Message]
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 8192
    stream: Optional[bool] = False
    tools: Optional[List[Any]] = None          # accept but ignore — prevents OpenCode hang
    tool_choice: Optional[Any] = None          # accept but ignore
    stop: Optional[Any] = None                 # accept but ignore
    frequency_penalty: Optional[float] = None  # accept but ignore
    presence_penalty: Optional[float] = None   # accept but ignore
    top_p: Optional[float] = None              # accept but ignore

class Choice(BaseModel):
    """One generated completion within a chat response."""
    index: int
    message: Message
    finish_reason: str  # this server always reports "stop"

class Usage(BaseModel):
    """Token accounting; values here are estimated as len(text) // 4."""
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

class ChatCompletionResponse(BaseModel):
    """OpenAI-compatible chat completion response envelope."""
    id: str
    object: str = "chat.completion"
    created: int  # unix timestamp
    model: str
    choices: List[Choice]
    usage: Usage

# ============== FastAPI App ==============

# App metadata below feeds the auto-generated OpenAPI docs served at /docs.
app = FastAPI(
    title="Luminous API",
    description="""
    ## Luminous Coding Assistant API
    OpenAI-compatible API powered by Qwen via HuggingFace Router Inference API.
    Set Base URL: `https://jeeltcraft-luminous.hf.space/v1`
    """,
    version="1.2.0",
    contact={"name": "Jeeltcraft", "url": "https://huggingface.co/jeeltcraft"},
    license_info={"name": "MIT"},
)

# ============== LLM Core ==============

# Upstream model served through the HF Router.
HF_MODEL_ID = "Qwen/Qwen3-4B-Instruct-2507"

# Primary (hf-inference provider) endpoint; the generic router URL is used
# as a fallback when the primary path answers 404/410 (see call_llm).
PRIMARY_URL  = "https://router.huggingface.co/hf-inference/v1/chat/completions"
FALLBACK_URL = "https://router.huggingface.co/v1/chat/completions"

async def call_llm(messages: list, max_tokens: int = 8192, temperature: float = 0.7) -> str:
    """Send a chat request to the HF Router and return the reply text.

    Errors are returned as human-readable strings rather than raised, so
    the calling endpoint can surface them directly as the assistant reply.

    Args:
        messages: OpenAI-style list of {"role": ..., "content": ...} dicts.
        max_tokens: Completion budget forwarded to the router.
        temperature: Sampling temperature forwarded to the router.

    Returns:
        The assistant's reply text, or a diagnostic "Error: ..." string.
    """
    HF_API_TOKEN = os.environ.get("HF_TOKEN")
    if not HF_API_TOKEN:
        return "Error: HF_TOKEN not configured in Space secrets."

    headers = {
        "Authorization": f"Bearer {HF_API_TOKEN}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": HF_MODEL_ID,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": 0.95,
        "stream": False                    # always False — streaming not implemented
    }
    # NOTE: tools/tool_choice intentionally NOT forwarded to HF Router
    # Sending tools causes infinite spin loop in OpenCode with custom providers

    try:
        async with httpx.AsyncClient(timeout=180.0) as client:
            response = await client.post(PRIMARY_URL, headers=headers, json=payload)

            # Retry on the generic router path if the provider path is gone.
            if response.status_code in [404, 410]:
                response = await client.post(FALLBACK_URL, headers=headers, json=payload)

            if response.status_code == 403:
                return f"Auth Error (403): Enable 'Inference Providers' on your HF token. Detail: {response.text}"

            response.raise_for_status()

            raw = response.text
            if not raw or not raw.strip():
                return "Error: HF Router returned empty response (model may be loading, retry in 10s)"

            try:
                result = response.json()
            except Exception:
                return f"Error: Non-JSON response from HF Router: {raw[:500]}"

            # Prefer the OpenAI-style payload; fall back to the legacy
            # text-generation list format.
            if "choices" in result and result["choices"]:
                return result["choices"][0]["message"]["content"]
            elif isinstance(result, list) and result:
                return result[0].get("generated_text", "No response generated")
            else:
                return f"Unexpected response format: {str(result)[:300]}"

    except httpx.TimeoutException:
        return "Error: Request timed out after 180s. Try a shorter prompt or reduce max_tokens."
    except httpx.HTTPStatusError as e:
        # Fix: original f-string ran the status code and body together
        # ("HTTP 500Internal ..."); add a separator for readability.
        return f"Error: HTTP {e.response.status_code}: {e.response.text}"
    except Exception as e:
        return f"Error during inference: {str(e)}"

# ============== OpenAI-Compatible Endpoints ==============

@app.post("/v1/chat/completions", response_model=ChatCompletionResponse, tags=["OpenAI Compatible"])
async def chat_completions(request: ChatCompletionRequest):
    """OpenAI-compatible endpoint. Passes full conversation history, strips tool calls."""
    try:
        # Forward the entire multi-turn history so the model keeps context.
        # tools/tool_choice are accepted on the request model but never sent upstream.
        history = [{"role": msg.role, "content": msg.content} for msg in request.messages]

        answer = await call_llm(
            messages=history,
            max_tokens=8192 if request.max_tokens is None else request.max_tokens,
            temperature=0.7 if request.temperature is None else request.temperature,
        )

        # Rough token accounting: ~4 characters per token.
        approx_prompt = len(" ".join(turn["content"] for turn in history)) // 4
        approx_completion = len(answer) // 4

        reply = Choice(
            index=0,
            message=Message(role="assistant", content=answer),
            finish_reason="stop",
        )
        return ChatCompletionResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
            created=int(time.time()),
            model=request.model,
            choices=[reply],
            usage=Usage(
                prompt_tokens=approx_prompt,
                completion_tokens=approx_completion,
                total_tokens=approx_prompt + approx_completion,
            ),
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")


@app.get("/v1/models", tags=["OpenAI Compatible"])
async def list_models():
    """Advertise the single served model in OpenAI list format."""
    model_entry = {
        "id": "qwen",
        "object": "model",
        "created": int(time.time()),
        "owned_by": "jeeltcraft",
        "context_length": 32768,
    }
    return {"object": "list", "data": [model_entry]}

# ============== Utilities ==============

def convert_eth_units(value: float, from_unit: str = "eth") -> dict:
    """Convert a value between Ethereum denominations (wei / gwei / eth).

    Args:
        value: Amount to convert, expressed in `from_unit`.
        from_unit: One of "eth", "gwei", or "wei" (case-insensitive).

    Returns:
        dict with the original input, numeric conversions (wei kept as a
        string so JSON clients don't lose integer precision), and
        human-readable formatted strings.

    Raises:
        ValueError: If `from_unit` is not a recognized denomination.
    """
    unit = from_unit.lower()
    # Fix: scale via Decimal(str(value)) instead of float multiplication —
    # int(value * 10**18) picks up binary-float rounding artifacts for
    # ordinary decimal inputs like 1.1 ETH.
    if unit == "eth":
        wei_value = int(Decimal(str(value)) * 10**18)
    elif unit == "gwei":
        wei_value = int(Decimal(str(value)) * 10**9)
    elif unit == "wei":
        wei_value = int(value)
    else:
        raise ValueError("Invalid unit. Use 'eth', 'gwei', or 'wei'")
    return {
        "input": {"value": value, "unit": from_unit},
        "conversions": {"wei": str(wei_value), "gwei": wei_value / 10**9, "eth": wei_value / 10**18},
        "formatted": {
            "wei": f"{wei_value:,} wei",
            "gwei": f"{wei_value / 10**9:,.2f} gwei",
            "eth": f"{wei_value / 10**18:.18f} ETH"
        }
    }

@app.post("/llm_on_cpu", tags=["LLM"])
async def llm_direct(item: Validation):
    """Run a single prompt through the model with a generic system prompt."""
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": item.prompt},
    ]
    answer = await call_llm(conversation)
    return {"response": answer}

@app.post("/convert_eth_units", tags=["Utilities"])
async def convert_units(request: EthConversionRequest):
    """Convert the posted amount between ETH denominations; 400 on a bad unit."""
    try:
        result = convert_eth_units(request.value, request.from_unit)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return result

@app.post("/eth_to_units", tags=["Utilities"])
async def eth_to_units(item: Validation):
    """Pull the first number out of the prompt and convert it from ETH."""
    found = re.search(r'\d+\.?\d*', item.prompt)
    if not found:
        raise HTTPException(status_code=400, detail="No numeric value found")
    return convert_eth_units(float(found.group()), "eth")

@app.get("/quick_convert/{value}/{unit}", tags=["Utilities"])
async def quick_convert(value: float, unit: str = "eth"):
    """GET convenience wrapper around convert_eth_units; 400 on a bad unit."""
    try:
        result = convert_eth_units(value, unit)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return result

@app.get("/", tags=["Utilities"])
async def root():
    """Landing endpoint with basic service information."""
    info = {
        "message": "Luminous API — OpenAI Compatible Coding Assistant",
        "model": HF_MODEL_ID,
        "status": "active",
        "docs": "/docs",
    }
    return info

@app.get("/health", tags=["Utilities"])
async def health_check():
    """Liveness probe; also reports whether the HF token secret is present."""
    token_present = bool(os.environ.get("HF_TOKEN"))
    return {
        "status": "healthy",
        "model": HF_MODEL_ID,
        "hf_token_configured": token_present,
        "api_version": "1.2.0",
    }