Spaces:

build-small-hackathon
/

FitCheck

Running on Zero

File size: 3,479 Bytes

c97ad08
 
 
e34beb2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c97ad08
 
 
 
 
 
 
 
 
e34beb2
 
12d2e34
 
c97ad08
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12d2e34
e34beb2
 
 
 
 
1bbff15
e34beb2
 
 
 
1bbff15
e34beb2
 
 
 
 
 
 
 
 
 
 
 
12d2e34
 
ca2bb8e
0e8e243
 
 
 
 
 
 
ca2bb8e
12d2e34
 
 
 
 
e34beb2
12d2e34
 
c97ad08

"""
FitCheck — what AI can your computer actually run?

Five bricks behind a `gr.Server` (which IS a FastAPI app) serving the
hand-built frontend in static/:

  - /api/advise   : the honest verdict. Deterministic engine (engine/) over
                    catalogue.json — 83 real models with exact GGUF file sizes,
                    licenses, and links, refreshed from the Hugging Face API at
                    build time. The running app makes no network calls here.
  - /api/minspecs : the reverse question — "what machine do I need for X?"
                    Same engine, inverted over a hardware ladder. Offline.
  - /api/lookup   : OPTIONAL live check of any pasted HF repo id. Walks the
                    model-tree (finetune -> base) to a catalogue entry, or does
                    labelled raw math. The one endpoint that touches the
                    network, and the UI says so.
  - /gradio_api/call/parse : the spec brick (spec_brick.parse_specs) — turns a
                    messy machine description into form fields using the
                    fine-tuned spec parser. @app.api, for the same reason as
                    `ask` below.
  - /gradio_api/call/ask : the model brick (model_brick.ask) — a small local
                    LLM that explains the engine's numbers in plain words.
                    @app.api so it runs on Gradio's queue and gets ZeroGPU.
"""

from pathlib import Path

import gradio as gr
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

from engine.real_advisor import advise_real, min_specs
from engine.ui_adapter import spec_from_payload
from model_brick import ask as model_ask

# Directory holding the frontend files, located next to this module. Used
# below both for the /static mount and for serving index.html at "/".
STATIC = Path(__file__).parent / "static"

# The server object that every route in this file is registered on
# (`@app.post`, `@app.get`, `@app.api`, `app.mount`).
app = gr.Server()


class AdviseIn(BaseModel):
    """Request body for POST /api/advise (and, through LookupIn, /api/lookup).

    Every field carries a default, so a client may omit any of them. The
    handlers dump the model to a plain dict and pass that dict both to
    `spec_from_payload` and to the engine call.
    """
    # NOTE(review): the per-field meanings below are read off the names and
    # defaults only — the authoritative interpretation lives in
    # engine.ui_adapter.spec_from_payload and the engine; confirm there.
    #   computer  - presumably the kind of machine chosen in the UI
    #   ram_gb    - presumably system RAM in GB; None is accepted
    #   provider  - presumably the GPU vendor/provider, "none" meaning no GPU
    #   gpu       - presumably a free-text GPU name
    #   vram_gb   - presumably GPU memory in GB; None is accepted
    #   paste     - presumably a pasted raw machine description
    #   usecase   - presumably the selected goal (default "chat")
    #   custom    - presumably a free-text goal
    #   priority  - presumably the quality/speed trade-off (default "balanced")
    computer: str = "Windows laptop"
    ram_gb: float | None = 16
    provider: str = "none"
    gpu: str = ""
    vram_gb: float | None = None
    paste: str = ""
    usecase: str = "chat"
    custom: str = ""
    priority: str = "balanced"


@app.post("/api/advise")
def api_advise(payload: AdviseIn):
    """Return the engine's verdict for the machine described by `payload`.

    The validated request is flattened to a plain dict; that dict is turned
    into a hardware spec by `spec_from_payload`, and both are handed to
    `advise_real`, whose result is returned unchanged.
    """
    fields = payload.model_dump()
    spec = spec_from_payload(fields)
    return advise_real(fields, spec)


class MinSpecsIn(BaseModel):
    """Request body for POST /api/minspecs.

    A client may send a single `usecase`, a list of `usecases`, or both. The
    handler uses `usecases` when it is non-empty and otherwise falls back to
    a one-element list built from `usecase`.
    """
    usecase: str = "chat"
    usecases: list[str] | None = None   # multi-goal: union of requirements


@app.post("/api/minspecs")
def api_minspecs(payload: MinSpecsIn):
    """Answer the reverse question: what machine do the given goal(s) need?

    A non-empty `usecases` list is passed through as-is; when it is missing
    or empty, the single `usecase` is wrapped in a list instead. The result
    of `min_specs` is returned unchanged.
    """
    if payload.usecases:
        goals = payload.usecases
    else:
        goals = [payload.usecase]
    return min_specs(goals)


class LookupIn(AdviseIn):
    """Request body for POST /api/lookup: every AdviseIn field plus `repo`.

    `repo` is the Hugging Face repo id to look up; it defaults to an empty
    string when the client omits it.
    """
    repo: str = ""


@app.post("/api/lookup")
def api_lookup(payload: LookupIn):
    """Look up a single HF repo id live (the UI labels this as online).

    The request is flattened to a dict, and the repo id, the full dict and
    the hardware spec derived from it are passed to
    `engine.hub_lookup.lookup`, whose result is returned unchanged.
    """
    # Imported on first use rather than at module load, as in the original.
    from engine.hub_lookup import lookup

    fields = payload.model_dump()
    repo_id = fields.get("repo", "")
    spec = spec_from_payload(fields)
    return lookup(repo_id, fields, spec)


@app.api(name="parse", concurrency_limit=1)
def api_parse(text: str = "") -> dict:
    """Turn a messy machine description into form fields.

    Delegates to the fine-tuned spec parser (cn0303/fitcheck-spec-parser)
    through `spec_brick.parse_specs`. Registered with `@app.api` so the call
    goes through the Gradio queue and can get ZeroGPU.
    """
    # Imported on first use rather than at module load, as in the original.
    from spec_brick import parse_specs as run_spec_parser

    parsed_fields = run_spec_parser(text)
    return parsed_fields


@app.api(name="ask", concurrency_limit=1)
def api_ask(question: str, facts: str = "") -> dict:
    """Answer a plain-English follow-up, grounded in the last advise result.

    Reached at /gradio_api/call/ask rather than through a plain POST, so the
    request travels through Gradio's queue and is given a ZeroGPU allocation.

    `question` is the user's follow-up; `facts` is the JSON string of the
    most recent /api/advise response. The reply is either
    {headline, why, next_step} or {error}.
    """
    answer = model_ask(question, facts)
    return answer


# Serve every file under the STATIC directory at the /static URL prefix.
app.mount("/static", StaticFiles(directory=STATIC), name="static")


@app.get("/")
def index():
    """Serve the frontend's entry page, static/index.html, at the site root."""
    home_page = STATIC / "index.html"
    return FileResponse(home_page)


# Only start the server when this file is run as a script, not when it is
# imported. Listens on all interfaces (0.0.0.0), port 7860.
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860)