Spaces:

mindchain
/

nemo-datadesigner-api

Sleeping

App Files Files Community

mindchain committed 27 days ago

Commit

bf2ae7f

verified ·

1 Parent(s): e3d4ce2

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +236 -0

app.py ADDED Viewed

	@@ -0,0 +1,236 @@

+import os
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from typing import Any
+from models import (
+    GenerateRequest, GenerateResponse,
+    PreviewRequest, PreviewResponse,
+    HealthResponse, ZaiModel, SamplerType
+)
+# Configure z.ai as an OpenAI-compatible provider by exporting its key and base URL through the OPENAI_* environment variables.
+ZAI_API_KEY = os.environ.get("ZAI_API_KEY", "")  # empty string when the variable is not set
+ZAI_BASE_URL = "https://api.z.ai/api/anthropic"  # NOTE(review): path says "anthropic" but it is exported as OPENAI_API_BASE below; confirm this is the intended endpoint
+os.environ["OPENAI_API_KEY"] = ZAI_API_KEY  # overwrites any existing value, even when ZAI_API_KEY is empty
+os.environ["OPENAI_API_BASE"] = ZAI_BASE_URL
+# Global DataDesigner instance; stays None until the lifespan handler assigns it at startup.
+data_designer = None
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    global data_designer
+    from data_designer.interface import DataDesigner
+    data_designer = DataDesigner()
+    yield
+app = FastAPI(
+    title="NeMo DataDesigner API",
+    description="Synthetic data generation with NVIDIA NeMo DataDesigner and z.ai",
+    version="1.0.0",
+    lifespan=lifespan
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+def build_config(request: GenerateRequest | PreviewRequest):
+    """Build DataDesigner configuration from request."""
+    import data_designer.config as dd
+    from data_designer.config.models import ModelConfig, ChatCompletionInferenceParams
+    config_builder = dd.DataDesignerConfigBuilder()
+    model_id = request.model.value
+    # Process columns
+    for col in request.columns:
+        if col.type == "sampler":
+            sampler_type_str = col.params.get("sampler_type", "CATEGORY")
+            sampler_type = getattr(dd.SamplerType, sampler_type_str, dd.SamplerType.CATEGORY)
+            params_class = get_sampler_params(sampler_type, col.params)
+            config_builder.add_column(
+                dd.SamplerColumnConfig(
+                    name=col.name,
+                    sampler_type=sampler_type,
+                    params=params_class,
+                )
+            )
+        elif col.type == "llm_text":
+            config_builder.add_column(
+                dd.LLMTextColumnConfig(
+                    name=col.name,
+                    model_alias="zai-model",
+                    prompt=col.params.get("prompt", "Generate text"),
+                )
+            )
+        elif col.type == "llm_code":
+            config_builder.add_column(
+                dd.LLMCodeColumnConfig(
+                    name=col.name,
+                    model_alias="zai-model",
+                    prompt=col.params.get("prompt", "Generate code"),
+                    language=col.params.get("language", "python"),
+                )
+            )
+        elif col.type == "llm_structured":
+            config_builder.add_column(
+                dd.LLMStructuredColumnConfig(
+                    name=col.name,
+                    model_alias="zai-model",
+                    prompt=col.params.get("prompt", "Generate structured data"),
+                    schema=col.params.get("schema", {}),
+                )
+            )
+    # Add model config
+    model_config = ModelConfig(
+        alias="zai-model",
+        model=f"openai/{model_id}",
+        provider="openai",
+        inference_parameters=ChatCompletionInferenceParams(
+            temperature=request.temperature,
+            max_tokens=request.max_tokens,
+        ),
+    )
+    config_builder.add_model_config(model_config)
+    return config_builder
+def get_sampler_params(sampler_type, params: dict) -> Any:
+    """Get appropriate sampler params based on type."""
+    import data_designer.config as dd
+    type_name = sampler_type.name if hasattr(sampler_type, 'name') else str(sampler_type)
+    if type_name == "CATEGORY":
+        return dd.CategorySamplerParams(
+            values=params.get("values", ["A", "B", "C"])
+        )
+    elif type_name == "UNIFORM":
+        return dd.UniformSamplerParams(
+            low=params.get("low", 0),
+            high=params.get("high", 100)
+        )
+    elif type_name == "GAUSSIAN":
+        return dd.GaussianSamplerParams(
+            mean=params.get("mean", 0),
+            std=params.get("std", 1)
+        )
+    elif type_name == "DATETIME":
+        return dd.DateTimeSamplerParams(
+            start_date=params.get("start_date", "2020-01-01"),
+            end_date=params.get("end_date", "2025-12-31")
+        )
+    else:
+        return dd.CategorySamplerParams(values=["default"])
+@app.get("/", response_model=HealthResponse)
+async def root():
+    """Health check endpoint."""
+    return HealthResponse(
+        status="healthy",
+        model="data-designer",
+        api_configured=bool(ZAI_API_KEY)
+    )
+@app.get("/health", response_model=HealthResponse)
+async def health():
+    """Health check endpoint."""
+    return HealthResponse(
+        status="healthy",
+        model="data-designer",
+        api_configured=bool(ZAI_API_KEY)
+    )
+@app.post("/generate", response_model=GenerateResponse)
+async def generate(request: GenerateRequest):
+    """
+    Generate synthetic data.
+    """
+    try:
+        config_builder = build_config(request)
+        result = data_designer.generate(
+            config_builder=config_builder,
+            num_records=request.num_records,
+        )
+        df = result.to_pandas()
+        data = df.to_dict(orient="records")
+        return GenerateResponse(
+            success=True,
+            data=data,
+            record_count=len(data)
+        )
+    except Exception as e:
+        return GenerateResponse(
+            success=False,
+            error=str(e)
+        )
+@app.post("/preview", response_model=PreviewResponse)
+async def preview(request: PreviewRequest):
+    """
+    Preview a single record without full generation.
+    """
+    try:
+        config_builder = build_config(request)
+        preview_result = data_designer.preview(config_builder=config_builder)
+        return PreviewResponse(
+            success=True,
+            sample=preview_result.sample_record
+        )
+    except Exception as e:
+        return PreviewResponse(
+            success=False,
+            error=str(e)
+        )
+@app.get("/models")
+async def list_models():
+    """List available z.ai models."""
+    return {
+        "models": [
+            {"id": "glm-5", "name": "GLM-5 (Opus)", "description": "Most capable model"},
+            {"id": "glm-4.7", "name": "GLM-4.7 (Sonnet)", "description": "Balanced performance"},
+            {"id": "glm-4.5-air", "name": "GLM-4.5-Air (Haiku)", "description": "Fast and efficient"}
+        ]
+    }
+@app.get("/sampler-types")
+async def list_sampler_types():
+    """List available sampler types."""
+    return {
+        "sampler_types": [
+            {"id": "CATEGORY", "params": ["values"]},
+            {"id": "UNIFORM", "params": ["low", "high"]},
+            {"id": "GAUSSIAN", "params": ["mean", "std"]},
+            {"id": "UUID", "params": []},
+            {"id": "DATETIME", "params": ["start_date", "end_date"]},
+            {"id": "PERSON", "params": ["locale", "include_attributes"]}
+        ]
+    }