File size: 7,975 Bytes
4a2546a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
import os
from typing import Any
from importlib.util import spec_from_file_location, module_from_spec
from logging import getLogger
from random import randint
from traceback import format_exc

from uvicorn import run
from fastapi import FastAPI
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

from babelbit.chute_template.schemas import (
    BBPredictedUtterance,
    BBPredictOutput,
)
from babelbit.utils.settings import get_settings
from babelbit.utils.async_clients import get_async_client

# Load the chute template helper files as standalone modules.
#
# Each template is located by file path (taken from the project settings),
# turned into an empty module object, and then executed. The names assigned
# onto the module object *before* `exec_module` become globals of that module,
# so the order "create module -> inject names -> exec_module" must be kept.
# NOTE(review): presumably the template files reference these names without
# importing them themselves — confirm against the template sources.
settings = get_settings()
chute_template_load_spec = spec_from_file_location(
    "chute_load",
    str(settings.PATH_CHUTE_TEMPLATES / settings.FILENAME_CHUTE_LOAD_UTILS),
)
chute_template_load = module_from_spec(chute_template_load_spec)
# Names made available to the load template's module namespace.
chute_template_load.os = os
chute_template_load.Any = Any
chute_template_load.snapshot_download = snapshot_download
chute_template_load.AutoTokenizer = AutoTokenizer
chute_template_load.AutoModelForCausalLM = AutoModelForCausalLM
# Execute the template file; afterwards this module is used below via
# `chute_template_load._load_model` and `chute_template_load._health`.
chute_template_load_spec.loader.exec_module(chute_template_load)

# Same procedure for the predict template.
chute_template_predict_spec = spec_from_file_location(
    "chute_predict",
    str(settings.PATH_CHUTE_TEMPLATES / settings.FILENAME_CHUTE_PREDICT_UTILS),
)
chute_template_predict = module_from_spec(chute_template_predict_spec)
# Names made available to the predict template's module namespace.
chute_template_predict.Any = Any
chute_template_predict.randint = randint
chute_template_predict.format_exc = format_exc
chute_template_predict.torch = torch
chute_template_predict.BBPredictedUtterance = BBPredictedUtterance
chute_template_predict.BBPredictOutput = BBPredictOutput
# Execute the template file; afterwards this module is used below via
# `chute_template_predict._predict`.
chute_template_predict_spec.loader.exec_module(chute_template_predict)

# Module-level logger shared by every function in this file.
logger = getLogger(__name__)


def deploy_mock_chute(huggingface_repo: str, huggingface_revision: str) -> None:
    """Build a FastAPI app that imitates a chute and serve it with uvicorn.

    Registered routes:
      * ``POST /health`` - delegates to the load template's ``_health``.
      * ``POST /<settings.CHUTES_MINER_PREDICT_ENDPOINT>`` - delegates to the
        predict template's ``_predict``.
      * ``GET /api/tasks/next/v2`` - returns a fixed mock challenge payload.

    Args:
        huggingface_repo: Repository name handed to the template helpers
            (as ``repo_name`` / ``model_name``).
        huggingface_revision: Revision handed to ``_load_model``.
    """
    # The loaded model is kept in a module-level global so that the startup
    # hook can assign it and the request handlers can read it.
    global model
    model = None
    chute = FastAPI(title="mock-chute")

    # Startup hook: populate the global `model` through the load template.
    @chute.on_event("startup")
    async def load_model():
        global model
        loaded = chute_template_load._load_model(
            repo_name=huggingface_repo,
            revision=huggingface_revision,
        )
        model = loaded

    # Health route: forwards the current model and repo name to the template.
    @chute.post("/health")
    async def health() -> dict[str, Any]:
        health_report = chute_template_load._health(
            model=model,
            repo_name=huggingface_repo,
        )
        return health_report

    predict_path = "/" + settings.CHUTES_MINER_PREDICT_ENDPOINT

    # Predict route: forwards the request body to the predict template.
    @chute.post(predict_path)
    async def predict(data: BBPredictedUtterance) -> BBPredictOutput:
        prediction_output = chute_template_predict._predict(
            model=model,
            data=data,
            model_name=huggingface_repo,
        )
        return prediction_output

    # Challenge route: always answers with the same single-dialogue payload.
    @chute.get("/api/tasks/next/v2")
    async def mock_challenge():
        mock_dialogue = {
            "dialogue_uid": "mock-dialogue-001",
            "utterances": [
                "Hello, how are you today?",
                "I'm doing well, thank you for asking.",
            ],
        }
        return {
            "task_id": "0",  # utterance prediction
            "challenge_uid": "mock-challenge-001",
            "dialogues": [mock_dialogue],
        }

    # Hand the app to uvicorn's `run`.
    run(chute)


async def test_chute_health_endpoint(base_url: str) -> None:
    """POST an empty JSON body to ``{base_url}/health`` and check ``model_loaded``.

    The request is authorised with the ``CHUTES_API_KEY`` bearer token from
    the settings. Any exception raised while requesting or validating,
    including a failed ``model_loaded`` assertion, is logged as an error and
    is not re-raised.

    Args:
        base_url: Base URL of the chute; ``/health`` is appended to it.
    """
    logger.info("πŸ” Testing `/health`...")
    client = await get_async_client()
    cfg = get_settings()
    api_key = cfg.CHUTES_API_KEY.get_secret_value()
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    health_url = f"{base_url}/health"
    logger.info(health_url)
    try:
        async with client.post(
            health_url, headers=request_headers, json={}
        ) as response:
            # Log the raw body first, then the decoded JSON document.
            body = await response.text()
            logger.info(f"Response: {body} ({response.status})")
            payload = await response.json()
            logger.info(payload)
        assert payload.get("model_loaded"), "Model not loaded"
        logger.info("βœ… /health passed")
    except Exception as exc:
        logger.error(f"❌ /health failed: {exc}")


async def get_chute_logs(instance_id: str) -> None:
    """GET the logs of a chute instance from ``api.chutes.ai`` and log them.

    The response body and status are written to the module logger; nothing
    is returned. Exceptions raised by the request are logged as errors and
    are not re-raised.

    Args:
        instance_id: Identifier interpolated into the ``/instances/.../logs`` URL.
    """
    client = await get_async_client()
    cfg = get_settings()
    api_key = cfg.CHUTES_API_KEY.get_secret_value()
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    # A "?backfill=10000" query string was left disabled in the original URL.
    logs_url = f"https://api.chutes.ai/instances/{instance_id}/logs"
    logger.info(logs_url)
    try:
        async with client.get(logs_url, headers=request_headers) as response:
            body = await response.text()
            logger.info(f"Response: {body} ({response.status})")
    except Exception as exc:
        logger.error(f"❌ /logs failed: {exc}")


async def test_chute_predict_endpoint(
    base_url: str, test_utterances: list[BBPredictedUtterance]
) -> None:
    """POST each test utterance to the predict endpoint and validate the reply.

    For every utterance the JSON dump of the model is sent to
    ``{base_url}/{settings.CHUTES_MINER_PREDICT_ENDPOINT}`` with the
    ``CHUTES_API_KEY`` bearer token. The reply must have status 200,
    ``success`` set to ``True``, a non-empty string ``utterance.prediction``,
    and must echo back the request's ``index``, ``step`` and ``prefix``.

    Args:
        base_url: Base URL of the chute; the predict endpoint is appended.
        test_utterances: Utterances to send, one request per item, in order.

    Raises:
        Exception: Whatever the request or a validation assertion raised;
            it is logged as an error first and then re-raised unchanged.
    """
    logger.info("πŸ” Testing `/predict` with utterance data...")
    session = await get_async_client()
    settings = get_settings()
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {settings.CHUTES_API_KEY.get_secret_value()}",
    }
    url = f"{base_url}/{settings.CHUTES_MINER_PREDICT_ENDPOINT}"
    logger.info(url)

    try:
        successful_predictions = 0
        total_predictions = len(test_utterances)

        for i, utterance in enumerate(test_utterances):
            logger.info(f"Testing utterance {i+1}/{total_predictions}: '{utterance.prefix}'")

            async with session.post(
                url,
                headers=headers,
                json=utterance.model_dump(mode="json"),
            ) as response:
                logger.info(f"Response status: {response.status}")
                if response.status != 200:
                    # Read the body only on failure, so the report says what
                    # the server answered; truncated to keep the message short.
                    text = await response.text()
                    raise AssertionError(
                        f"Non-200 response from predict for utterance "
                        f"'{utterance.prefix}': status={response.status}, "
                        f"body={text[:500]!r}"
                    )
                # The full output is deliberately not logged, to reduce noise.
                output = await response.json()

            # Validate the response structure
            assert output["success"] is True, f"Prediction failed: {output}"
            assert "utterance" in output, "Missing utterance in response"
            assert "prediction" in output["utterance"], "Missing prediction in utterance"

            # Check that we got a non-empty prediction
            prediction = output["utterance"]["prediction"]
            assert isinstance(prediction, str), f"Prediction should be string, got {type(prediction)}"
            assert len(prediction.strip()) > 0, f"Empty prediction for input '{utterance.prefix}'"

            # Verify the utterance structure is preserved
            returned_utterance = output["utterance"]
            assert returned_utterance["index"] == utterance.index, "Utterance index mismatch"
            assert returned_utterance["step"] == utterance.step, "Utterance step mismatch"
            assert returned_utterance["prefix"] == utterance.prefix, "Utterance prefix mismatch"

            logger.info(f"βœ… Utterance {i+1} prediction: '{utterance.prefix}' β†’ '{prediction}'")
            successful_predictions += 1

        logger.info(f"βœ… /predict passed: {successful_predictions}/{total_predictions} predictions successful")

    except Exception as e:
        logger.error(f"❌ /predict failed: {e}")
        raise


# Helper function to create test utterances
def create_test_utterances() -> list[BBPredictedUtterance]:
    """Return the five fixed sample utterances used for prediction testing.

    Each utterance gets ``index`` "session-1" .. "session-5" in order,
    ``step`` 1, an empty ``prediction``, no ``ground_truth`` and
    ``done=False``.
    """
    sample_prefixes = (
        "Hello",
        "The weather today is",
        "Once upon a time",
        "I think that",
        "The quick brown fox",
    )

    utterances = []
    for position, sample_prefix in enumerate(sample_prefixes, start=1):
        utterances.append(
            BBPredictedUtterance(
                index=f"session-{position}",
                step=1,
                prefix=sample_prefix,
                prediction="",  # Will be filled by the model
                ground_truth=None,
                done=False,
            )
        )
    return utterances