Spaces:

daoqm123
/

llm-error-classifier-api

Sleeping

App Files Files Community

daoqm123 committed on Nov 10, 2025

Commit

56db9b3

1 Parent(s): 877b44a

Update backend

Browse files

Files changed (1) hide show

main.py +172 -77

main.py CHANGED Viewed

@@ -12,12 +12,16 @@ import json
 import os
 import time
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 # Global model and tokenizer
 model = None
 tokenizer = None
 device = None
 os.environ["CUDA_VISIBLE_DEVICES"] = "7"
 @asynccontextmanager
@@ -59,6 +63,10 @@ async def lifespan(app: FastAPI):
     model.eval()
     print("Model loaded successfully!")
     yield  # Application runs here
     # Shutdown (if needed)
@@ -188,92 +196,179 @@ async def classify(request: ClassificationRequest):
         raise HTTPException(status_code=500, detail=f"Classification error: {str(e)}")
 @app.get("/api/examples")
 async def get_examples():
-    """Return example inputs for testing"""
-    examples = [
-        {
-            "name": "Correct Example",
-            "description": "A properly formed tool call",
-            "data": {
-                "query": "What's the weather in New York?",
-                "enabled_tools": [
-                    {
-                        "name": "get_weather",
-                        "description": "Get current weather for a location",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {
-                                "location": {"type": "string"},
-                                "units": {"type": "string", "enum": ["celsius", "fahrenheit"]}
-                            },
-                            "required": ["location"]
-                        }
-                    }
-                ],
-                "tool_calling": {
-                    "name": "get_weather",
-                    "arguments": {
-                        "location": "New York",
-                        "units": "fahrenheit"
                     }
-                }
-            }
-        },
-        {
-            "name": "Wrong Function Name",
-            "description": "Tool call uses incorrect function name",
-            "data": {
-                "query": "Calculate 25 * 4",
-                "enabled_tools": [
-                    {
-                        "name": "calculator",
-                        "description": "Perform calculations",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {
-                                "expression": {"type": "string"}
                             }
                         }
-                    }
-                ],
-                "tool_calling": {
-                    "name": "calculate",  # Wrong name!
-                    "arguments": {
-                        "expression": "25 * 4"
-                    }
-                }
-            }
-        },
-        {
-            "name": "Incorrect Argument Type",
-            "description": "Argument has wrong data type",
-            "data": {
-                "query": "Set a reminder for 3pm",
-                "enabled_tools": [
-                    {
-                        "name": "set_reminder",
-                        "description": "Create a reminder",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {
-                                "time": {"type": "string"},
-                                "message": {"type": "string"}
-                            }
                         }
                     }
-                ],
-                "tool_calling": {
-                    "name": "set_reminder",
-                    "arguments": {
-                        "time": 1500,  # Should be string!
-                        "message": "Meeting"
-                    }
                 }
             }
-        }
-    ]
     return {"examples": examples}

 import os
 import time
 import torch
+import random
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from collections import defaultdict
 # Global model and tokenizer
 model = None
 tokenizer = None
 device = None
+dataset_by_label = None
+dataset_path = None
 os.environ["CUDA_VISIBLE_DEVICES"] = "7"
 @asynccontextmanager
     model.eval()
     print("Model loaded successfully!")
+    # Load dataset for examples
+    print("Loading dataset for examples...")
+    load_dataset()
     yield  # Application runs here
     # Shutdown (if needed)
         raise HTTPException(status_code=500, detail=f"Classification error: {str(e)}")
def load_dataset():
    """Load the example dataset and group its items by ground-truth label.

    Candidate locations are probed in order and the first existing regular
    file wins. The ``DATASET_PATH`` environment variable is probed first so
    that an explicit override always beats the built-in default locations.

    Side effects:
        Sets the module globals ``dataset_path`` (the chosen file, or ``None``)
        and, on success only, ``dataset_by_label``.

    Returns:
        A ``defaultdict(list)`` mapping each label found in
        ``LABEL_MAP.values()`` to the dataset items carrying that
        ``ground_truth``; ``None`` if no file was found or it could not be
        read/parsed as a JSON list.
    """
    global dataset_by_label, dataset_path

    # Explicit configuration first, then the locations baked into the code.
    candidate_paths = [
        os.getenv("DATASET_PATH", ""),
        os.path.join(os.path.dirname(__file__), "../../dataset/xlam-function-calling-60k/xlam_function_calling_60k_processed_with_ground_truth.json"),
        "/work/cssema416/202610/12/dataset/xlam-function-calling-60k/xlam_function_calling_60k_processed_with_ground_truth.json",
    ]

    # isfile() rather than exists(): a directory at one of these paths would
    # otherwise be selected and then fail on open().
    dataset_path = next(
        (path for path in candidate_paths if path and os.path.isfile(path)),
        None,
    )

    if not dataset_path:
        print("Warning: Dataset file not found. Using hardcoded examples.")
        return None

    try:
        print(f"Loading dataset from: {dataset_path}")
        # Explicit encoding so the result does not depend on the host locale.
        with open(dataset_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading dataset: {e}")
        return None

    if not isinstance(data, list):
        print(f"Error loading dataset: expected a JSON list, got {type(data).__name__}")
        return None

    # Hoisted out of the loop; a tuple keeps equality-based membership so
    # unhashable label values in the data cannot raise.
    valid_labels = tuple(LABEL_MAP.values())

    # Build into a local and publish once, so a failure never leaves a
    # partially filled global behind.
    grouped = defaultdict(list)
    for item in data:
        if not isinstance(item, dict):
            continue  # skip malformed rows instead of aborting the whole load
        label = item.get('ground_truth', 'Unknown')
        if label in valid_labels:
            grouped[label].append(item)

    dataset_by_label = grouped
    print(f"Loaded {len(data)} examples. Examples per label: {dict((k, len(v)) for k, v in dataset_by_label.items())}")
    return dataset_by_label
# Python-style type names mapped to their JSON Schema equivalents.
_PYTHON_TO_JSON_TYPE = {
    'str': 'string',
    'int': 'integer',
    'float': 'number',
    'bool': 'boolean',
    'list': 'array',
    'tuple': 'array',
    'set': 'array',
    'dict': 'object',
}

# Names that are already valid JSON Schema primitive types.
_JSON_SCHEMA_TYPES = frozenset(_PYTHON_TO_JSON_TYPE.values()) | {'null'}


def _decode_if_json(value):
    """Return ``json.loads(value)`` for a JSON string, else *value* unchanged."""
    if isinstance(value, str):
        try:
            return json.loads(value)
        except ValueError:
            return value
    return value


def _normalize_param_type(raw_type):
    """Map a declared parameter type to ``(json_schema_type, is_optional)``.

    Accepts plain Python names ("int"), JSON Schema names ("integer"),
    generic forms ("List[str]") and decorated forms ("str, optional").
    Anything unrecognised falls back to "string".
    """
    if not isinstance(raw_type, str):
        return 'string', False
    text = raw_type.strip().lower()
    is_optional = 'optional' in text
    if text.startswith('optional[') and text.endswith(']'):
        text = text[len('optional['):-1]
    # Drop generic arguments and trailing qualifiers: "list[str], optional" -> "list".
    base = text.split('[', 1)[0].split(',', 1)[0].strip()
    if base in _JSON_SCHEMA_TYPES:
        return base, is_optional
    return _PYTHON_TO_JSON_TYPE.get(base, 'string'), is_optional


def _build_parameters_schema(tool_params):
    """Build a JSON Schema ``object`` description from a tool's parameters."""
    tool_params = _decode_if_json(tool_params)
    if not isinstance(tool_params, dict):
        return {"type": "object", "properties": {}}

    # Already JSON Schema shaped: pass it through instead of treating
    # "properties"/"required" as parameter names.
    if tool_params.get('type') == 'object' and isinstance(tool_params.get('properties'), dict):
        return dict(tool_params)

    properties = {}
    required = []
    for param_name, param_info in tool_params.items():
        if not isinstance(param_info, dict):
            continue
        json_type, is_optional = _normalize_param_type(param_info.get('type', 'string'))
        prop = {"type": json_type}
        if 'description' in param_info:
            prop['description'] = param_info['description']
        if 'enum' in param_info:
            prop['enum'] = param_info['enum']
        # Heuristic: no default and not declared optional => treat as required.
        if 'default' not in param_info and not is_optional:
            required.append(param_name)
        properties[param_name] = prop

    schema = {"type": "object", "properties": properties}
    if required:
        schema["required"] = required
    return schema


def convert_dataset_example_to_api_format(item: Dict[str, Any]) -> Dict[str, Any]:
    """Convert one dataset example into the request shape used by the API.

    Args:
        item: Dataset record. Reads the ``query``, ``tools`` and ``answers``
            keys; ``tools`` and ``answers`` may be lists or JSON-encoded
            strings of lists (NOTE(review): the string form is assumed from
            common function-calling dataset dumps - confirm against the
            processed file). Missing or ``None`` values are tolerated.

    Returns:
        A dict with ``query`` (str), ``enabled_tools`` (list of
        ``{"name", "description", "parameters"}`` where ``parameters`` is a
        JSON Schema object) and ``tool_calling`` (``{"name", "arguments"}``
        built from the first answer, or ``None`` when there is no answer).
    """
    raw_tools = _decode_if_json(item.get('tools'))
    if not isinstance(raw_tools, (list, tuple)):
        raw_tools = []

    enabled_tools = [
        {
            "name": tool.get('name', ''),
            "description": tool.get('description', ''),
            "parameters": _build_parameters_schema(tool.get('parameters', {})),
        }
        for tool in raw_tools
        if isinstance(tool, dict)
    ]

    # Only the first answer is exposed as the tool call under test.
    tool_calling = None
    answers = _decode_if_json(item.get('answers'))
    if isinstance(answers, (list, tuple)) and answers and isinstance(answers[0], dict):
        answer = answers[0]
        arguments = answer.get('arguments', {})
        decoded_arguments = _decode_if_json(arguments)
        if isinstance(decoded_arguments, dict):
            arguments = decoded_arguments
        tool_calling = {
            "name": answer.get('name', ''),
            "arguments": arguments,
        }

    return {
        "query": item.get('query', ''),
        "enabled_tools": enabled_tools,
        "tool_calling": tool_calling,
    }
 @app.get("/api/examples")
 async def get_examples():
+    """Return random example inputs from dataset, grouped by label"""
+    global dataset_by_label
+    # Load dataset if not already loaded
+    if dataset_by_label is None:
+        load_dataset()
+    examples = []
+    # If dataset is loaded, get random examples from each label
+    if dataset_by_label:
+        # Get one random example from each label
+        for label in LABEL_MAP.values():
+            if label in dataset_by_label and len(dataset_by_label[label]) > 0:
+                # Randomly select an example from this label
+                random_example = random.choice(dataset_by_label[label])
+                # Convert to API format
+                try:
+                    api_format = convert_dataset_example_to_api_format(random_example)
+                    # Create example entry
+                    example_entry = {
+                        "name": f"{label} Example",
+                        "description": f"Example of {label.replace('_', ' ').title()}",
+                        "data": api_format
                     }
+                    examples.append(example_entry)
+                except Exception as e:
+                    print(f"Error converting example for label {label}: {e}")
+                    continue
+    else:
+        # Fallback to hardcoded examples if dataset not available
+        examples = [
+            {
+                "name": "Correct Example",
+                "description": "A properly formed tool call",
+                "data": {
+                    "query": "What's the weather in New York?",
+                    "enabled_tools": [
+                        {
+                            "name": "get_weather",
+                            "description": "Get current weather for a location",
+                            "parameters": {
+                                "type": "object",
+                                "properties": {
+                                    "location": {"type": "string"},
+                                    "units": {"type": "string", "enum": ["celsius", "fahrenheit"]}
+                                },
+                                "required": ["location"]
                             }
                         }
+                    ],
+                    "tool_calling": {
+                        "name": "get_weather",
+                        "arguments": {
+                            "location": "New York",
+                            "units": "fahrenheit"
                         }
                     }
                 }
             }
+        ]
+    # Shuffle examples to randomize order
+    random.shuffle(examples)
     return {"examples": examples}