# Gradio
import gradio as gr
# ML
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from typing import Tuple, Dict

# Load the fine-tuned classifier and tokenizer from the repository root.
model_path = "./"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()

# Map class indices back to the model names the router chooses between.
reverse_label_mapping = {0: 'gpt-4.1-nano', 1: 'gpt-4.1', 2: 'o4-mini'}


def route_query(query: str) -> Tuple[str, str, Dict[str, str]]:
    """
    Classify a query and return the recommended model, the prediction
    confidence, and the full probability distribution over all labels.
    """
    if not query.strip():
        return "Please enter a query", "0.000", {}

    # Tokenize the query with the same settings used during training.
    inputs = tokenizer(
        query,
        padding='max_length',
        truncation=True,
        max_length=128,
        return_tensors='pt'
    )

    # Run inference without tracking gradients.
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class = torch.argmax(predictions, dim=-1)
        confidence = torch.max(predictions, dim=-1)[0]

    # Build a label -> probability mapping for display.
    probs = predictions.cpu().numpy()[0]
    probabilities = {reverse_label_mapping[i]: f"{prob:.3f}" for i, prob in enumerate(probs)}

    recommended_model = reverse_label_mapping[predicted_class.item()]

    return recommended_model, f"{confidence.item():.3f}", probabilities


iface = gr.Interface(
    fn=route_query,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs=[
        gr.Textbox(label="Recommended Model"),
        gr.Textbox(label="Confidence"),
        gr.JSON(label="All Probabilities")
    ],
    title="GPT Router Model",
    description="Enter a query to get a routing recommendation for the appropriate GPT model"
)

if __name__ == "__main__":
    iface.launch()