Spaces:
Sleeping
Sleeping
| import pickle | |
| from typing import Optional, Tuple | |
| import numpy as np | |
| import requests | |
| import keras | |
| from sentence_transformers import CrossEncoder | |
# --- Intent-classifier artifacts (read by the load_* helpers) ---
TOKENIZER_PATH = "ai_engine/tokenizer.pkl"
LABEL_ENCODER_PATH = "ai_engine/label_encoder.pkl"
INTENT_MODEL_PATH = "ai_engine/h7_intent_model.keras"

# --- Re-ranker: model identifier handed to CrossEncoder() ---
RERANKER_MODEL_NAME = "cross-encoder/ms-marco-MiniLM-L-6-v2"

# --- Ollama: model name, pull endpoint and request timeout (seconds) ---
MODEL_NAME = "llama3.2:1b"
OLLAMA_PULL_URL = "http://localhost:11434/api/pull"
OLLAMA_WARMUP_TIMEOUT = 2

# --- Module-level model handles; empty until load_all_models() fills them ---
tokenizer = label_encoder = intent_model = re_ranker = None
models_loaded = False
def patch_layer(layer_class):
    """Return an ``__init__`` for *layer_class* that ignores ``quantization_config``.

    The returned function drops a ``quantization_config`` keyword argument
    (if present) and then delegates to the class's current ``__init__``.
    The caller is responsible for assigning the result back to
    ``layer_class.__init__``.

    The function is idempotent: if the class's current ``__init__`` was
    already produced by ``patch_layer``, it is returned unchanged so that
    repeated patching does not stack wrappers on top of each other.

    Args:
        layer_class: The class whose ``__init__`` should be wrapped.

    Returns:
        The wrapping ``__init__`` function (or the existing one if it is
        already a patched wrapper).
    """
    import functools

    original_init = layer_class.__init__

    # Already patched (e.g. a second load with force_reload=True): reuse it
    # instead of wrapping the wrapper again.
    if getattr(original_init, "_quantization_patch_applied", False):
        return original_init

    @functools.wraps(original_init)
    def patched_init(self, *args, **kwargs):
        kwargs.pop("quantization_config", None)
        original_init(self, *args, **kwargs)

    # Marker consulted by the idempotence check above.
    patched_init._quantization_patch_applied = True
    return patched_init
def apply_keras_patches():
    """Patch the ``__init__`` of selected Keras layers via ``patch_layer``.

    Applies to ``Embedding``, ``Dense``, ``LSTM`` and ``Bidirectional`` so
    that a ``quantization_config`` keyword argument is dropped before the
    layer's own constructor runs.

    Patching is best-effort: a failure for one layer is reported on stdout
    and the remaining layers are still processed. Nothing is raised for a
    per-layer failure.
    """
    layer_classes = (
        keras.layers.Embedding,
        keras.layers.Dense,
        keras.layers.LSTM,
        keras.layers.Bidirectional,
    )
    for layer in layer_classes:
        try:
            layer.__init__ = patch_layer(layer)
        except Exception as patch_error:
            # Keep going, but make the failure visible instead of hiding it.
            layer_name = getattr(layer, "__name__", repr(layer))
            print(
                f"⚠️ Keras patch skipped for {layer_name}: {patch_error}",
                flush=True,
            )
def warmup_ollama_model():
    """Send a best-effort pull request for ``MODEL_NAME`` to the Ollama server.

    POSTs ``{"name": MODEL_NAME}`` to ``OLLAMA_PULL_URL`` with a timeout of
    ``OLLAMA_WARMUP_TIMEOUT`` seconds. The call never raises: any failure
    (server not running, timeout, non-success status) is reported on stdout
    and otherwise ignored, so startup is not blocked by Ollama.
    """
    try:
        response = requests.post(
            OLLAMA_PULL_URL,
            json={"name": MODEL_NAME},
            timeout=OLLAMA_WARMUP_TIMEOUT,
        )
        try:
            if not response.ok:
                print(
                    f"⚠️ Ollama warm-up returned HTTP {response.status_code}",
                    flush=True,
                )
        finally:
            # Release the connection instead of leaving it to the GC.
            response.close()
    except Exception as warmup_error:
        # Deliberately non-fatal; only surface the reason.
        print(f"⚠️ Ollama warm-up skipped: {warmup_error}", flush=True)
def load_tokenizer():
    """Unpickle and return the object stored at ``TOKENIZER_PATH``.

    NOTE: ``pickle.load`` can execute arbitrary code contained in the file,
    so the file must come from a trusted source.
    """
    with open(TOKENIZER_PATH, "rb") as tokenizer_file:
        loaded_tokenizer = pickle.load(tokenizer_file)
    return loaded_tokenizer
def load_label_encoder():
    """Unpickle and return the object stored at ``LABEL_ENCODER_PATH``.

    NOTE: ``pickle.load`` can execute arbitrary code contained in the file,
    so the file must come from a trusted source.
    """
    with open(LABEL_ENCODER_PATH, "rb") as encoder_file:
        loaded_encoder = pickle.load(encoder_file)
    return loaded_encoder
def load_intent_model():
    """Load and return the Keras model saved at ``INTENT_MODEL_PATH``."""
    loaded_model = keras.models.load_model(INTENT_MODEL_PATH)
    return loaded_model
def load_reranker():
    """Build and return a ``CrossEncoder`` for ``RERANKER_MODEL_NAME``."""
    cross_encoder = CrossEncoder(RERANKER_MODEL_NAME)
    return cross_encoder
def reset_models():
    """Clear every module-level model handle and mark the models as not loaded."""
    global tokenizer, label_encoder, intent_model, re_ranker, models_loaded

    # All four handles go back to their "not loaded" value together.
    tokenizer = label_encoder = intent_model = re_ranker = None
    models_loaded = False
def _load_optional_reranker():
    """Return the re-ranker, or ``None`` (with a printed warning) if loading fails."""
    try:
        return load_reranker()
    except Exception as rerank_error:
        print(f"⚠️ Re-ranker load warning: {rerank_error}", flush=True)
        return None


def load_all_models(force_reload: bool = False):
    """Load every model into the module-level handles.

    Does nothing (apart from printing a notice) when the models are already
    loaded and ``force_reload`` is false. Otherwise the Keras patches are
    applied and the tokenizer, label encoder and intent model are loaded in
    that order, followed by the optional re-ranker and the Ollama warm-up.

    A re-ranker failure only produces a warning and leaves ``re_ranker`` as
    ``None``. Any other failure resets all handles via ``reset_models`` and
    prints the error; no exception is raised for it.

    Args:
        force_reload: Reload even if ``models_loaded`` is already true.
    """
    global tokenizer, label_encoder, intent_model, re_ranker, models_loaded

    skip_loading = models_loaded and not force_reload
    if skip_loading:
        print("✅ Models already loaded.", flush=True)
        return

    apply_keras_patches()

    try:
        # Required artifacts: a failure in any of these aborts the load.
        tokenizer = load_tokenizer()
        label_encoder = load_label_encoder()
        intent_model = load_intent_model()

        # Optional component: failure is tolerated.
        re_ranker = _load_optional_reranker()

        warmup_ollama_model()

        models_loaded = True
        print("✅ Models Loaded.", flush=True)
    except Exception as e:
        reset_models()
        print(f"❌ Startup Error: {e}", flush=True)
def are_models_ready() -> bool:
    """Return True when the tokenizer, label encoder and intent model are all set.

    The re-ranker is not part of this check.
    """
    required_handles = (tokenizer, label_encoder, intent_model)
    return all(handle is not None for handle in required_handles)
def predict_intent(text: str, max_sequence_length: int = 20) -> Tuple[str, float]:
    """Predict the intent label for *text* and the model's confidence in it.

    Args:
        text: User input. It is converted with ``str()`` and stripped; falsy
            or whitespace-only input yields ``("unknown", 0.0)``.
        max_sequence_length: ``maxlen`` passed to ``pad_sequences``.

    Returns:
        ``(label, confidence)`` where ``label`` is the decoded class with the
        highest score and ``confidence`` is that score as a float.
        ``("unknown", 0.0)`` is returned when the models are not ready, the
        text is empty, the tokenizer produces no token ids for the text, or
        any error occurs during prediction (the error is printed, not raised).
    """
    if not are_models_ready():
        return "unknown", 0.0

    try:
        from keras.preprocessing.sequence import pad_sequences

        normalized_text = str(text or "").strip()
        if not normalized_text:
            return "unknown", 0.0

        seq = tokenizer.texts_to_sequences([normalized_text])

        # If no word mapped to a token id, the padded input would be all
        # padding and the model's answer would be arbitrary; treat it like
        # empty input instead of reporting a made-up intent.
        if len(seq) == 0 or len(seq[0]) == 0:
            return "unknown", 0.0

        padded = pad_sequences(seq, maxlen=max_sequence_length)
        predictions = intent_model.predict(padded, verbose=0)

        confidence = float(np.max(predictions))
        intent_index = int(np.argmax(predictions))
        intent_label = label_encoder.inverse_transform([intent_index])[0]

        return str(intent_label), confidence
    except Exception as e:
        print(f"Intent Prediction Error: {e}", flush=True)
        return "unknown", 0.0