nlp-ultimate-tutor

Build error

App Files Files Community

nlp-ultimate-tutor / utils /model_loader.py

aradhyapavan

nlp ultimate tutor

ca2c89c verified 5 months ago

raw

history blame contribute delete

8.54 kB

	import nltk
	import spacy
	from transformers import pipeline

	# Global models dictionary for persistent access
	models = {
	"nlp": None,
	"sentiment_analyzer": None,
	"emotion_classifier": None,
	"summarizer": None,
	"qa_pipeline": None,
	"translation_pipeline": None,
	"text_generator": None,
	"zero_shot": None,
	"embedding_model": None
	}

	def download_nltk_resources():
	"""Download and initialize NLTK resources"""
	resources = ['punkt', 'stopwords', 'vader_lexicon', 'wordnet', 'averaged_perceptron_tagger', 'sentiwordnet']
	for resource in resources:
	try:
	if resource == 'punkt':
	nltk.data.find(f'tokenizers/{resource}')
	elif resource in ['stopwords', 'wordnet']:
	nltk.data.find(f'corpora/{resource}')
	elif resource == 'vader_lexicon':
	nltk.data.find(f'sentiment/{resource}')
	elif resource == 'averaged_perceptron_tagger':
	nltk.data.find(f'taggers/{resource}')
	elif resource == 'sentiwordnet':
	nltk.data.find(f'corpora/{resource}')
	except LookupError:
	print(f"Downloading required NLTK resource: {resource}")
	nltk.download(resource)

	def load_spacy():
	"""Load spaCy model"""
	if models["nlp"] is None:
	try:
	models["nlp"] = spacy.load("en_core_web_sm")
	except:
	print("SpaCy model not found. Please run: python -m spacy download en_core_web_sm")
	return models["nlp"]

	def load_sentiment_analyzer():
	"""Load sentiment analysis model"""
	if models["sentiment_analyzer"] is None:
	try:
	models["sentiment_analyzer"] = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
	except Exception as e:
	print(f"Failed to load sentiment analyzer: {e}")
	return models["sentiment_analyzer"]

	def load_emotion_classifier():
	"""Load emotion classification model"""
	if models["emotion_classifier"] is None:
	try:
	models["emotion_classifier"] = pipeline(
	"text-classification",
	model="cardiffnlp/twitter-roberta-base-emotion",
	return_all_scores=True
	)
	except Exception as e:
	print(f"Failed to load emotion classifier: {e}")
	return models["emotion_classifier"]

	def load_summarizer():
	"""Load summarization model"""
	if models["summarizer"] is None:
	try:
	models["summarizer"] = pipeline("summarization", model="facebook/bart-large-cnn")
	except Exception as e:
	print(f"Failed to load summarizer: {e}")
	return models["summarizer"]

	def load_qa_pipeline():
	"""Load or initialize the question answering pipeline."""
	if models["qa_pipeline"] is None:
	try:
	from transformers import pipeline

	# Use a smaller model to reduce memory usage and improve speed
	models["qa_pipeline"] = pipeline(
	"question-answering",
	model="deepset/roberta-base-squad2", # You can change this to a different model if needed
	tokenizer="deepset/roberta-base-squad2"
	)
	except Exception as e:
	print(f"Error loading QA pipeline: {e}")
	models["qa_pipeline"] = None
	raise e
	return models["qa_pipeline"]

	def load_translation_pipeline():
	"""Load translation model"""
	if models["translation_pipeline"] is None:
	try:
	models["translation_pipeline"] = pipeline("translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr")
	except Exception as e:
	print(f"Failed to load translation model: {e}")
	return models["translation_pipeline"]

	def load_translator(source_lang="auto", target_lang="en"):
	"""
	Load a machine translation model for the given language pair.

	Args:
	source_lang (str): Source language code, or 'auto' for automatic detection
	target_lang (str): Target language code

	Returns:
	A translation pipeline or model
	"""
	from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer

	try:
	# For auto language detection, use a more general model
	if source_lang == "auto":
	# Using Helsinki-NLP's opus-mt model for translation
	model_name = "Helsinki-NLP/opus-mt-mul-en" # Multilingual to English
	translator = pipeline("translation", model=model_name)
	else:
	# For specific language pairs
	model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"

	# Load the model and tokenizer
	model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# Create the translation pipeline
	translator = pipeline("translation", model=model, tokenizer=tokenizer)

	return translator
	except Exception as e:
	# Fallback to a more general model if language pair isn't available
	try:
	# Use MarianMT model for many language pairs
	model_name = "Helsinki-NLP/opus-mt-mul-en" # Multilingual to English
	translator = pipeline("translation", model=model_name)
	return translator
	except Exception as nested_e:
	# If all else fails, return a simple callable object that returns an error message
	class ErrorTranslator:
	def __call__(self, text, **kwargs):
	return [{"translation_text": f"Error loading translation model: {str(e)}. Fallback also failed: {str(nested_e)}"}]
	return ErrorTranslator()

	def load_text_generator():
	"""Load text generation model"""
	if models["text_generator"] is None:
	try:
	models["text_generator"] = pipeline("text-generation", model="gpt2")
	except Exception as e:
	print(f"Failed to load text generator: {e}")
	return models["text_generator"]

	def load_zero_shot():
	"""Load zero-shot classification model"""
	if models["zero_shot"] is None:
	try:
	models["zero_shot"] = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
	except Exception as e:
	print(f"Failed to load zero-shot classifier: {e}")
	return models["zero_shot"]

	def load_embedding_model():
	"""Load sentence embedding model for semantic search"""
	if models.get("embedding_model") is None:
	try:
	from sentence_transformers import SentenceTransformer
	models["embedding_model"] = SentenceTransformer('all-MiniLM-L6-v2')
	except Exception as e:
	print(f"Failed to load embedding model: {e}")
	return models["embedding_model"]

	def initialize_all_models():
	"""Initialize all models for better performance"""
	print("Initializing NLP models...")

	# Download NLTK resources first
	download_nltk_resources()

	# Load spaCy model
	try:
	load_spacy()
	print("✓ spaCy model loaded")
	except Exception as e:
	print(f"✗ Failed to load spaCy: {e}")

	# Load transformer models (these might take time)
	models_to_load = [
	("Sentiment Analyzer", load_sentiment_analyzer),
	("Emotion Classifier", load_emotion_classifier),
	("Summarizer", load_summarizer),
	("QA Pipeline", load_qa_pipeline),
	("Text Generator", load_text_generator),
	("Zero-shot Classifier", load_zero_shot),
	("Embedding Model", load_embedding_model)
	]

	for name, loader_func in models_to_load:
	try:
	loader_func()
	print(f"✓ {name} loaded")
	except Exception as e:
	print(f"✗ Failed to load {name}: {e}")

	print("Model initialization complete!")

	def get_model_status():
	"""Get status of all models"""
	status = {}
	for model_name, model in models.items():
	status[model_name] = model is not None
	return status

	def clear_models():
	"""Clear all loaded models to free memory"""
	for key in models:
	models[key] = None
	print("All models cleared from memory")