SCANSKY
/

BERTopic_Tourism_8L

Text Classification

Model card Files Files and versions

BERTopic_Tourism_8L / handler.py

SCANSKY's picture

Update handler.py

e4797c9 verified about 1 year ago

4.01 kB

	import json

	from bertopic import BERTopic

	class EndpointHandler:
	    """Callable request handler that runs topic inference with a BERTopic model.

	    A request flows through :meth:`preprocess`, :meth:`inference` and
	    :meth:`postprocess`; :meth:`__call__` chains the three and converts any
	    failure into an error payload instead of raising.
	    """

	    # How many of the highest-contributing topics are reported per request.
	    _TOP_TOPICS = 10
	    # How many representative words are reported for the assigned topic.
	    _TOP_WORDS = 5

	    def __init__(self, model_path="SCANSKY/BERTopic_Tourism_8L"):
	        """Load the BERTopic model.

	        Args:
	            model_path: Value handed to ``BERTopic.load``. The default looks
	                like a Hugging Face repository id (presumably resolved by
	                BERTopic itself -- confirm against the BERTopic docs).
	        """
	        self.topic_model = BERTopic.load(model_path)

	    def preprocess(self, data):
	        """Extract the text to analyse from the request payload.

	        Args:
	            data: Mapping holding the request; the text is read from its
	                ``"inputs"`` key. A list/tuple of strings is joined with
	                newlines, which ``inference`` later merges into a single
	                document anyway.

	        Returns:
	            The value of ``data["inputs"]`` (joined if it was a list or
	            tuple), or ``""`` when the key is absent.

	        Raises:
	            ValueError: If ``data`` has no ``get`` method, a list item is not
	                a string, or anything else fails while reading the payload.
	        """
	        try:
	            text_input = data.get("inputs", "")
	            if isinstance(text_input, (list, tuple)):
	                text_input = "\n".join(text_input)
	            return text_input
	        except Exception as e:
	            raise ValueError(f"Error during preprocessing: {str(e)}") from e

	    def _custom_label(self, topic):
	        """Return the custom label of *topic*, or ``"Topic <id>"`` if none are set."""
	        labels = getattr(self.topic_model, "custom_labels_", None)
	        if labels is None:
	            return f"Topic {topic}"  # Fallback label
	        # NOTE(review): the +1 offset presumably skips the label of the outlier
	        # topic (-1) stored at index 0 -- confirm for models without outliers.
	        return labels[topic + 1]

	    def inference(self, text_input):
	        """Run topic inference on the request text as one combined document.

	        The text is split on newlines, re-joined with spaces, and passed to
	        the model twice: ``transform`` yields the assigned topic, and
	        ``approximate_distribution`` yields per-topic contributions, of which
	        the highest ``_TOP_TOPICS`` are reported.

	        Args:
	            text_input: Text to analyse, one sentence per line.

	        Returns:
	            A list with one dict per ``(topic, probability)`` pair returned by
	            ``transform``. Keys: ``"topic"``, ``"probability"``,
	            ``"top_words"``, ``"customLabel"`` and ``"contribution"`` (rows of
	            ``["Topic <idx>", label, "<percent>%"]``, highest first).

	        Raises:
	            ValueError: Wraps any failure raised while running the model or
	                assembling the result.
	        """
	        try:
	            # One sentence per line is assumed; merge them into one document.
	            combined_document = " ".join(text_input.strip().split("\n"))

	            topics, probabilities = self.topic_model.transform([combined_document])

	            # Only the first return value (the distributions) is used.
	            distributions, _ = self.topic_model.approximate_distribution(
	                combined_document, window=16, batch_size=16
	            )
	            doc_topic_distribution = distributions[0]

	            # Position in the distribution is treated as the topic id.
	            ranked_topics = sorted(
	                enumerate(doc_topic_distribution),
	                key=lambda pair: pair[1],
	                reverse=True,
	            )[: self._TOP_TOPICS]
	            distributed_topics = [
	                [
	                    f"Topic {topic_idx}",
	                    self._custom_label(topic_idx),
	                    f"{round(contribution * 100, 2)}%",
	                ]
	                for topic_idx, contribution in ranked_topics
	            ]

	            results = []
	            # NOTE(review): assumes transform yields one scalar probability per
	            # document; float() fails on a full probability row -- confirm.
	            for topic, prob in zip(topics, probabilities):
	                # A falsy get_topic result means no words are available.
	                topic_info = self.topic_model.get_topic(topic)
	                topic_words = [word for word, _score in topic_info] if topic_info else []

	                results.append({
	                    "topic": int(topic),
	                    "probability": float(prob),
	                    "top_words": topic_words[: self._TOP_WORDS],
	                    "customLabel": self._custom_label(topic),
	                    "contribution": distributed_topics,
	                })

	            return results
	        except Exception as e:
	            raise ValueError(f"Error during inference: {str(e)}") from e

	    def postprocess(self, results):
	        """Return the inference results unchanged."""
	        return results

	    def __call__(self, data):
	        """Handle one request end to end.

	        Args:
	            data: Request payload, passed to :meth:`preprocess`.

	        Returns:
	            The list produced by :meth:`inference`, or ``[{"error": message}]``
	            when any stage raises.
	        """
	        try:
	            text_input = self.preprocess(data)
	            results = self.inference(text_input)
	            return self.postprocess(results)
	        except Exception as e:
	            # Report failures as a payload so the caller always gets a list.
	            return [{"error": str(e)}]