SCANSKY
/

BERTopic-Tourism-English

@@ -23,38 +23,41 @@ class EndpointHandler:
     def inference(self, text_input):
         """
         Perform inference using the BERTopic model.

         - Process the text input and generate topic predictions, treating
           every non-blank line as its own document.

         Args:
             text_input (str): Text to analyse, one document per line. Blank
                 lines and surrounding whitespace (including carriage
                 returns) are ignored.

         Returns:
             list[dict]: One entry per document, with the keys "topic" (int),
             "probability" (float, or None when the model reports no
             probabilities), "top_words" (at most five words) and
             "customLabel" (str). Empty when the input holds no text.

         Raises:
             ValueError: If any step fails; the underlying exception is
                 chained as ``__cause__``.
         """
         try:
             # Split text into documents (assuming one document per line),
             # dropping blank lines so the model never sees empty documents.
             stripped_lines = (line.strip() for line in text_input.strip().split('\n'))
             docs = [line for line in stripped_lines if line]
             if not docs:
                 return []

             # Perform topic inference
             topics, probabilities = self.topic_model.transform(docs)
             if probabilities is None:
                 # Keep the zip below aligned when no probabilities are returned.
                 probabilities = [None] * len(topics)

             custom_labels = getattr(self.topic_model, "custom_labels_", None)
             # NOTE(review): BERTopic appears to index custom labels with
             # `topic + _outliers` (1 when an outlier topic -1 exists, else 0).
             # `_outliers` is private, so fall back to the previous fixed
             # offset of 1 when it is missing - confirm against the installed
             # BERTopic version.
             label_offset = getattr(self.topic_model, "_outliers", 1)

             # Prepare the results
             results = []
             for topic, prob in zip(topics, probabilities):
                 topic = int(topic)
                 topic_info = self.topic_model.get_topic(topic)
                 topic_words = [word for word, _ in topic_info] if topic_info else []

                 if prob is None:
                     probability = None
                 else:
                     try:
                         probability = float(prob)
                     except TypeError:
                         # `prob` is a per-topic distribution rather than a
                         # scalar; report its highest value.
                         probability = float(max(prob))

                 # Get custom label for the topic (with fallback if no label
                 # exists at that position).
                 label_index = topic + label_offset
                 if custom_labels is not None and 0 <= label_index < len(custom_labels):
                     custom_label = custom_labels[label_index]
                 else:
                     custom_label = f"Topic {topic}"  # Fallback label

                 results.append({
                     "topic": topic,
                     "probability": probability,
                     "top_words": topic_words[:5],  # Top 5 words
                     "customLabel": custom_label,  # Add custom label
                 })
             return results
         except Exception as e:
             # Callers rely on ValueError; chain the cause to keep the traceback.
             raise ValueError(f"Error during inference: {e}") from e
     def postprocess(self, results):
         """
         Postprocess the inference results into a JSON-serializable list.

     def inference(self, text_input):
         """
         Perform inference using the BERTopic model.

         - Combine all sentences into a single document and find shared topics.

         Args:
             text_input (str): Text to analyse, one sentence per line. Blank
                 lines and surrounding whitespace (including carriage
                 returns) are ignored before the sentences are joined.

         Returns:
             list[dict]: One entry per prediction returned by the model, with
             the keys "topic" (int), "probability" (float, or None when the
             model reports no probabilities), "top_words" (at most five
             words) and "customLabel" (str). Empty when the input holds no
             text.

         Raises:
             ValueError: If any step fails; the underlying exception is
                 chained as ``__cause__``.
         """
         try:
             # Split text into sentences (assuming one sentence per line),
             # dropping blank lines and stray carriage returns.
             stripped_lines = (line.strip() for line in text_input.strip().split('\n'))
             sentences = [line for line in stripped_lines if line]
             if not sentences:
                 return []

             # Combine all sentences into a single document
             combined_document = " ".join(sentences)

             # Perform topic inference on the combined document
             topics, probabilities = self.topic_model.transform([combined_document])
             if probabilities is None:
                 # Keep the zip below aligned when no probabilities are returned.
                 probabilities = [None] * len(topics)

             custom_labels = getattr(self.topic_model, "custom_labels_", None)
             # NOTE(review): BERTopic appears to index custom labels with
             # `topic + _outliers` (1 when an outlier topic -1 exists, else 0).
             # `_outliers` is private, so fall back to the previous fixed
             # offset of 1 when it is missing - confirm against the installed
             # BERTopic version.
             label_offset = getattr(self.topic_model, "_outliers", 1)

             # Prepare the results
             results = []
             for topic, prob in zip(topics, probabilities):
                 topic = int(topic)
                 topic_info = self.topic_model.get_topic(topic)
                 topic_words = [word for word, _ in topic_info] if topic_info else []

                 if prob is None:
                     probability = None
                 else:
                     try:
                         probability = float(prob)
                     except TypeError:
                         # `prob` is a per-topic distribution rather than a
                         # scalar; report its highest value.
                         probability = float(max(prob))

                 # Get custom label for the topic (with fallback if no label
                 # exists at that position).
                 label_index = topic + label_offset
                 if custom_labels is not None and 0 <= label_index < len(custom_labels):
                     custom_label = custom_labels[label_index]
                 else:
                     custom_label = f"Topic {topic}"  # Fallback label

                 results.append({
                     "topic": topic,
                     "probability": probability,
                     "top_words": topic_words[:5],  # Top 5 words
                     "customLabel": custom_label,  # Add custom label
                 })
             return results
         except Exception as e:
             # Callers rely on ValueError; chain the cause to keep the traceback.
             raise ValueError(f"Error during inference: {e}") from e
     def postprocess(self, results):
         """
         Postprocess the inference results into a JSON-serializable list.