DocUA committed on
Commit
fd0d61e
·
1 Parent(s): 65b972a

Implement batch classification for uploaded datasets and enhance export functionality with reasoning

Browse files
Files changed (1) hide show
  1. src/interface/file_upload_interface.py +129 -33
src/interface/file_upload_interface.py CHANGED
@@ -361,6 +361,109 @@ class FileUploadInterfaceController(ProgressTrackingMixin):
361
  classification["indicators"] = indicators
362
 
363
  return classification
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
365
  def submit_batch_verification(self, is_correct: bool, correction: Optional[str] = None, notes: str = "") -> Tuple[bool, str, Dict[str, Any]]:
366
  """
@@ -856,47 +959,40 @@ def create_file_upload_interface() -> gr.Blocks:
856
  success, message, session = controller.start_batch_processing(verifier_name, file_result)
857
 
858
  if success:
859
- # Load first message
860
- current_message, classification_result = controller.get_current_message_for_batch_processing()
861
-
862
- if current_message:
863
- # Format displays
864
- expected_badge = controller._get_classification_badge(current_message.pre_classified_label)
865
- actual_badge = controller._get_classification_badge(classification_result.get('decision', 'unknown'))
866
- confidence_text = f"📊 {classification_result.get('confidence', 0) * 100:.1f}% confident"
867
- indicators_text = "🔍 " + ", ".join(classification_result.get('indicators', ['No indicators']))
868
-
869
- progress_text = f"Progress: 1 of {len(file_result.parsed_test_cases)} messages"
870
-
871
- return (
872
- gr.Row(visible=True), # message_processing_section
873
- session, # current_session_state
874
- current_message.text, # current_message_display
875
- f"Expected: {expected_badge}", # expected_classification_display
876
- f"AI Result: {actual_badge}", # actual_classification_display
877
- confidence_text, # classifier_confidence_display
878
- indicators_text, # classifier_indicators_display
879
- progress_text, # batch_progress_display
880
- gr.DownloadButton(visible=False), # export_csv_btn
881
- gr.DownloadButton(visible=False), # export_json_btn
882
- gr.DownloadButton(visible=False), # export_xlsx_btn
883
- message # status_message
884
- )
885
- else:
886
  return (
887
- gr.Row(visible=False), # message_processing_section
888
  session, # current_session_state
889
  "", # current_message_display
890
  "", # expected_classification_display
891
  "", # actual_classification_display
892
  "", # classifier_confidence_display
893
  "", # classifier_indicators_display
894
- "No messages to process", # batch_progress_display
895
- gr.DownloadButton(visible=False), # export_csv_btn
896
- gr.DownloadButton(visible=False), # export_json_btn
897
- gr.DownloadButton(visible=False), # export_xlsx_btn
898
- "❌ No messages in file" # status_message
899
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
900
  else:
901
  return (
902
  gr.Row(visible=False), # message_processing_section
 
361
  classification["indicators"] = indicators
362
 
363
  return classification
364
+
365
+ def run_batch_classification(self) -> Tuple[bool, str, Dict[str, Any]]:
366
+ """Run classification for the whole uploaded dataset and persist results.
367
+
368
+ File Upload Mode is already labeled (ground truth provided in the file), so we
369
+ don't need interactive message-by-message verification. Instead, we:
370
+ - classify every message
371
+ - store the model output as reasoning in `verifier_notes`
372
+ - mark each record as correct/incorrect by comparing to ground truth
373
+ """
374
+ if not self.current_session or not self.current_file_result:
375
+ return False, "❌ No active session", {}
376
+
377
+ total = len(self.current_file_result.parsed_test_cases)
378
+ if total == 0:
379
+ return False, "❌ No messages to process", {}
380
+
381
+ try:
382
+ # Reset any prior run state
383
+ self.current_session.verifications = []
384
+ self.current_session.verified_count = 0
385
+ self.current_session.correct_count = 0
386
+ self.current_session.incorrect_count = 0
387
+ self.current_session.verified_message_ids = []
388
+
389
+ self.setup_progress_tracking(total)
390
+
391
+ for idx, test_message in enumerate(self.current_file_result.parsed_test_cases):
392
+ self.batch_processing_start_time = datetime.now()
393
+
394
+ user_prompt = (
395
+ "Please analyze this patient message for spiritual distress:\n\n"
396
+ f"{test_message.text}"
397
+ )
398
+ raw_response = self.ai_client.call_spiritual_api(
399
+ system_prompt=SYSTEM_PROMPT_ENTRY_CLASSIFIER,
400
+ user_prompt=user_prompt,
401
+ temperature=0.3,
402
+ )
403
+
404
+ classification_result = self._parse_classification_response(raw_response)
405
+
406
+ classifier_decision = classification_result.get("decision", "green")
407
+ if classifier_decision not in ["green", "yellow", "red"]:
408
+ classifier_decision = "green"
409
+
410
+ ground_truth = test_message.pre_classified_label
411
+ if ground_truth not in ["green", "yellow", "red"]:
412
+ ground_truth = "green"
413
+
414
+ is_correct = classifier_decision == ground_truth
415
+
416
+ verification_record = VerificationRecord(
417
+ message_id=test_message.message_id,
418
+ original_message=test_message.text,
419
+ classifier_decision=classifier_decision,
420
+ classifier_confidence=classification_result.get("confidence", 0.0),
421
+ classifier_indicators=classification_result.get("indicators", []),
422
+ ground_truth_label=ground_truth,
423
+ verifier_notes=raw_response, # store full LLM output as reasoning
424
+ is_correct=is_correct,
425
+ )
426
+
427
+ self.current_session.verifications.append(verification_record)
428
+ self.current_session.verified_count += 1
429
+ self.current_session.verified_message_ids.append(test_message.message_id)
430
+ if is_correct:
431
+ self.current_session.correct_count += 1
432
+ else:
433
+ self.current_session.incorrect_count += 1
434
+
435
+ self.record_verification_with_timing(is_correct, self.batch_processing_start_time)
436
+ self.current_session.current_queue_index = idx + 1
437
+
438
+ self.current_session.is_complete = True
439
+ self.current_session.completed_at = datetime.now()
440
+
441
+ self.store.save_session(self.current_session)
442
+
443
+ accuracy = (
444
+ (self.current_session.correct_count / self.current_session.verified_count * 100)
445
+ if self.current_session.verified_count
446
+ else 0
447
+ )
448
+ stats = {
449
+ "processed": self.current_session.verified_count,
450
+ "total": total,
451
+ "correct": self.current_session.correct_count,
452
+ "incorrect": self.current_session.incorrect_count,
453
+ "accuracy": accuracy,
454
+ "is_complete": True,
455
+ }
456
+ return True, f"✅ Batch classification completed. Accuracy: {accuracy:.1f}%", stats
457
+
458
+ except Exception as e:
459
+ return False, f"❌ Error during batch classification: {str(e)}", {}
460
+
461
+ def export_batch_results_with_reasoning(self, format_type: str) -> Tuple[bool, str, Optional[str]]:
462
+ """Export results including LLM reasoning.
463
+
464
+ We rely on `verifier_notes` field to carry reasoning (raw model output).
465
+ """
466
+ return self.export_batch_results(format_type)
467
 
468
  def submit_batch_verification(self, is_correct: bool, correction: Optional[str] = None, notes: str = "") -> Tuple[bool, str, Dict[str, Any]]:
469
  """
 
959
  success, message, session = controller.start_batch_processing(verifier_name, file_result)
960
 
961
  if success:
962
+ # Simplified behavior: dataset is already labeled, so run full batch
963
+ # classification immediately and generate results for export.
964
+ run_ok, run_msg, stats = controller.run_batch_classification()
965
+ if run_ok:
966
+ progress_text = f"✅ Completed: {stats.get('processed', 0)}/{stats.get('total', 0)} messages"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
967
  return (
968
+ gr.Row(visible=False), # message_processing_section (not used in simplified flow)
969
  session, # current_session_state
970
  "", # current_message_display
971
  "", # expected_classification_display
972
  "", # actual_classification_display
973
  "", # classifier_confidence_display
974
  "", # classifier_indicators_display
975
+ progress_text, # batch_progress_display
976
+ gr.DownloadButton(visible=True), # export_csv_btn
977
+ gr.DownloadButton(visible=True), # export_json_btn
978
+ gr.DownloadButton(visible=True), # export_xlsx_btn
979
+ run_msg # status_message
980
  )
981
+
982
+ return (
983
+ gr.Row(visible=False), # message_processing_section
984
+ session, # current_session_state
985
+ "", # current_message_display
986
+ "", # expected_classification_display
987
+ "", # actual_classification_display
988
+ "", # classifier_confidence_display
989
+ "", # classifier_indicators_display
990
+ "❌ Batch classification failed", # batch_progress_display
991
+ gr.DownloadButton(visible=False), # export_csv_btn
992
+ gr.DownloadButton(visible=False), # export_json_btn
993
+ gr.DownloadButton(visible=False), # export_xlsx_btn
994
+ run_msg # status_message
995
+ )
996
  else:
997
  return (
998
  gr.Row(visible=False), # message_processing_section