Spaces:

shivam701171
/

Gradio_deploy

Sleeping

shivam701171 committed on Jun 11, 2025

Commit

7d458e9

verified ·

1 Parent(s): a849f49

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -645,7 +645,7 @@ class EnhancedPDFProcessor:
                 not clean_text.isdigit() and  # Not just numbers
                 (word_count > 0 or len(clean_text) > 30))  # Has common words or substantial length
-    def chunk_text(self, text: str, chunk_size: int = 400, overlap: int = 50) -> List[str]:
         """Split text into overlapping chunks"""
         if not text or len(text.strip()) < 50:
             return []
@@ -701,7 +701,7 @@ class GoogleT5Model:
         self.tokenizer = None
         self.available = False
         # Use even smaller model for HF Spaces compatibility
-        self.model_name = "google/flan-t5-small"
         if libs.get('torch') and libs.get('transformers'):
             self._initialize_model()

                 not clean_text.isdigit() and  # Not just numbers
                 (word_count > 0 or len(clean_text) > 30))  # Has common words or substantial length
+    def chunk_text(self, text: str, chunk_size: int = 300, overlap: int = 50) -> List[str]:
         """Split text into overlapping chunks"""
         if not text or len(text.strip()) < 50:
             return []
         self.tokenizer = None
         self.available = False
         # Use even smaller model for HF Spaces compatibility
+        self.model_name = "google/flan-t5-base"
         if libs.get('torch') and libs.get('transformers'):
             self._initialize_model()