shivam701171 committed on
Commit
7d458e9
·
verified ·
1 Parent(s): a849f49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -645,7 +645,7 @@ class EnhancedPDFProcessor:
645
  not clean_text.isdigit() and # Not just numbers
646
  (word_count > 0 or len(clean_text) > 30)) # Has common words or substantial length
647
 
648
- def chunk_text(self, text: str, chunk_size: int = 400, overlap: int = 50) -> List[str]:
649
  """Split text into overlapping chunks"""
650
  if not text or len(text.strip()) < 50:
651
  return []
@@ -701,7 +701,7 @@ class GoogleT5Model:
701
  self.tokenizer = None
702
  self.available = False
703
  # Use even smaller model for HF Spaces compatibility
704
- self.model_name = "google/flan-t5-small"
705
 
706
  if libs.get('torch') and libs.get('transformers'):
707
  self._initialize_model()
 
645
  not clean_text.isdigit() and # Not just numbers
646
  (word_count > 0 or len(clean_text) > 30)) # Has common words or substantial length
647
 
648
+ def chunk_text(self, text: str, chunk_size: int = 300, overlap: int = 50) -> List[str]:
649
  """Split text into overlapping chunks"""
650
  if not text or len(text.strip()) < 50:
651
  return []
 
701
  self.tokenizer = None
702
  self.available = False
703
  # Use even smaller model for HF Spaces compatibility
704
+ self.model_name = "google/flan-t5-base"
705
 
706
  if libs.get('torch') and libs.get('transformers'):
707
  self._initialize_model()