Spaces:

gmustafa413
/

Multi-Format-Reader

Runtime error

gmustafa413 committed on Mar 7, 2025

Commit

c3e0ba9

verified ·

1 Parent(s): ed7cd09

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,7 +17,7 @@ from concurrent.futures import ThreadPoolExecutor
 # Configuration
 GROQ_API_KEY = "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"  # 🔑 REPLACE WITH YOUR ACTUAL KEY
 MODEL_NAME = "all-MiniLM-L6-v2"
-CHUNK_SIZE = 512
 MAX_TOKENS = 4096
 MODEL = SentenceTransformer(MODEL_NAME)
 WORKERS = 8
@@ -84,7 +84,7 @@ class DocumentProcessor:
     def semantic_chunking(self, text):
         words = re.findall(r'\S+\s*', text)
         chunks = [''.join(words[i:i+CHUNK_SIZE//2]) for i in range(0, len(words), CHUNK_SIZE//2)]
-        return chunks[:1000]  # Limit to 1000 chunks per document
     def process_documents(self, files):
         self.chunks = []

 # Configuration
 GROQ_API_KEY = "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"  # 🔑 REPLACE WITH YOUR ACTUAL KEY
 MODEL_NAME = "all-MiniLM-L6-v2"
+CHUNK_SIZE = 256
 MAX_TOKENS = 4096
 MODEL = SentenceTransformer(MODEL_NAME)
 WORKERS = 8
     def semantic_chunking(self, text):
         words = re.findall(r'\S+\s*', text)  # non-whitespace runs, each keeping its trailing whitespace
         chunks = [''.join(words[i:i+CHUNK_SIZE//2]) for i in range(0, len(words), CHUNK_SIZE//2)]  # consecutive groups of CHUNK_SIZE//2 tokens
+        return chunks[:]  # Returns a copy of ALL chunks; no per-document cap is applied here
     def process_documents(self, files):
         self.chunks = []