Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,7 +17,7 @@ from concurrent.futures import ThreadPoolExecutor
|
|
| 17 |
# Configuration
|
| 18 |
GROQ_API_KEY = "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" # 🔑 REPLACE WITH YOUR ACTUAL KEY
|
| 19 |
MODEL_NAME = "all-MiniLM-L6-v2"
|
| 20 |
-
CHUNK_SIZE =
|
| 21 |
MAX_TOKENS = 4096
|
| 22 |
MODEL = SentenceTransformer(MODEL_NAME)
|
| 23 |
WORKERS = 8
|
|
@@ -84,7 +84,7 @@ class DocumentProcessor:
|
|
| 84 |
def semantic_chunking(self, text):
|
| 85 |
words = re.findall(r'\S+\s*', text)
|
| 86 |
chunks = [''.join(words[i:i+CHUNK_SIZE//2]) for i in range(0, len(words), CHUNK_SIZE//2)]
|
| 87 |
-
return chunks[:
|
| 88 |
|
| 89 |
def process_documents(self, files):
|
| 90 |
self.chunks = []
|
|
|
|
| 17 |
# Configuration
|
| 18 |
GROQ_API_KEY = "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" # 🔑 REPLACE WITH YOUR ACTUAL KEY
|
| 19 |
MODEL_NAME = "all-MiniLM-L6-v2"
|
| 20 |
+
CHUNK_SIZE = 256
|
| 21 |
MAX_TOKENS = 4096
|
| 22 |
MODEL = SentenceTransformer(MODEL_NAME)
|
| 23 |
WORKERS = 8
|
|
|
|
| 84 |
def semantic_chunking(self, text):
|
| 85 |
words = re.findall(r'\S+\s*', text)
|
| 86 |
chunks = [''.join(words[i:i+CHUNK_SIZE//2]) for i in range(0, len(words), CHUNK_SIZE//2)]
|
| 87 |
+
return chunks[:1000] # Limit to 1000 chunks per document
|
| 88 |
|
| 89 |
def process_documents(self, files):
|
| 90 |
self.chunks = []
|