gmustafa413 commited on
Commit
c3e0ba9
·
verified ·
1 Parent(s): ed7cd09

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -17,7 +17,7 @@ from concurrent.futures import ThreadPoolExecutor
17
  # Configuration
18
  GROQ_API_KEY = "YOUR_GROQ_API_KEY_HERE" # 🔑 REPLACE WITH YOUR ACTUAL KEY — never commit a real key; the original secret was exposed here and should be revoked
19
  MODEL_NAME = "all-MiniLM-L6-v2"
20
- CHUNK_SIZE = 512
21
  MAX_TOKENS = 4096
22
  MODEL = SentenceTransformer(MODEL_NAME)
23
  WORKERS = 8
@@ -84,7 +84,7 @@ class DocumentProcessor:
84
  def semantic_chunking(self, text):
85
  words = re.findall(r'\S+\s*', text)
86
  chunks = [''.join(words[i:i+CHUNK_SIZE//2]) for i in range(0, len(words), CHUNK_SIZE//2)]
87
- return chunks[:1000] # Limit to 1000 chunks per document
88
 
89
  def process_documents(self, files):
90
  self.chunks = []
 
17
  # Configuration
18
  GROQ_API_KEY = "YOUR_GROQ_API_KEY_HERE" # 🔑 REPLACE WITH YOUR ACTUAL KEY — never commit a real key; the original secret was exposed here and should be revoked
19
  MODEL_NAME = "all-MiniLM-L6-v2"
20
+ CHUNK_SIZE = 256
21
  MAX_TOKENS = 4096
22
  MODEL = SentenceTransformer(MODEL_NAME)
23
  WORKERS = 8
 
84
  def semantic_chunking(self, text):
85
  words = re.findall(r'\S+\s*', text)
86
  chunks = [''.join(words[i:i+CHUNK_SIZE//2]) for i in range(0, len(words), CHUNK_SIZE//2)]
87
+ return chunks[:] # Return all chunks (the previous 1000-chunk per-document limit was removed)
88
 
89
  def process_documents(self, files):
90
  self.chunks = []