averye-duke committed on
Commit
cbd72a8
·
1 Parent(s): 2b2b23b

fix chunking

Browse files
Files changed (1) hide show
  1. app.py +2 -10
app.py CHANGED
@@ -1,14 +1,9 @@
1
  import chromadb
2
  import gradio
3
- import nltk
4
  import sentence_transformers
5
  from chromadb.config import Settings
6
  import gradio as gr
7
 
8
- from nltk import sent_tokenize
9
-
10
- nltk.download('punkt')
11
- # The above code snippet was generated by ChatGPT 5.0 at 11:33 a 11/17/25
12
 
13
  # Provide your own dataset here. Decide on a chunking strategy and implement here.
14
  texts = [
@@ -48,11 +43,8 @@ texts = [
48
  ]
49
 
50
  # Chunking by sentence
51
- sentences = []
52
- for paragraph in texts:
53
- s = sent_tokenize(paragraph)
54
- sentences.extend(s)
55
- # The above code snippet was generated by ChatGPT 5.0 at 11:33 a 11/17/25
56
 
57
  # Generate embeddings
58
  from sentence_transformers import SentenceTransformer
 
1
  import chromadb
2
  import gradio
 
3
  import sentence_transformers
4
  from chromadb.config import Settings
5
  import gradio as gr
6
 
 
 
 
 
7
 
8
  # Provide your own dataset here. Decide on a chunking strategy and implement here.
9
  texts = [
 
43
  ]
44
 
45
  # Chunking by sentence
46
+ sentences = [sentence for paragraph in texts for sentence in paragraph.split(". ")]
47
+ # The above code snippet was generated by ChatGPT 5.0 at 3:31p 11/17/25
 
 
 
48
 
49
  # Generate embeddings
50
  from sentence_transformers import SentenceTransformer