Spaces:

averyestopinal
/

Week13

Paused

averye-duke committed on Nov 17, 2025

Commit

cbd72a8

1 Parent(s): 2b2b23b

fix chunking

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,14 +1,9 @@
 import chromadb
 import gradio
-import nltk
 import sentence_transformers
 from chromadb.config import Settings
 import gradio as gr
-from nltk import sent_tokenize
-nltk.download('punkt')
-# The above code snippet was generated by ChatGPT 5.0 at 11:33 a 11/17/25
 # Provide your own dataset here. Decide on a chunking strategy and implement here.
 texts = [
@@ -48,11 +43,8 @@ texts = [
 ]
 # Chunking by sentence
-sentences = []
-for paragraph in texts:
-    s = sent_tokenize(paragraph)
-    sentences.extend(s)
-# The above code snippet was generated by ChatGPT 5.0 at 11:33 a 11/17/25
 # Generate embeddings
 from sentence_transformers import SentenceTransformer

 import chromadb
 import gradio
 import sentence_transformers
 from chromadb.config import Settings
 import gradio as gr
 # Provide your own dataset here. Decide on a chunking strategy and implement here.
 texts = [
 ]
 # Chunking by sentence
+sentences = [sentence for paragraph in texts for sentence in paragraph.split(". ")]
+# The above code snippet was generated by ChatGPT 5.0 at 3:31p 11/17/25
 # Generate embeddings
 from sentence_transformers import SentenceTransformer