Davide Panza committed on
Commit
60710f0
·
verified ·
1 Parent(s): e9dc024

Update app/backend/text_processing.py

Browse files
Files changed (1) hide show
  1. app/backend/text_processing.py +7 -3
app/backend/text_processing.py CHANGED
@@ -1,9 +1,13 @@
1
- from nltk.tokenize import sent_tokenize
2
- import nltk
3
  import streamlit as st
4
  import os
 
 
 
 
 
5
 
6
- nltk.data.path.append(os.path.join(os.path.dirname(__file__), "..", "nltk_data"))
7
 
8
  def text_chunking(text, max_words=750, min_words=400, overlap_sentences=5):
9
  """
 
1
+
 
2
  import streamlit as st
3
  import os
4
+ import nltk
5
+
6
+ # Tell NLTK where to look for the punkt tokenizer
7
+ nltk_path = os.path.join(os.getcwd(), "nltk_data")
8
+ nltk.data.path.append(nltk_path)
9
 
10
+ from nltk.tokenize import sent_tokenize
11
 
12
  def text_chunking(text, max_words=750, min_words=400, overlap_sentences=5):
13
  """