Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,8 @@ import nltk
|
|
| 6 |
import gradio as gr
|
| 7 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 8 |
from langchain.vectorstores import FAISS
|
| 9 |
-
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
|
|
| 10 |
from sentence_transformers import SentenceTransformer
|
| 11 |
from transformers import AutoTokenizer
|
| 12 |
from nltk import sent_tokenize
|
|
@@ -78,8 +79,8 @@ def process_files(model_name, split_strategy, chunk_size=500, overlap_size=50, m
|
|
| 78 |
text += FileHandler.extract_text(file_path)
|
| 79 |
|
| 80 |
# Split text
|
| 81 |
-
if split_strategy == '
|
| 82 |
-
splitter =
|
| 83 |
else:
|
| 84 |
splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap_size)
|
| 85 |
|
|
|
|
| 6 |
import gradio as gr
|
| 7 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 8 |
from langchain.vectorstores import FAISS
|
| 9 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 10 |
+
from langchain_text_splitters import TokenTextSplitter
|
| 11 |
from sentence_transformers import SentenceTransformer
|
| 12 |
from transformers import AutoTokenizer
|
| 13 |
from nltk import sent_tokenize
|
|
|
|
| 79 |
text += FileHandler.extract_text(file_path)
|
| 80 |
|
| 81 |
# Split text
|
| 82 |
+
if split_strategy == 'token':
|
| 83 |
+
splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap_size)
|
| 84 |
else:
|
| 85 |
splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap_size)
|
| 86 |
|