Spaces:
Sleeping
Sleeping
Update split_files_to_excel.py
Browse files- split_files_to_excel.py +6 -1
split_files_to_excel.py
CHANGED
|
@@ -26,6 +26,7 @@ from pypdf import PdfReader
|
|
| 26 |
import pandas as pd
|
| 27 |
|
| 28 |
|
|
|
|
| 29 |
MODEL = "thenlper/gte-base"
|
| 30 |
CHUNK_SIZE = 1000
|
| 31 |
CHUNK_OVERLAP = 200
|
|
@@ -35,11 +36,15 @@ embeddings = HuggingFaceEmbeddings(
|
|
| 35 |
cache_folder=os.getenv("SENTENCE_TRANSFORMERS_HOME")
|
| 36 |
)
|
| 37 |
|
|
|
|
|
|
|
| 38 |
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
|
|
|
|
| 39 |
|
| 40 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 41 |
model_id,
|
| 42 |
-
padding_side="left"
|
|
|
|
| 43 |
)
|
| 44 |
|
| 45 |
text_splitter = CharacterTextSplitter(
|
|
|
|
| 26 |
import pandas as pd
|
| 27 |
|
| 28 |
|
| 29 |
+
|
| 30 |
MODEL = "thenlper/gte-base"
|
| 31 |
CHUNK_SIZE = 1000
|
| 32 |
CHUNK_OVERLAP = 200
|
|
|
|
| 36 |
cache_folder=os.getenv("SENTENCE_TRANSFORMERS_HOME")
|
| 37 |
)
|
| 38 |
|
| 39 |
+
|
| 40 |
+
|
| 41 |
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
|
| 42 |
+
access_token = os.getenv("HUGGINGFACE_SPLITFILES_API_KEY")
|
| 43 |
|
| 44 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 45 |
model_id,
|
| 46 |
+
padding_side="left",
|
| 47 |
+
token = access_token
|
| 48 |
)
|
| 49 |
|
| 50 |
text_splitter = CharacterTextSplitter(
|