Spaces:
Runtime error
Runtime error
Update src/utils/ingest_text.py
Browse files
- src/utils/ingest_text.py +6 -2
src/utils/ingest_text.py
CHANGED
|
@@ -8,6 +8,9 @@ from langchain_community.document_loaders.directory import DirectoryLoader
|
|
| 8 |
import os
|
| 9 |
from fastembed import TextEmbedding
|
| 10 |
from typing import List
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
import nest_asyncio
|
| 13 |
nest_asyncio.apply()
|
|
@@ -18,8 +21,8 @@ llamaparse_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
|
|
| 18 |
groq_api_key = os.getenv("GROQ_API_KEY")
|
| 19 |
|
| 20 |
|
| 21 |
-
parsed_data_file = r"
|
| 22 |
-
output_md = r"
|
| 23 |
loki = r"data"
|
| 24 |
|
| 25 |
import pickle
|
|
@@ -61,6 +64,7 @@ def create_vector_database(loc):
|
|
| 61 |
|
| 62 |
"""
|
| 63 |
# Call the function to either load or parse the data
|
|
|
|
| 64 |
llama_parse_documents = load_or_parse_data(loc)
|
| 65 |
#print(llama_parse_documents[1].text[:100])
|
| 66 |
|
|
|
|
| 8 |
import os
|
| 9 |
from fastembed import TextEmbedding
|
| 10 |
from typing import List
|
| 11 |
+
import nltk
|
| 12 |
+
nltk.download('punkt')
|
| 13 |
+
|
| 14 |
|
| 15 |
import nest_asyncio
|
| 16 |
nest_asyncio.apply()
|
|
|
|
| 21 |
groq_api_key = os.getenv("GROQ_API_KEY")
|
| 22 |
|
| 23 |
|
| 24 |
+
parsed_data_file = r"data/parsed_data.pkl"
|
| 25 |
+
output_md = r"data/output.md"
|
| 26 |
loki = r"data"
|
| 27 |
|
| 28 |
import pickle
|
|
|
|
| 64 |
|
| 65 |
"""
|
| 66 |
# Call the function to either load or parse the data
|
| 67 |
+
print("text_db")
|
| 68 |
llama_parse_documents = load_or_parse_data(loc)
|
| 69 |
#print(llama_parse_documents[1].text[:100])
|
| 70 |
|