Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
import os
|
| 2 |
-
import PyPDF2
|
| 3 |
import openai
|
| 4 |
from llama_index import download_loader, set_global_service_context, VectorStoreIndex, LLMPredictor, PromptHelper, ServiceContext, StorageContext, load_index_from_storage
|
| 5 |
from llama_index.readers import SimpleDirectoryReader
|
|
@@ -26,36 +25,6 @@ llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=MODEL_NAME, ma
|
|
| 26 |
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
|
| 27 |
# set_global_service_context(service_context)
|
| 28 |
|
| 29 |
-
|
| 30 |
-
def read_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF file as one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text extracted from each page, concatenated in page order
        with no separator between pages.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # A page without extractable text may give back an empty/None
        # result; treat that as "" so one such page cannot abort the
        # whole document. Joining once also avoids repeated string +=.
        return "".join(page.extract_text() or "" for page in reader.pages)
|
| 37 |
-
|
| 38 |
-
def read_text_from_txt(txt_path):
    """Return the full contents of a text file.

    Args:
        txt_path: Path of the text file to read.

    Returns:
        The file contents as a single string.

    Raises:
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Pin the encoding: without it, open() falls back to the platform's
    # locale encoding, so the same file could decode differently (or
    # fail) depending on the machine the app runs on.
    with open(txt_path, "r", encoding="utf-8") as file:
        return file.read()
|
| 42 |
-
|
| 43 |
-
def load_documents(directory_path):
    """Read all ``.pdf`` and ``.txt`` files in a directory into one mapping.

    Files with any other extension are skipped. PDF files are read with
    ``read_text_from_pdf`` and text files with ``read_text_from_txt``.

    Args:
        directory_path: Directory whose entries are scanned (not recursive).

    Returns:
        An empty dict when the directory holds no supported file;
        otherwise ``{"text": combined}`` where ``combined`` is the text of
        every supported file, in file-name order, separated by newlines.
    """
    texts = []
    # os.listdir() returns entries in arbitrary order; sort so the combined
    # text is the same on every run.
    for file_name in sorted(os.listdir(directory_path)):
        file_path = os.path.join(directory_path, file_name)
        if file_name.endswith(".pdf"):
            texts.append(read_text_from_pdf(file_path))
        elif file_name.endswith(".txt"):
            texts.append(read_text_from_txt(file_path))
        # Any other extension is ignored.

    if not texts:
        return {}
    # Previously each file overwrote the single "text" entry, so only the
    # last file listed survived; keep every file's text instead.
    return {"text": "\n".join(texts)}
|
| 58 |
-
|
| 59 |
def construct_index(directory_path):
|
| 60 |
|
| 61 |
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import openai
|
| 3 |
from llama_index import download_loader, set_global_service_context, VectorStoreIndex, LLMPredictor, PromptHelper, ServiceContext, StorageContext, load_index_from_storage
|
| 4 |
from llama_index.readers import SimpleDirectoryReader
|
|
|
|
| 25 |
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
|
| 26 |
# set_global_service_context(service_context)
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
def construct_index(directory_path):
|
| 29 |
|
| 30 |
|