jozzy committed on
Commit
c05488c
·
1 Parent(s): 7fe1405

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -9
app.py CHANGED
@@ -44,21 +44,23 @@ def init_pinecone(index_name):
44
  return index
45
 
46
 
47
- def process_file(index_name, docs):
48
  index = init_pinecone(index_name)
49
  embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
50
-
51
- loader = DirectoryLoader(docs.name, glob="*.txt", loader_cls=TextLoader)
52
- documents = loader.load()
53
- #print(documents)
54
 
 
 
 
 
 
 
55
 
56
  #pipeline='zh_core_web_sm'
57
- splter = SpacyTextSplitter(chunk_size=1000,chunk_overlap=200)
58
- split_text = splter.split_documents(documents)
59
 
60
- for document in split_text:
61
- Pinecone.from_documents([document], embeddings, index_name=index_name)
 
62
 
63
  return list_pinecone(index_name)
64
 
 
44
  return index
45
 
46
 
47
+ def process_file(index_name, dir):
48
  index = init_pinecone(index_name)
49
  embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
50
+ splter = SpacyTextSplitter(chunk_size=1000,chunk_overlap=200)
 
 
 
51
 
52
+ for doc in dir:
53
+ loader = TextLoader(doc.name , encoding='utf8')
54
+ content = loader.load()
55
+ split_text = splter.split_documents(content)
56
+ for text in split_text:
57
+ Pinecone.from_documents([text], embeddings, index_name=index_name)
58
 
59
  #pipeline='zh_core_web_sm'
 
 
60
 
61
+
62
+
63
+
64
 
65
  return list_pinecone(index_name)
66