Update app.py
Browse files
app.py
CHANGED
|
@@ -40,8 +40,8 @@ all_max_len = 3000
|
|
| 40 |
|
| 41 |
|
| 42 |
# Initialize Pinecone client and create an index
|
| 43 |
-
pinecone.init(api_key=
|
| 44 |
-
index = pinecone.Index(index_name=
|
| 45 |
|
| 46 |
|
| 47 |
def get_emb(text):
|
|
@@ -192,40 +192,6 @@ def up_file(fls):
|
|
| 192 |
|
| 193 |
doc_text_list += res_list
|
| 194 |
|
| 195 |
-
#pptx Extracting
|
| 196 |
-
for i in pptx:
|
| 197 |
-
loader = UnstructuredPowerPointLoader(i)
|
| 198 |
-
data = loader.load()
|
| 199 |
-
# content = str(data).split("'")
|
| 200 |
-
# cnt = content[1]
|
| 201 |
-
# # c = cnt.split('\\n\\n')
|
| 202 |
-
# # final = "".join(c)
|
| 203 |
-
# c = cnt.replace('\\n\\n',"").replace("<PAGE BREAK>","").replace("\t","")
|
| 204 |
-
doc_text_list.append(data)
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
#Doc Extracting
|
| 209 |
-
for i in docs:
|
| 210 |
-
loader = UnstructuredWordDocumentLoader(i)
|
| 211 |
-
data = loader.load()
|
| 212 |
-
# content = str(data).split("'")
|
| 213 |
-
# cnt = content[1]
|
| 214 |
-
# # c = cnt.split('\\n\\n')
|
| 215 |
-
# # final = "".join(c)
|
| 216 |
-
# c = cnt.replace('\\n\\n',"").replace("<PAGE BREAK>","").replace("\t","")
|
| 217 |
-
doc_text_list.append(data)
|
| 218 |
-
|
| 219 |
-
# #Image Extraction
|
| 220 |
-
# for i in jpg:
|
| 221 |
-
# loader = UnstructuredImageLoader(i)
|
| 222 |
-
# data = loader.load()
|
| 223 |
-
# # content = str(data).split("'")
|
| 224 |
-
# # cnt = content[1]
|
| 225 |
-
# # # c = cnt.split('\\n\\n')
|
| 226 |
-
# # # final = "".join(c)
|
| 227 |
-
# # c = cnt.replace('\\n\\n',"").replace("<PAGE BREAK>","").replace("\t","")
|
| 228 |
-
# doc_text_list.append(data)
|
| 229 |
|
| 230 |
doc_text_list = [str(text).strip() for text in doc_text_list if len(str(text).strip()) > 0]
|
| 231 |
# print(doc_text_list)
|
|
@@ -257,7 +223,7 @@ def pine(data):
|
|
| 257 |
|
| 258 |
|
| 259 |
Embedding_model = "text-embedding-ada-002"
|
| 260 |
-
embeddings = OpenAIEmbeddings(openai_api_key="sk-
|
| 261 |
|
| 262 |
print(requests.post(url = chat_emd))
|
| 263 |
|
|
@@ -266,9 +232,7 @@ def pine(data):
|
|
| 266 |
# headers=headers
|
| 267 |
# )
|
| 268 |
|
| 269 |
-
pinecone.init(api_key = "ffb1f594-0915-4ebf-835f-c1eaa62fdcdc",
|
| 270 |
-
environment = "us-west4-gcp-free"
|
| 271 |
-
)
|
| 272 |
|
| 273 |
index_name = "test"
|
| 274 |
docstore = Pinecone.from_texts([d for d in doc_spilt],embeddings,index_name = index_name,namespace='a1')
|
|
|
|
| 40 |
|
| 41 |
|
| 42 |
# Initialize Pinecone client and create an index
|
| 43 |
+
pinecone.init(api_key='d0a5b89b-b901-4b47-bc99-38b93695390d',environment = 'asia-southeast1-gcp')
|
| 44 |
+
index = pinecone.Index(index_name='test')
|
| 45 |
|
| 46 |
|
| 47 |
def get_emb(text):
|
|
|
|
| 192 |
|
| 193 |
doc_text_list += res_list
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
doc_text_list = [str(text).strip() for text in doc_text_list if len(str(text).strip()) > 0]
|
| 197 |
# print(doc_text_list)
|
|
|
|
| 223 |
|
| 224 |
|
| 225 |
Embedding_model = "text-embedding-ada-002"
|
| 226 |
+
embeddings = OpenAIEmbeddings(openai_api_key="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")
|
| 227 |
|
| 228 |
print(requests.post(url = chat_emd))
|
| 229 |
|
|
|
|
| 232 |
# headers=headers
|
| 233 |
# )
|
| 234 |
|
| 235 |
+
pinecone.init(api_key = "ffb1f594-0915-4ebf-835f-c1eaa62fdcdc",environment = "us-west4-gcp-free")
|
|
|
|
|
|
|
| 236 |
|
| 237 |
index_name = "test"
|
| 238 |
docstore = Pinecone.from_texts([d for d in doc_spilt],embeddings,index_name = index_name,namespace='a1')
|