Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -13,6 +13,7 @@ from langchain import PromptTemplate, LLMChain
|
|
| 13 |
from langchain import HuggingFaceHub
|
| 14 |
from langchain.document_loaders import TextLoader
|
| 15 |
import torch
|
|
|
|
| 16 |
|
| 17 |
import requests
|
| 18 |
import random
|
|
@@ -110,6 +111,7 @@ async def pdf_file_qa_process(user_question: str, request: Request, file_to_proc
|
|
| 110 |
texts=temp_texts
|
| 111 |
initial_embeddings=get_embeddings(temp_texts)
|
| 112 |
db_embeddings = torch.FloatTensor(initial_embeddings)
|
|
|
|
| 113 |
print("db_embeddings created...")
|
| 114 |
|
| 115 |
#question = var_query.query
|
|
@@ -117,14 +119,19 @@ async def pdf_file_qa_process(user_question: str, request: Request, file_to_proc
|
|
| 117 |
print("API Call Query Received: "+question)
|
| 118 |
q_embedding=get_embeddings(question)
|
| 119 |
final_q_embedding = torch.FloatTensor(q_embedding)
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
| 121 |
hits = semantic_search(final_q_embedding, torch.FloatTensor(db_embeddings), top_k=5)
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
page_contents = []
|
| 124 |
for i in range(len(hits[0])):
|
| 125 |
page_content = texts[hits[0][i]['corpus_id']]
|
| 126 |
-
page_contents.append(page_content)
|
| 127 |
-
|
| 128 |
print(page_contents)
|
| 129 |
|
| 130 |
temp_page_contents=str(page_contents)
|
|
@@ -136,8 +143,14 @@ async def pdf_file_qa_process(user_question: str, request: Request, file_to_proc
|
|
| 136 |
|
| 137 |
loader = TextLoader(file_path, encoding="utf-8")
|
| 138 |
loaded_documents = loader.load()
|
|
|
|
| 139 |
|
|
|
|
|
|
|
| 140 |
temp_ai_response = chain({"input_documents": loaded_documents, "question": question}, return_only_outputs=False)
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
initial_ai_response=temp_ai_response['output_text']
|
| 143 |
|
|
|
|
| 13 |
from langchain import HuggingFaceHub
|
| 14 |
from langchain.document_loaders import TextLoader
|
| 15 |
import torch
|
| 16 |
+
from sentence_transformers.util import semantic_search
|
| 17 |
|
| 18 |
import requests
|
| 19 |
import random
|
|
|
|
| 111 |
texts=temp_texts
|
| 112 |
initial_embeddings=get_embeddings(temp_texts)
|
| 113 |
db_embeddings = torch.FloatTensor(initial_embeddings)
|
| 114 |
+
print(db_embeddings)
|
| 115 |
print("db_embeddings created...")
|
| 116 |
|
| 117 |
#question = var_query.query
|
|
|
|
| 119 |
print("API Call Query Received: "+question)
|
| 120 |
q_embedding=get_embeddings(question)
|
| 121 |
final_q_embedding = torch.FloatTensor(q_embedding)
|
| 122 |
+
print(final_q_embedding)
|
| 123 |
+
|
| 124 |
+
print("Semantic Similarity Search Starts...")
|
| 125 |
+
start_1 = timeit.default_timer()
|
| 126 |
hits = semantic_search(final_q_embedding, torch.FloatTensor(db_embeddings), top_k=5)
|
| 127 |
+
end_1 = timeit.default_timer()
|
| 128 |
+
print("Semantic Similarity Search Ends...")
|
| 129 |
+
print(f'Semantic Similarity Search共耗时: @ {end_1 - start_1}')
|
| 130 |
|
| 131 |
page_contents = []
|
| 132 |
for i in range(len(hits[0])):
|
| 133 |
page_content = texts[hits[0][i]['corpus_id']]
|
| 134 |
+
page_contents.append(page_content)
|
|
|
|
| 135 |
print(page_contents)
|
| 136 |
|
| 137 |
temp_page_contents=str(page_contents)
|
|
|
|
| 143 |
|
| 144 |
loader = TextLoader(file_path, encoding="utf-8")
|
| 145 |
loaded_documents = loader.load()
|
| 146 |
+
print(loaded_documents)
|
| 147 |
|
| 148 |
+
print("LLM Chain Starts...")
|
| 149 |
+
start_2 = timeit.default_timer()
|
| 150 |
temp_ai_response = chain({"input_documents": loaded_documents, "question": question}, return_only_outputs=False)
|
| 151 |
+
end_2 = timeit.default_timer()
|
| 152 |
+
print("LLM Chain Ends...")
|
| 153 |
+
print(f'LLM Chain共耗时: @ {end_2 - start_2}')
|
| 154 |
|
| 155 |
initial_ai_response=temp_ai_response['output_text']
|
| 156 |
|