Spaces:
Sleeping
Sleeping
Use a pre-built vector database for performance.
Browse files
- app.py +6 -14
- vector_database.pkl +3 -0
app.py
CHANGED
|
@@ -8,7 +8,7 @@ from transformers import BertModel, BertTokenizer
|
|
| 8 |
#import gzip
|
| 9 |
#import pandas as pd
|
| 10 |
import requests
|
| 11 |
-
|
| 12 |
|
| 13 |
class EmbeddingModel(nn.Module):
|
| 14 |
def __init__(self, bertName = "bert-base-uncased"): # other bert models can also be supported
|
|
@@ -117,35 +117,27 @@ class TrainStudent(nn.Module):
|
|
| 117 |
|
| 118 |
student_model=torch.load("myTextEmbeddingStudent.pt",map_location='cpu').student_model.eval()
|
| 119 |
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
#new_chunk_data = []
|
| 122 |
-
#new_chunk_emb = tensor([])
|
| 123 |
def addNewConcepts(user_concepts):
|
| 124 |
|
| 125 |
return user_concepts
|
| 126 |
|
| 127 |
def search(input, user_concepts):
|
| 128 |
|
| 129 |
-
|
| 130 |
-
new_chunk_data = generate_chunk_data(user_concepts.split(","))
|
| 131 |
-
new_chunk_emb = generate_chunk_emb(student_model, new_chunk_data)
|
| 132 |
-
result = search_document(input, new_chunk_data, new_chunk_emb, student_model)
|
| 133 |
-
else:
|
| 134 |
-
chunk_data = generate_chunk_data(["machine learning","moon","brain"])
|
| 135 |
-
# create the embedding vector database
|
| 136 |
-
chunk_emb = generate_chunk_emb(student_model, chunk_data)
|
| 137 |
-
result = search_document(input, chunk_data, chunk_emb, student_model)
|
| 138 |
|
| 139 |
return " ".join(result)
|
| 140 |
|
| 141 |
with gr.Blocks() as demo:
|
| 142 |
gr.HTML("""<h1 align="center">Sentence Embedding and Vector Database</h1>""")
|
| 143 |
-
|
| 144 |
search_result = gr.Textbox(show_label=False, placeholder="Search Result", lines=8)
|
| 145 |
|
| 146 |
with gr.Row():
|
| 147 |
with gr.Column(scale=1):
|
| 148 |
-
new_concept_box = gr.Textbox(show_label=False, placeholder="
|
| 149 |
#addConceptBtn = gr.Button("Add concepts")
|
| 150 |
with gr.Column(scale=4):
|
| 151 |
user_input = gr.Textbox(show_label=False, placeholder="Enter question on the concept...", lines=8)
|
|
|
|
| 8 |
#import gzip
|
| 9 |
#import pandas as pd
|
| 10 |
import requests
|
| 11 |
+
import pickle
|
| 12 |
|
| 13 |
class EmbeddingModel(nn.Module):
|
| 14 |
def __init__(self, bertName = "bert-base-uncased"): # other bert models can also be supported
|
|
|
|
| 117 |
|
| 118 |
student_model=torch.load("myTextEmbeddingStudent.pt",map_location='cpu').student_model.eval()
|
| 119 |
|
| 120 |
+
with open("vector_database.pkl","rb") as f:
|
| 121 |
+
vector_database=pickle.load(f)
|
| 122 |
|
|
|
|
|
|
|
| 123 |
def addNewConcepts(user_concepts):
|
| 124 |
|
| 125 |
return user_concepts
|
| 126 |
|
| 127 |
def search(input, user_concepts):
|
| 128 |
|
| 129 |
+
result = search_document(input, vector_database["chunk_data"], vector_database["chunk_emb"], student_model)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
return " ".join(result)
|
| 132 |
|
| 133 |
with gr.Blocks() as demo:
|
| 134 |
gr.HTML("""<h1 align="center">Sentence Embedding and Vector Database</h1>""")
|
| 135 |
+
|
| 136 |
search_result = gr.Textbox(show_label=False, placeholder="Search Result", lines=8)
|
| 137 |
|
| 138 |
with gr.Row():
|
| 139 |
with gr.Column(scale=1):
|
| 140 |
+
new_concept_box = gr.Textbox(show_label=False, placeholder="Currently supported concepts in vector database:" + str(vector_database["concepts"]), lines=8)
|
| 141 |
#addConceptBtn = gr.Button("Add concepts")
|
| 142 |
with gr.Column(scale=4):
|
| 143 |
user_input = gr.Textbox(show_label=False, placeholder="Enter question on the concept...", lines=8)
|
vector_database.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:866d44a1dc61f3ec63382d40c3dda54f30463ed02d293631b474b86ca61178e2
|
| 3 |
+
size 339130
|