wb-droid committed on
Commit
fe60887
·
1 Parent(s): a6bffe9

Use a pre-built vector database (vector_database.pkl) instead of regenerating chunk embeddings on every search, for performance.

Browse files
Files changed (2) hide show
  1. app.py +6 -14
  2. vector_database.pkl +3 -0
app.py CHANGED
@@ -8,7 +8,7 @@ from transformers import BertModel, BertTokenizer
8
  #import gzip
9
  #import pandas as pd
10
  import requests
11
-
12
 
13
  class EmbeddingModel(nn.Module):
14
  def __init__(self, bertName = "bert-base-uncased"): # other bert models can also be supported
@@ -117,35 +117,27 @@ class TrainStudent(nn.Module):
117
 
118
  student_model=torch.load("myTextEmbeddingStudent.pt",map_location='cpu').student_model.eval()
119
 
 
 
120
 
121
- #new_chunk_data = []
122
- #new_chunk_emb = tensor([])
123
  def addNewConcepts(user_concepts):
124
 
125
  return user_concepts
126
 
127
  def search(input, user_concepts):
128
 
129
- if user_concepts:
130
- new_chunk_data = generate_chunk_data(user_concepts.split(","))
131
- new_chunk_emb = generate_chunk_emb(student_model, new_chunk_data)
132
- result = search_document(input, new_chunk_data, new_chunk_emb, student_model)
133
- else:
134
- chunk_data = generate_chunk_data(["machine learning","moon","brain"])
135
- # create the embedding vector database
136
- chunk_emb = generate_chunk_emb(student_model, chunk_data)
137
- result = search_document(input, chunk_data, chunk_emb, student_model)
138
 
139
  return " ".join(result)
140
 
141
  with gr.Blocks() as demo:
142
  gr.HTML("""<h1 align="center">Sentence Embedding and Vector Database</h1>""")
143
-
144
  search_result = gr.Textbox(show_label=False, placeholder="Search Result", lines=8)
145
 
146
  with gr.Row():
147
  with gr.Column(scale=1):
148
- new_concept_box = gr.Textbox(show_label=False, placeholder="Add new concepts", lines=8)
149
  #addConceptBtn = gr.Button("Add concepts")
150
  with gr.Column(scale=4):
151
  user_input = gr.Textbox(show_label=False, placeholder="Enter question on the concept...", lines=8)
 
8
  #import gzip
9
  #import pandas as pd
10
  import requests
11
+ import pickle
12
 
13
  class EmbeddingModel(nn.Module):
14
  def __init__(self, bertName = "bert-base-uncased"): # other bert models can also be supported
 
117
 
118
  student_model=torch.load("myTextEmbeddingStudent.pt",map_location='cpu').student_model.eval()
119
 
120
+ with open("vector_database.pkl","rb") as f:
121
+ vector_database=pickle.load(f)
122
 
 
 
123
  def addNewConcepts(user_concepts):
124
 
125
  return user_concepts
126
 
127
  def search(input, user_concepts):
128
 
129
+ result = search_document(input, vector_database["chunk_data"], vector_database["chunk_emb"], student_model)
 
 
 
 
 
 
 
 
130
 
131
  return " ".join(result)
132
 
133
  with gr.Blocks() as demo:
134
  gr.HTML("""<h1 align="center">Sentence Embedding and Vector Database</h1>""")
135
+
136
  search_result = gr.Textbox(show_label=False, placeholder="Search Result", lines=8)
137
 
138
  with gr.Row():
139
  with gr.Column(scale=1):
140
+ new_concept_box = gr.Textbox(show_label=False, placeholder="Currently supported concepts in vector database:" + str(vector_database["concepts"]), lines=8)
141
  #addConceptBtn = gr.Button("Add concepts")
142
  with gr.Column(scale=4):
143
  user_input = gr.Textbox(show_label=False, placeholder="Enter question on the concept...", lines=8)
vector_database.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:866d44a1dc61f3ec63382d40c3dda54f30463ed02d293631b474b86ca61178e2
3
+ size 339130