"""service to manage trains"""
from typing import Any
import firebase_admin
from Brain.src.model.req_model import ReqModel
from Brain.src.rising_plugin.csv_embed import get_embed
from Brain.src.rising_plugin.pinecone_engine import (
get_pinecone_index_namespace,
delete_pinecone,
add_pinecone,
delete_all_pinecone,
)
from firebase_admin import firestore
import datetime
def to_json(page_content: str):
    """Build the stored payload for a document.

    Returns a dict holding the given ``page_content`` together with a
    ``timestamp`` taken from the current time (seconds since the epoch,
    as a float).
    """
    stamped_at = datetime.datetime.now().timestamp()
    payload = dict(page_content=page_content, timestamp=stamped_at)
    return payload
class TrainService:
    """train (getting embedding) and update pinecone with embeddings by document_id

    Documents are kept in the Firestore ``documents`` collection and their
    embeddings are written to Pinecone under the document id.

    train datatype:
        key: document_id
        values: {page_content}
    """

    # Firestore client and ``documents`` collection handle; both are only
    # assigned once init_firestore() has run.
    db: Any
    documents_ref: Any

    def __init__(self, firebase_app: firebase_admin.App, setting: ReqModel):
        """Store the Firebase app and the request settings used by every call."""
        self.firebase_app = firebase_app
        self.setting = setting

    def init_firestore(self):
        """Bind ``self.db`` and ``self.documents_ref`` for the configured app."""
        self.db = firestore.client(app=self.firebase_app)
        self.documents_ref = self.db.collection("documents")

    def read_all_documents(self) -> Any:
        """read all documents from firestore

        Returns a list of ``{"document_id", "page_content"}`` dicts, queried
        with ``order_by("timestamp")``.
        """
        self.init_firestore()
        return [
            {"document_id": item.id, "page_content": item.to_dict()["page_content"]}
            for item in self.documents_ref.order_by("timestamp").stream()
        ]

    def read_one_document(self, document_id: str):
        """read one document from firestore

        Returns ``{"document_id", "page_content"}``, or ``None`` when the
        document does not exist.
        """
        self.init_firestore()
        doc = self.documents_ref.document(document_id).get()
        if not doc.exists:
            return None
        return {
            "document_id": document_id,
            "page_content": doc.to_dict()["page_content"],
        }

    def create_one_document(self, page_content: str):
        """create a new document and train it

        The document id is auto-generated by Firestore and returned together
        with the stored ``page_content``.
        """
        self.init_firestore()
        # Auto-generate document ID
        auto_generated_doc_ref = self.documents_ref.document()
        auto_generated_doc_ref.set(to_json(page_content))
        auto_generated_document_id = auto_generated_doc_ref.id
        self.train_one_document(auto_generated_document_id, page_content)
        return {"document_id": auto_generated_document_id, "page_content": page_content}

    def update_one_document(self, document_id: str, page_content: str):
        """update a document by using id and train it

        NOTE(review): this uses Firestore ``update``, which presumably fails
        for a document id that does not exist - confirm against callers.
        """
        self.init_firestore()
        self.documents_ref.document(document_id).update(to_json(page_content))
        self.train_one_document(document_id, page_content)
        return {"document_id": document_id, "page_content": page_content}

    def delete_one_document(self, document_id: str):
        """delete a document by using document_id

        Removes the Firestore document first, then its Pinecone entry.
        """
        self.init_firestore()
        self.documents_ref.document(document_id).delete()
        self.delete_one_pinecone(document_id)
        return {"document_id": document_id}

    def train_all_documents(self) -> str:
        """Re-embed every Firestore document into a freshly cleared namespace.

        Returns the fixed status string ``"trained all documents successfully"``.
        """
        # Read before wiping: if the Firestore read fails, the existing
        # Pinecone data is left untouched instead of being deleted for nothing.
        documents = self.read_all_documents()
        self.delete_all()
        pinecone_namespace = self.get_pinecone_index_namespace()
        for item in documents:
            self._embed_and_store(
                pinecone_namespace, item["document_id"], item["page_content"]
            )
        return "trained all documents successfully"

    def train_one_document(self, document_id: str, page_content: str) -> None:
        """Embed ``page_content`` and store it in Pinecone under ``document_id``.

        The embedding is computed from the page content alone, the same input
        train_all_documents() uses, so both paths produce the same vector for
        the same document.
        """
        self._embed_and_store(
            self.get_pinecone_index_namespace(), document_id, page_content
        )

    def _embed_and_store(self, namespace: str, key: str, page_content: str) -> None:
        """Compute one embedding for ``page_content`` and add it to Pinecone."""
        # get vectoring data(embedding data)
        vectoring_values = get_embed(data=page_content, setting=self.setting)
        add_pinecone(
            namespace=namespace,
            key=key,
            value=vectoring_values,
            setting=self.setting,
        )

    def delete_all(self) -> Any:
        """Delete every entry in the trains namespace from Pinecone."""
        return delete_all_pinecone(
            namespace=self.get_pinecone_index_namespace(),
            setting=self.setting,
        )

    def delete_one_pinecone(self, document_id: str) -> Any:
        """Delete the Pinecone entry stored under ``document_id``."""
        return delete_pinecone(
            namespace=self.get_pinecone_index_namespace(),
            key=document_id,
            setting=self.setting,
        )

    def delete_all_training_from_pinecone(self) -> Any:
        """Alias of delete_all(), kept for existing callers."""
        return self.delete_all()

    def get_pinecone_index_namespace(self) -> str:
        """Return the Pinecone namespace derived from the ``trains`` label."""
        # Pinecone-only lookup: no Firestore access is needed here.
        return get_pinecone_index_namespace("trains")

    def get_pinecone_index_train_namespace(self) -> str:
        """Alias of get_pinecone_index_namespace(), kept for existing callers."""
        return self.get_pinecone_index_namespace()
|