File size: 5,500 Bytes
27b5f6d
38d2586
27b5f6d
e8a6b78
 
38d2586
27b5f6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb06dca
27b5f6d
eb06dca
 
27b5f6d
e8a6b78
 
 
38d2586
e8a6b78
38d2586
e8a6b78
 
 
27b5f6d
 
 
 
aadfb3f
e8a6b78
27b5f6d
 
 
 
 
 
 
 
 
 
 
 
 
e8a6b78
27b5f6d
 
 
 
 
 
 
 
 
 
 
 
e8a6b78
27b5f6d
 
 
 
 
 
 
 
 
 
e8a6b78
27b5f6d
 
 
 
 
 
 
e8a6b78
27b5f6d
 
 
 
 
e8a6b78
5b0f158
27b5f6d
 
 
 
156199c
27b5f6d
 
a10afa8
27b5f6d
156199c
38d2586
 
 
 
 
 
27b5f6d
 
 
 
e8a6b78
27b5f6d
 
156199c
27b5f6d
 
 
 
156199c
38d2586
 
 
 
 
 
27b5f6d
 
e8a6b78
38d2586
 
 
 
27b5f6d
 
e8a6b78
38d2586
 
 
 
 
27b5f6d
63da829
 
 
 
 
 
27b5f6d
e8a6b78
27b5f6d
eb06dca
 
e8a6b78
eb06dca
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
"""service to manage trains"""
from typing import Any

import firebase_admin

from Brain.src.model.req_model import ReqModel
from Brain.src.rising_plugin.csv_embed import get_embed
from Brain.src.rising_plugin.pinecone_engine import (
    get_pinecone_index_namespace,
    delete_pinecone,
    add_pinecone,
    delete_all_pinecone,
)

from firebase_admin import firestore
import datetime


def to_json(page_content: str):
    """Build the stored payload for one training document.

    Returns a dict holding the given text under ``"page_content"`` and the
    current local time, as a POSIX timestamp float, under ``"timestamp"``.
    """
    created_at = datetime.datetime.now().timestamp()
    payload = {"page_content": page_content}
    payload["timestamp"] = created_at
    return payload


class TrainService:
    """train (getting embedding) and update pinecone with embeddings by document_id

    Training documents are stored in the Firestore ``documents`` collection
    and each one is mirrored in Pinecone under the "trains" namespace, keyed
    by its Firestore document id.

    train datatype:
    key: document_id
    values: {page_content}
    """

    # Firestore client; assigned by init_firestore().
    db: Any
    # Reference to the Firestore "documents" collection; assigned by
    # init_firestore().
    documents_ref: Any

    def __init__(self, firebase_app: firebase_admin.App, setting: ReqModel):
        """Remember the Firebase app and request settings; performs no I/O."""
        self.firebase_app = firebase_app
        self.setting = setting

    def init_firestore(self):
        """Bind ``self.db`` / ``self.documents_ref`` to the app's Firestore."""
        self.db = firestore.client(app=self.firebase_app)
        self.documents_ref = self.db.collection("documents")

    def read_all_documents(self) -> Any:
        """read all documents from firestore

        Returns a list of ``{"document_id", "page_content"}`` dicts, queried
        in order of the stored ``"timestamp"`` field.
        """
        self.init_firestore()
        docs = self.documents_ref.order_by("timestamp").stream()
        return [
            {"document_id": item.id, "page_content": item.to_dict()["page_content"]}
            for item in docs
        ]

    def read_one_document(self, document_id: str):
        """read one document from firestore

        Returns ``{"document_id", "page_content"}`` for the document, or
        ``None`` when no document with that id exists.
        """
        self.init_firestore()
        doc = self.documents_ref.document(document_id).get()
        if not doc.exists:
            return None
        return {
            "document_id": document_id,
            "page_content": doc.to_dict()["page_content"],
        }

    def create_one_document(self, page_content: str):
        """create a new document and train it

        Returns ``{"document_id", "page_content"}`` with the generated id.
        """
        self.init_firestore()
        # Auto-generate document ID
        auto_generated_doc_ref = self.documents_ref.document()
        auto_generated_doc_ref.set(to_json(page_content))
        auto_generated_document_id = auto_generated_doc_ref.id
        self.train_one_document(auto_generated_document_id, page_content)
        return {"document_id": auto_generated_document_id, "page_content": page_content}

    def update_one_document(self, document_id: str, page_content: str):
        """update a document by using id and train it

        The stored timestamp is refreshed together with the content.
        Returns ``{"document_id", "page_content"}``.
        """
        self.init_firestore()
        self.documents_ref.document(document_id).update(to_json(page_content))
        self.train_one_document(document_id, page_content)
        return {"document_id": document_id, "page_content": page_content}

    def delete_one_document(self, document_id: str):
        """delete a document by using document_id

        Removes the Firestore document first, then its Pinecone entry.
        Returns ``{"document_id"}``.
        """
        self.init_firestore()
        self.documents_ref.document(document_id).delete()
        self.delete_one_pinecone(document_id)
        return {"document_id": document_id}

    def train_all_documents(self) -> str:
        """Re-train every Firestore document into the Pinecone namespace.

        Returns the fixed status string "trained all documents successfully".
        """
        # Read from Firestore before clearing Pinecone so that a failed read
        # cannot leave the namespace emptied with nothing to refill it.
        documents = self.read_all_documents()
        self.delete_all()
        pinecone_namespace = self.get_pinecone_index_namespace()
        for item in documents:
            self._embed_and_store(
                pinecone_namespace, item["document_id"], item["page_content"]
            )
        return "trained all documents successfully"

    def train_one_document(self, document_id: str, page_content: str) -> None:
        """Embed one document's text and store it in Pinecone under its id."""
        self._embed_and_store(
            self.get_pinecone_index_namespace(), document_id, page_content
        )

    def _embed_and_store(
        self, namespace: str, document_id: str, page_content: str
    ) -> None:
        """Embed ``page_content`` once and pass the result to ``add_pinecone``."""
        # Embed the document text only. Single-document and bulk training both
        # go through here so that the same text always yields the same input
        # to the embedding call, whichever path indexed it.
        # get vectoring data(embedding data)
        vectoring_values = get_embed(data=str(page_content), setting=self.setting)
        add_pinecone(
            namespace=namespace,
            key=document_id,
            value=vectoring_values,
            setting=self.setting,
        )

    def delete_all(self) -> Any:
        """Call ``delete_all_pinecone`` for the training namespace.

        Returns whatever ``delete_all_pinecone`` returns.
        """
        return delete_all_pinecone(
            namespace=self.get_pinecone_index_namespace(),
            setting=self.setting,
        )

    def delete_one_pinecone(self, document_id: str) -> Any:
        """Call ``delete_pinecone`` for ``document_id`` in the training namespace.

        Returns whatever ``delete_pinecone`` returns.
        """
        return delete_pinecone(
            namespace=self.get_pinecone_index_namespace(),
            key=document_id,
            setting=self.setting,
        )

    def delete_all_training_from_pinecone(self) -> Any:
        """Alias of :meth:`delete_all`, kept for existing callers."""
        return self.delete_all()

    def get_pinecone_index_namespace(self) -> str:
        """Return the Pinecone namespace derived from the "trains" key."""
        return get_pinecone_index_namespace("trains")

    def get_pinecone_index_train_namespace(self) -> str:
        """Alias of :meth:`get_pinecone_index_namespace`, kept for callers."""
        return self.get_pinecone_index_namespace()