EddyGiusepe committed on
Commit
3c30949
·
1 Parent(s): a56af3d

Estudando o DB redis

Browse files
Index_Creation/1_index_creation_and_query.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data Scientist.: Dr.Eddy Giusepe Chirinos Isidro
3
+
4
+
5
+ Similaridade do Vetor
6
+ =====================
7
+ Usamos os vetores (Embeddings) em sistemas de recomendação, pesquisa de
8
+ imagens e vídeos, recuperação de documentos e Q&A 🤗.
9
+
10
+ Versão: 1.0.0
11
+ Data: 29/07/2023
12
+ Autor: Dr.Eddy Giusepe
13
+
14
+ Método de execução:
15
+ $ python 1_index_creation_and_query.py
16
+ """
17
+ import redis
18
+ from redis.commands.search.field import TagField, VectorField
19
+ from redis.commands.search.indexDefinition import IndexDefinition, IndexType
20
+ from redis.commands.search.query import Query
21
+
22
# Client for the Redis server on this machine.
r = redis.Redis(port=6379, host="localhost")

INDEX_NAME = "index"  # name of the RediSearch vector index
DOC_PREFIX = "doc:"  # key prefix of the hashes covered by the index
26
+
27
def create_index(vector_dimensions: int):
    """Create the RediSearch vector index unless it already exists.

    The index is named ``INDEX_NAME``, covers every hash whose key starts
    with ``DOC_PREFIX`` and declares two tag fields (``tag`` and ``id``)
    plus a FLAT ``vector`` field holding FLOAT32 values compared with the
    cosine distance.

    Args:
        vector_dimensions: Number of dimensions of the vectors to index.

    Raises:
        redis.exceptions.RedisError: For failures other than "index not
            found" (for example, the server being unreachable).
    """
    try:
        # Probe for the index; the server answers with an error reply
        # (ResponseError) when it does not exist.
        r.ft(INDEX_NAME).info()
    except redis.exceptions.ResponseError:
        # Only an error reply means "no such index". Connection problems
        # and interrupts propagate instead of triggering a creation attempt.
        schema = (
            TagField("tag"),
            TagField("id"),
            VectorField(
                "vector",
                "FLAT",  # vector index type: FLAT or HNSW
                {
                    "TYPE": "FLOAT32",  # FLOAT32 or FLOAT64
                    "DIM": vector_dimensions,
                    "DISTANCE_METRIC": "COSINE",
                },
            ),
        )
        definition = IndexDefinition(prefix=[DOC_PREFIX], index_type=IndexType.HASH)
        r.ft(INDEX_NAME).create_index(fields=schema, definition=definition)
    else:
        print("Index already exists!")
51
+
52
# Number of dimensions of each embedding vector.
VECTOR_DIMENSIONS = 1536

# Sample sentences that are embedded and stored in Redis.
texts = [
    "Hoje é realmente um ótimo dia!",
    "O cachorro da porta ao lado late muito alto.",
    "Meu gato escapou e saiu antes que eu pudesse fechar a porta.",
    "Amanhã deve chover e trovejar.",
]

# To start from scratch, drop the index together with its documents:
# r.ft(INDEX_NAME).dropindex(delete_documents=True)

# Create the index (a no-op when it already exists).
create_index(VECTOR_DIMENSIONS)
67
+
68
+
69
+ import openai
70
+ import os
71
+ import numpy as np
72
+ from dotenv import load_dotenv, find_dotenv
73
_ = load_dotenv(find_dotenv())  # read the local .env file

# Raises KeyError right away when the API key is not configured.
openai.api_key = os.environ['OPENAI_API_KEY']


# Create embeddings with OpenAI text-embedding-ada-002
# https://openai.com/blog/new-and-improved-embedding-model
response = openai.Embedding.create(input=texts, engine="text-embedding-ada-002")
# One float32 row per input text. The loop variable is deliberately not
# called `r`, which is the name of the Redis client.
embeddings = np.array(
    [item["embedding"] for item in response["data"]], dtype=np.float32
)

# Queue one HSET per document and send them together on execute().
pipe = r.pipeline()
for i, (content, embedding) in enumerate(zip(texts, embeddings)):
    # Build the key from DOC_PREFIX so the documents always match the
    # prefix the index definition watches.
    pipe.hset(
        f"{DOC_PREFIX}{i}",
        mapping={
            "vector": embedding.tobytes(),  # raw float32 bytes
            "content": content,
            "tag": "Eddy",
        },
    )
res = pipe.execute()
93
+ #print("🤗")
94
+ #print(embeddings)
95
+
96
# Sentence whose nearest neighbours are looked up (alternative: "animals").
text = "Eu gosto muito de pets, especificamente de gatos"

# Embed the query with the same model used for the stored documents.
response = openai.Embedding.create(input=[text], engine="text-embedding-ada-002")
query_embedding = np.array(response["data"][0]["embedding"], dtype=np.float32)

# "*" applies no pre-filter, so the KNN search runs over every document.
filter_query = "*"
knn_clause = f"{filter_query}=>[KNN 3 @vector $vec as score]"
query = Query(knn_clause)
query = query.sort_by("score")
query = query.return_fields("content", "tag", "score")
query = query.paging(0, 3)
query = query.dialect(2)

# The query vector is bound to $vec as raw float32 bytes.
query_params = {"vec": query_embedding.tobytes()}
response = r.ft(INDEX_NAME).search(query, query_params).docs
print("🤗🤗🤗")
print(response)
Index_Creation/query_by_index.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data Scientist.: Dr.Eddy Giusepe Chirinos Isidro
3
+
4
+
5
+ Similaridade do Vetor
6
+ =====================
7
+ Usamos os vetores (Embeddings) em sistemas de recomendação, pesquisa de
8
+ imagens e vídeos, recuperação de documentos e Q&A 🤗.
9
+
10
+ Versão: 1.0.0
11
+ Data: 29/07/2023
12
+ Autor: Dr.Eddy Giusepe
13
+
14
+ Método de execução:
15
+ $ python query_by_index.py
16
+ """
17
+ import redis
18
+ from redis.commands.search.query import Query # Para fazer a pesquisa dos K vizinhos mais próximos
19
+ import numpy as np
20
+ import openai
21
+ import os
22
+ from dotenv import load_dotenv, find_dotenv
23
_ = load_dotenv(find_dotenv())  # load variables from the local .env file
openai.api_key = os.environ['OPENAI_API_KEY']

# Connect to the local Redis server.
redis_host, redis_port = "localhost", 6379
r = redis.Redis(host=redis_host, port=redis_port)

# Question to search for.
text = "Qual é o horário de atendimento no codhab?"

# Embed the question.
response = openai.Embedding.create(input=text, engine="text-embedding-ada-002")
array_embedding = np.array(response['data'][0]['embedding'], dtype=np.float32)
# Redis expects the query vector as raw bytes.
query_embedding = array_embedding.tobytes()
40
+
41
+
42
+
43
# Indexes of interest, in priority order.
# NOTE(review): only SEARCH_INDEX is queried below, as in the FT.SEARCH call
# this replaces; covering the other indexes needs one search per index.
index_list = ["secretaria_de_saude_do_df", "detran", "codhab"]
SEARCH_INDEX = "codhab"

# Vector field searched by the KNN clause. The "doc:codhab:*" hashes read by
# this script store their embedding under "content_vector".
# NOTE(review): presumably that is also the indexed field name; confirm
# with FT.INFO on the index.
VECTOR_FIELD = "content_vector"

# "*" applies no pre-filter, so the KNN search runs over the whole index.
filter_query = "*"
query = (
    Query(f"{filter_query}=>[KNN 3 @{VECTOR_FIELD} $vec AS score]")
    .sort_by("score")
    .paging(0, 3)
    .return_fields("score")
    .dialect(2)
)

# search() serialises the Query object and binds the vector bytes to $vec.
# A Query instance cannot be passed as a raw execute_command() argument.
result = r.ft(SEARCH_INDEX).search(query, query_params={"vec": query_embedding})

print(result)
62
+
63
import json

# Inspect one stored document to see which fields its hash carries.
doc_key = "doc:codhab:86ce630aa248409191e01b33a5f9fedf"
teste = r.hgetall(doc_key)
if teste:
    print(teste.keys())
    print(teste[b'content_vector'])  # raw embedding bytes, not UTF-8 text
    metadata_json = teste[b'metadata'].decode('utf-8')
    print(metadata_json)
    print(teste[b'content'].decode('utf-8'))

    # The metadata field holds a JSON document.
    dic = json.loads(metadata_json)
    print(dic["Pergunta"])
    print(dic["Resposta"])
    print(dic["Fonte"])
else:
    # hgetall() gives an empty dict for a missing key; report it instead of
    # failing with a KeyError on the first field access.
    print(f"Key not found: {doc_key}")


# Count the documents with SCAN, which walks the keyspace incrementally;
# KEYS would block the server while it scans everything.
keys = list(r.scan_iter(match="doc:codhab:*"))
print(len(keys))
80
+ # if len(result["data"]) == 0:
81
+ # # Se não houver resultados nos índices, faça uma busca em todos eles usando "*"
82
+ # query = (
83
+ # Query(f"* =>[KNN 3 @vector $vec as score]")
84
+ # .sort_by("@score")
85
+ # .paging(0, 3)
86
+ # .return_fields("content", "tag", "score")
87
+ # .dialect(2)
88
+ # )
89
+ # result = query.execute(set={"vec": query_embedding.tolist()})
90
+
91
+ # # Resultados
92
+ # print(result["data"])