Bima Ardhia committed on
Commit
e5c6c06
·
1 Parent(s): c26528a

add api embed

Browse files
Files changed (2) hide show
  1. api/api_mage_x.py +29 -16
  2. tools/retrive.py +40 -18
api/api_mage_x.py CHANGED
@@ -18,6 +18,7 @@ import firebase_admin
18
  from agent.retrive_agent import run_llm
19
  from pinecone import Pinecone
20
  from langchain_openai import OpenAIEmbeddings
 
21
 
22
 
23
  # Load environment variables
@@ -113,28 +114,47 @@ def fetch_and_embed_data(user_id):
113
 
114
  if 'created_at' in data and data['created_at'] is not None:
115
  data['created_at'] = data['created_at'].replace(tzinfo=None).isoformat()
116
-
117
- if 'updated_at' in data and data['updated_at'] is not None:
118
- data['updated_at'] = data['updated_at'].replace(tzinfo=None).isoformat()
 
 
 
 
119
 
120
  text = ' '.join(str(value) for value in data.values() if value is not None)
121
  print(text)
122
  embedding = create_embeddings(text)
 
 
 
 
 
 
 
 
 
 
123
 
124
  index.upsert(vectors=[{
125
  "id": str(doc.id),
126
  "values": embedding,
127
- "metadata": {
128
- "collection_type": collection,
129
- "text": json.dumps(data),
130
- "firebase_id": str(doc.id)
131
- }
132
  }])
133
 
134
  output = f"Data {user_id} berhasil di embbedings"
135
 
136
  return output
137
 
 
 
 
 
 
 
 
 
 
138
  def create_embeddings(text):
139
  return embedding_model.embed_query(text)
140
 
@@ -293,14 +313,7 @@ def get_chat_history(user_id: str, session_id: str):
293
  messages = [{"role": "user" if isinstance(msg, HumanMessage) else "assistant", "content": msg.content} for msg in chat_history.messages]
294
  return {"chat_history": messages}
295
 
296
- @app.post("/embeddings")
297
- async def get_recommendations(user_input: UserInput):
298
- user_id = user_input.user_id
299
- processed_documents = fetch_and_embed_data(user_id)
300
- return {
301
- "status": "success",
302
- "processed_documents": processed_documents
303
- }
304
 
305
  # Fungsi untuk mengunggah file ke Google Drive
306
  def upload_to_drive(file_path: str, folder_id: str) -> str:
 
18
  from agent.retrive_agent import run_llm
19
  from pinecone import Pinecone
20
  from langchain_openai import OpenAIEmbeddings
21
+ from datetime import datetime
22
 
23
 
24
  # Load environment variables
 
114
 
115
  if 'created_at' in data and data['created_at'] is not None:
116
  data['created_at'] = data['created_at'].replace(tzinfo=None).isoformat()
117
+
118
+ try:
119
+ created_at_str = data["created_at"]
120
+ created_at_timestamp = int(datetime.strptime(created_at_str, "%Y-%m-%dT%H:%M:%SZ").timestamp() * 1000)
121
+ except ValueError:
122
+ print(f"Error: Format tanggal tidak valid untuk item: {doc.id}")
123
+ created_at_timestamp = None
124
 
125
  text = ' '.join(str(value) for value in data.values() if value is not None)
126
  print(text)
127
  embedding = create_embeddings(text)
128
+
129
+ metadata = {
130
+ "firebase_id": str(doc.id),
131
+ "created_at": created_at_timestamp, # Gunakan timestamp di metadata
132
+ "likes_count": data.get("likes_count"), # Contoh metadata tambahan
133
+ "location": data.get("location", ""), # Contoh metadata tambahan
134
+ "category": data.get("category", ""), # Contoh metadata tambahan
135
+ "collection_type": collection,
136
+ "text": json.dumps(data)
137
+ }
138
 
139
  index.upsert(vectors=[{
140
  "id": str(doc.id),
141
  "values": embedding,
142
+ "metadata": metadata,
 
 
 
 
143
  }])
144
 
145
  output = f"Data {user_id} berhasil di embbedings"
146
 
147
  return output
148
 
149
@app.post("/embeddings")
async def get_recommendations(user_input: UserInput):
    """
    Handle POST /embeddings: embed the data belonging to one user.

    Reads ``user_id`` from the request body, hands it to
    ``fetch_and_embed_data`` and wraps whatever that call returns in a
    response envelope.

    Args:
        user_input: Request payload; only its ``user_id`` attribute is read.

    Returns:
        A dict with ``"status"`` set to ``"success"`` and
        ``"processed_documents"`` set to the value returned by
        ``fetch_and_embed_data``.
    """
    embed_result = fetch_and_embed_data(user_input.user_id)
    response = {"status": "success"}
    response["processed_documents"] = embed_result
    return response
157
+
158
  def create_embeddings(text):
159
  return embedding_model.embed_query(text)
160
 
 
313
  messages = [{"role": "user" if isinstance(msg, HumanMessage) else "assistant", "content": msg.content} for msg in chat_history.messages]
314
  return {"chat_history": messages}
315
 
316
+
 
 
 
 
 
 
 
317
 
318
  # Fungsi untuk mengunggah file ke Google Drive
319
  def upload_to_drive(file_path: str, folder_id: str) -> str:
tools/retrive.py CHANGED
@@ -3,7 +3,7 @@ from langchain_pinecone import PineconeVectorStore
3
  from langchain_openai import OpenAIEmbeddings
4
  from pinecone import Pinecone
5
  import json
6
- from datetime import datetime
7
  from langchain_community.tools import WikipediaQueryRun
8
  from langchain_community.utilities import WikipediaAPIWrapper
9
  import os
@@ -33,17 +33,41 @@ def retrieve_wisata(query: str) -> str:
33
  retrieved_texts.append({"content": text, "metadata": metadata})
34
  return json.dumps(retrieved_texts, indent=2)
35
 
36
- def retrieve_berita(query: str) -> str:
37
  """
38
- Mengambil teks dan metadata yang relevan dari vector store UMKM berdasarkan query yang diberikan.
39
  """
40
- search_results = docsearch.similarity_search(query, filter={'collection_type': 'data_berita'})
41
- retrieved_texts = []
42
- for result in search_results:
43
- text = result.page_content
44
- metadata = result.metadata.get("firebase_id", "")
45
- retrieved_texts.append({"content": text, "metadata": metadata})
46
- return json.dumps(retrieved_texts, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  def retrieve_umkm(query: str) -> str:
49
  """
@@ -64,14 +88,12 @@ def retrieve_wikipedia_info(query: str) -> str:
64
  result = wiki.run(query) # Menggunakan WikipediaQueryRun untuk menjalankan pencarian
65
  return result if result else "Tidak ditemukan hasil di Wikipedia."
66
 
67
- def get_current_time(*args, **kwargs) -> str:
68
- """
69
- Mengembalikan waktu saat ini dalam format yang mudah dibaca.
70
- """
71
- current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
72
- return current_time
73
 
74
  # Contoh penggunaan fungsi baru
75
  # print(retrieve_umkm("Produk UMKM terbaik?"))
76
- # print(retrieve_berita("Berita ekonomi terbaru?"))
77
- # print(retrieve_wisata("Tempat wisata terpopuler?"))
 
 
 
 
3
  from langchain_openai import OpenAIEmbeddings
4
  from pinecone import Pinecone
5
  import json
6
+ from datetime import datetime, timedelta
7
  from langchain_community.tools import WikipediaQueryRun
8
  from langchain_community.utilities import WikipediaAPIWrapper
9
  import os
 
33
  retrieved_texts.append({"content": text, "metadata": metadata})
34
  return json.dumps(retrieved_texts, indent=2)
35
 
36
def get_current_time(*args, **kwargs) -> str:
    """
    Return the current time as a human-readable string.

    Any positional or keyword arguments are accepted and ignored.

    Returns:
        The current time formatted as ``YYYY-MM-DD HH:MM:SS``.
    """
    readable_format = "%Y-%m-%d %H:%M:%S"
    now = datetime.now()
    return now.strftime(readable_format)
42
+
43
def retrieve_berita(query: str, tanggal_str: str) -> str:
    """
    Retrieve news entries matching ``query`` created on or after a given date.

    Args:
        query: Free-text search query passed to the vector store.
        tanggal_str: Lower date bound in the form ``YYYY-MM-DDTHH:MM:SSZ``
            (for example ``"2024-11-22T16:00:00Z"``). The trailing ``Z`` is
            honoured: the value is interpreted as UTC.

    Returns:
        A JSON array (as a string) of ``{"content", "metadata"}`` objects,
        where ``metadata`` holds the hit's ``firebase_id`` (empty string when
        absent). Returns ``"[]"`` when ``tanggal_str`` is missing or does not
        match the expected format.
    """
    # Guard only the date parsing, so an error raised by the vector store is
    # not misreported as a date-format problem.
    try:
        tanggal = datetime.strptime(tanggal_str, "%Y-%m-%dT%H:%M:%SZ")
    except (TypeError, ValueError):
        print("Format tanggal tidak valid.")
        return "[]"

    # strptime returns a naive datetime; calling .timestamp() on it would
    # apply the host's local time zone even though "Z" means UTC. Measuring
    # the distance from the (naive) Unix epoch instead yields UTC epoch
    # milliseconds independent of where the code runs.
    # NOTE(review): the indexing side must store ``created_at`` as UTC epoch
    # milliseconds for this filter to line up - confirm.
    timestamp = (tanggal - datetime(1970, 1, 1)) // timedelta(milliseconds=1)

    search_results = docsearch.similarity_search(
        query,
        filter={
            'collection_type': 'data_berita',
            'created_at': {'$gte': timestamp},
        },
    )

    retrieved_texts = [
        {
            "content": result.page_content,
            "metadata": result.metadata.get("firebase_id", ""),
        }
        for result in search_results
    ]
    return json.dumps(retrieved_texts, indent=2)
71
 
72
  def retrieve_umkm(query: str) -> str:
73
  """
 
88
  result = wiki.run(query) # Menggunakan WikipediaQueryRun untuk menjalankan pencarian
89
  return result if result else "Tidak ditemukan hasil di Wikipedia."
90
 
91
+
 
 
 
 
 
92
 
93
  # Contoh penggunaan fungsi baru
94
  # print(retrieve_umkm("Produk UMKM terbaik?"))
95
# Manual smoke test. Guarded so that importing this module does not issue a
# vector-store query or print to stdout; it only runs when the file is
# executed directly as a script.
if __name__ == "__main__":
    query = "berita tentang ekonomi"
    tanggal_str = "2024-11-22T16:00:00Z"

    hasil = retrieve_berita(query, tanggal_str)
    print(hasil)
    # print(retrieve_wisata("Tempat wisata terpopuler?"))