Spaces:
Sleeping
Sleeping
Bima Ardhia committed on
Commit ·
e5c6c06
1
Parent(s): c26528a
add api embed
Browse files- api/api_mage_x.py +29 -16
- tools/retrive.py +40 -18
api/api_mage_x.py
CHANGED
|
@@ -18,6 +18,7 @@ import firebase_admin
|
|
| 18 |
from agent.retrive_agent import run_llm
|
| 19 |
from pinecone import Pinecone
|
| 20 |
from langchain_openai import OpenAIEmbeddings
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
# Load environment variables
|
|
@@ -113,28 +114,47 @@ def fetch_and_embed_data(user_id):
|
|
| 113 |
|
| 114 |
if 'created_at' in data and data['created_at'] is not None:
|
| 115 |
data['created_at'] = data['created_at'].replace(tzinfo=None).isoformat()
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
text = ' '.join(str(value) for value in data.values() if value is not None)
|
| 121 |
print(text)
|
| 122 |
embedding = create_embeddings(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
index.upsert(vectors=[{
|
| 125 |
"id": str(doc.id),
|
| 126 |
"values": embedding,
|
| 127 |
-
"metadata":
|
| 128 |
-
"collection_type": collection,
|
| 129 |
-
"text": json.dumps(data),
|
| 130 |
-
"firebase_id": str(doc.id)
|
| 131 |
-
}
|
| 132 |
}])
|
| 133 |
|
| 134 |
output = f"Data {user_id} berhasil di embbedings"
|
| 135 |
|
| 136 |
return output
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
def create_embeddings(text):
|
| 139 |
return embedding_model.embed_query(text)
|
| 140 |
|
|
@@ -293,14 +313,7 @@ def get_chat_history(user_id: str, session_id: str):
|
|
| 293 |
messages = [{"role": "user" if isinstance(msg, HumanMessage) else "assistant", "content": msg.content} for msg in chat_history.messages]
|
| 294 |
return {"chat_history": messages}
|
| 295 |
|
| 296 |
-
|
| 297 |
-
async def get_recommendations(user_input: UserInput):
|
| 298 |
-
user_id = user_input.user_id
|
| 299 |
-
processed_documents = fetch_and_embed_data(user_id)
|
| 300 |
-
return {
|
| 301 |
-
"status": "success",
|
| 302 |
-
"processed_documents": processed_documents
|
| 303 |
-
}
|
| 304 |
|
| 305 |
# Fungsi untuk mengunggah file ke Google Drive
|
| 306 |
def upload_to_drive(file_path: str, folder_id: str) -> str:
|
|
|
|
| 18 |
from agent.retrive_agent import run_llm
|
| 19 |
from pinecone import Pinecone
|
| 20 |
from langchain_openai import OpenAIEmbeddings
|
| 21 |
+
from datetime import datetime
|
| 22 |
|
| 23 |
|
| 24 |
# Load environment variables
|
|
|
|
| 114 |
|
| 115 |
if 'created_at' in data and data['created_at'] is not None:
|
| 116 |
data['created_at'] = data['created_at'].replace(tzinfo=None).isoformat()
|
| 117 |
+
|
| 118 |
+
try:
|
| 119 |
+
created_at_str = data["created_at"]
|
| 120 |
+
created_at_timestamp = int(datetime.strptime(created_at_str, "%Y-%m-%dT%H:%M:%SZ").timestamp() * 1000)
|
| 121 |
+
except ValueError:
|
| 122 |
+
print(f"Error: Format tanggal tidak valid untuk item: {doc.id}")
|
| 123 |
+
created_at_timestamp = None
|
| 124 |
|
| 125 |
text = ' '.join(str(value) for value in data.values() if value is not None)
|
| 126 |
print(text)
|
| 127 |
embedding = create_embeddings(text)
|
| 128 |
+
|
| 129 |
+
metadata = {
|
| 130 |
+
"firebase_id": str(doc.id),
|
| 131 |
+
"created_at": created_at_timestamp, # Gunakan timestamp di metadata
|
| 132 |
+
"likes_count": data.get("likes_count"), # Contoh metadata tambahan
|
| 133 |
+
"location": data.get("location", ""), # Contoh metadata tambahan
|
| 134 |
+
"category": data.get("category", ""), # Contoh metadata tambahan
|
| 135 |
+
"collection_type": collection,
|
| 136 |
+
"text": json.dumps(data)
|
| 137 |
+
}
|
| 138 |
|
| 139 |
index.upsert(vectors=[{
|
| 140 |
"id": str(doc.id),
|
| 141 |
"values": embedding,
|
| 142 |
+
"metadata": metadata,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
}])
|
| 144 |
|
| 145 |
output = f"Data {user_id} berhasil di embbedings"
|
| 146 |
|
| 147 |
return output
|
| 148 |
|
| 149 |
+
@app.post("/embeddings")
async def get_recommendations(user_input: UserInput):
    """
    Handle POST /embeddings: embed the data of the user named in the request.

    Reads ``user_id`` from ``user_input``, passes it to
    ``fetch_and_embed_data`` and wraps whatever that call returns in a
    success payload.

    Returns:
        A dict with ``"status": "success"`` and the value returned by
        ``fetch_and_embed_data`` under ``"processed_documents"``.
    """
    # NOTE(review): fetch_and_embed_data is called synchronously inside an
    # async handler; if it performs blocking network I/O it will block the
    # event loop for the duration of the call -- confirm and consider
    # off-loading it to a worker thread.
    embed_result = fetch_and_embed_data(user_input.user_id)
    response = {"status": "success"}
    response["processed_documents"] = embed_result
    return response
|
| 157 |
+
|
| 158 |
def create_embeddings(text):
    """
    Return the embedding of ``text``.

    Delegates to ``embed_query`` on the module-level ``embedding_model``.
    """
    vector = embedding_model.embed_query(text)
    return vector
|
| 160 |
|
|
|
|
| 313 |
messages = [{"role": "user" if isinstance(msg, HumanMessage) else "assistant", "content": msg.content} for msg in chat_history.messages]
|
| 314 |
return {"chat_history": messages}
|
| 315 |
|
| 316 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
# Fungsi untuk mengunggah file ke Google Drive
|
| 319 |
def upload_to_drive(file_path: str, folder_id: str) -> str:
|
tools/retrive.py
CHANGED
|
@@ -3,7 +3,7 @@ from langchain_pinecone import PineconeVectorStore
|
|
| 3 |
from langchain_openai import OpenAIEmbeddings
|
| 4 |
from pinecone import Pinecone
|
| 5 |
import json
|
| 6 |
-
from datetime import datetime
|
| 7 |
from langchain_community.tools import WikipediaQueryRun
|
| 8 |
from langchain_community.utilities import WikipediaAPIWrapper
|
| 9 |
import os
|
|
@@ -33,17 +33,41 @@ def retrieve_wisata(query: str) -> str:
|
|
| 33 |
retrieved_texts.append({"content": text, "metadata": metadata})
|
| 34 |
return json.dumps(retrieved_texts, indent=2)
|
| 35 |
|
| 36 |
-
def
|
| 37 |
"""
|
| 38 |
-
|
| 39 |
"""
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
def retrieve_umkm(query: str) -> str:
|
| 49 |
"""
|
|
@@ -64,14 +88,12 @@ def retrieve_wikipedia_info(query: str) -> str:
|
|
| 64 |
result = wiki.run(query) # Menggunakan WikipediaQueryRun untuk menjalankan pencarian
|
| 65 |
return result if result else "Tidak ditemukan hasil di Wikipedia."
|
| 66 |
|
| 67 |
-
|
| 68 |
-
"""
|
| 69 |
-
Mengembalikan waktu saat ini dalam format yang mudah dibaca.
|
| 70 |
-
"""
|
| 71 |
-
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 72 |
-
return current_time
|
| 73 |
|
| 74 |
# Contoh penggunaan fungsi baru
|
| 75 |
# print(retrieve_umkm("Produk UMKM terbaik?"))
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from langchain_openai import OpenAIEmbeddings
|
| 4 |
from pinecone import Pinecone
|
| 5 |
import json
|
| 6 |
+
from datetime import datetime, timedelta
|
| 7 |
from langchain_community.tools import WikipediaQueryRun
|
| 8 |
from langchain_community.utilities import WikipediaAPIWrapper
|
| 9 |
import os
|
|
|
|
| 33 |
retrieved_texts.append({"content": text, "metadata": metadata})
|
| 34 |
return json.dumps(retrieved_texts, indent=2)
|
| 35 |
|
| 36 |
+
def get_current_time(*args, **kwargs) -> str:
    """
    Return the current time as a ``YYYY-MM-DD HH:MM:SS`` string.

    Any positional or keyword arguments are accepted and ignored. The value
    comes from ``datetime.now()`` with no timezone argument.
    """
    return f"{datetime.now():%Y-%m-%d %H:%M:%S}"
|
| 42 |
+
|
| 43 |
+
def retrieve_berita(query: str, tanggal_str: str) -> str:
    """
    Retrieve news items matching ``query`` created on or after a given date.

    Args:
        query: Free-text query passed to the vector store similarity search.
        tanggal_str: Lower bound for ``created_at``, as a string in the
            ``%Y-%m-%dT%H:%M:%SZ`` format, e.g. ``"2024-11-22T16:00:00Z"``.

    Returns:
        A JSON array (indented by 2) of ``{"content", "metadata"}`` objects,
        where ``content`` is the hit's page content and ``metadata`` is its
        ``firebase_id`` (empty string when absent). Returns ``"[]"`` when
        ``tanggal_str`` is missing or not in the expected format.
    """
    # Guard only the date handling: previously the whole body sat inside the
    # ``try``, so a ValueError raised by the search itself was reported as an
    # invalid date and silently turned into an empty result.
    try:
        # Convert the date string to a datetime object.
        tanggal = datetime.strptime(tanggal_str, "%Y-%m-%dT%H:%M:%SZ")
        # Convert to a numeric timestamp in milliseconds.
        # NOTE(review): ``tanggal`` is naive, so ``timestamp()`` interprets it
        # in the host's local timezone even though the format ends in "Z".
        # Left as-is so the cut-off stays comparable with however the
        # ``created_at`` metadata was written -- confirm the intended zone.
        timestamp = int(tanggal.timestamp() * 1000)
    except (ValueError, TypeError):
        # TypeError covers a non-string value such as None.
        print("Format tanggal tidak valid.")
        return "[]"

    search_results = docsearch.similarity_search(
        query,
        filter={
            'collection_type': 'data_berita',
            'created_at': {'$gte': timestamp},
        },
    )

    retrieved_texts = [
        {
            "content": result.page_content,
            "metadata": result.metadata.get("firebase_id", ""),
        }
        for result in search_results
    ]
    return json.dumps(retrieved_texts, indent=2)
|
| 71 |
|
| 72 |
def retrieve_umkm(query: str) -> str:
|
| 73 |
"""
|
|
|
|
| 88 |
result = wiki.run(query) # Menggunakan WikipediaQueryRun untuk menjalankan pencarian
|
| 89 |
return result if result else "Tidak ditemukan hasil di Wikipedia."
|
| 90 |
|
| 91 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
# Contoh penggunaan fungsi baru
|
| 94 |
# print(retrieve_umkm("Produk UMKM terbaik?"))
|
| 95 |
+
# Run the example only when this file is executed directly. Without the guard,
# merely importing the module performed a vector-store query and printed the
# result as a side effect.
if __name__ == "__main__":
    query = "berita tentang ekonomi"
    tanggal_str = "2024-11-22T16:00:00Z"

    hasil = retrieve_berita(query, tanggal_str)
    print(hasil)
    # print(retrieve_wisata("Tempat wisata terpopuler?"))
|