# MAGE10 / tools / retrive.py
# Bima Ardhia
# back
# 1ef0172
from typing import List, Dict, Any
from langchain_pinecone import PineconeVectorStore
from langchain_openai import OpenAIEmbeddings
from pinecone import Pinecone
import json
import calendar
from datetime import datetime, timedelta
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
import os
# API keys are read from the environment at import time; os.getenv yields
# None for any variable that is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# Name of the Pinecone index backing the vector store. Kept separate from
# `index` so that `index` is only ever bound to the index handle itself.
INDEX_NAME = "mage-x-embeddings-all"

# Pinecone client, index handle, and the LangChain vector store built on it.
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(INDEX_NAME)
# NOTE(review): OpenAIEmbeddings is constructed with no arguments, so
# OPENAI_API_KEY above is not passed explicitly -- presumably the library
# reads the key from the environment itself; confirm.
docsearch = PineconeVectorStore(index=index, embedding=OpenAIEmbeddings())

# Wikipedia API wrapper and query tool (top_k_results=1,
# doc_content_chars_max=1000).
api_wrapper_wiki = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=1000)
wiki = WikipediaQueryRun(api_wrapper=api_wrapper_wiki)
def retrieve_wisata(query: str) -> str:
    """
    Retrieve tourism ("wisata") entries from the vector store for a query.

    Runs a similarity search on the module-level ``docsearch`` store,
    restricted to documents whose ``collection_type`` metadata equals
    ``'data_wisata'``.

    Args:
        query: Free-text search query.

    Returns:
        A JSON array string (indent of 2) with one object per hit. Each
        object has ``"content"`` (the document's page content) and
        ``"metadata"`` (the document's ``firebase_id`` metadata value, or
        ``""`` when that key is absent).
    """
    hits = docsearch.similarity_search(query, filter={'collection_type': 'data_wisata'})
    # Only the firebase_id is exposed under "metadata"; other metadata is dropped.
    payload = [
        {"content": doc.page_content, "metadata": doc.metadata.get("firebase_id", "")}
        for doc in hits
    ]
    return json.dumps(payload, indent=2)
def retrieve_berita(query: str) -> str:
    """
    Retrieve news ("berita") entries from the vector store for a query.

    Runs a similarity search on the module-level ``docsearch`` store,
    restricted to documents whose ``collection_type`` metadata equals
    ``'data_berita'``.

    Args:
        query: Free-text search query.

    Returns:
        A JSON array string (indent of 2) with one object per hit. Each
        object has ``"content"`` (the document's page content) and
        ``"metadata"`` (the document's ``firebase_id`` metadata value, or
        ``""`` when that key is absent).
    """
    hits = docsearch.similarity_search(query, filter={'collection_type': 'data_berita'})
    # Only the firebase_id is exposed under "metadata"; other metadata is dropped.
    payload = [
        {"content": doc.page_content, "metadata": doc.metadata.get("firebase_id", "")}
        for doc in hits
    ]
    return json.dumps(payload, indent=2)
def retrieve_umkm(query: str) -> str:
    """
    Retrieve UMKM entries from the vector store for a query.

    Runs a similarity search on the module-level ``docsearch`` store,
    restricted to documents whose ``collection_type`` metadata equals
    ``'data_umkm'``.

    Args:
        query: Free-text search query.

    Returns:
        A JSON array string (indent of 2) with one object per hit. Each
        object has ``"content"`` (the document's page content) and
        ``"metadata"`` (the document's ``firebase_id`` metadata value, or
        ``""`` when that key is absent).
    """
    hits = docsearch.similarity_search(query, filter={'collection_type': 'data_umkm'})
    # Only the firebase_id is exposed under "metadata"; other metadata is dropped.
    payload = [
        {"content": doc.page_content, "metadata": doc.metadata.get("firebase_id", "")}
        for doc in hits
    ]
    return json.dumps(payload, indent=2)
def retrieve_wikipedia_info(query: str) -> str:
    """
    Look up a query on Wikipedia through the module-level ``wiki`` tool.

    Args:
        query: Search text handed to ``wiki.run``.

    Returns:
        The value returned by ``wiki.run`` when it is truthy; otherwise the
        fallback message "Tidak ditemukan hasil di Wikipedia.".
    """
    # Delegate to the WikipediaQueryRun tool; substitute the fallback message
    # when the tool returns an empty/falsy result.
    return wiki.run(query) or "Tidak ditemukan hasil di Wikipedia."
def get_current_time(*args, **kwargs) -> str:
    """
    Return the current time as a human-readable string.

    Any positional or keyword arguments are accepted and ignored.

    Returns:
        The value of ``datetime.now()`` (no timezone attached) formatted as
        ``YYYY-MM-DD HH:MM:SS``.
    """
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# def retrieve_berita(query: str, hari: str = None) -> str:
# """
# Mengambil berita berdasarkan query dan filter hari.
# """
# try:
# if hari is None:
# hari = datetime.now().strftime("%A") # Dapatkan hari ini (misalnya: "Monday")
# # Tentukan tanggal target berdasarkan hari
# today = datetime.now()
# target_day = today + timedelta(days=(list(calendar.day_name).index(hari) - today.weekday()) % 7)
# target_date_str = target_day.strftime("%Y-%m-%d")
# # Konversi tanggal target ke timestamp
# target_timestamp = int(target_day.timestamp() * 1000)
# retrieved_texts = []
# max_days_back = 7 # Maksimum mundur 7 hari dari tanggal target
# for i in range(max_days_back):
# filter_date = target_day - timedelta(days=i)
# filter_timestamp = int(filter_date.timestamp() * 1000)
# search_results = docsearch.similarity_search(
# query,
# filter={
# 'collection_type': 'data_berita',
# 'created_at': {'$gte': filter_timestamp} # Gunakan filter_timestamp
# }
# )
# for result in search_results:
# text = result.page_content
# metadata = result.metadata.get("firebase_id", "")
# created_at = datetime.fromtimestamp(result.metadata['created_at']/1000).strftime("%Y-%m-%d")
# if created_at == target_date_str: # Pastikan berita sesuai dengan tanggal target
# retrieved_texts.append({"content": text, "metadata": metadata})
# if retrieved_texts: # Hentikan loop jika sudah mendapatkan data
# break
# return json.dumps(retrieved_texts, indent=2)
# except ValueError:
# print("Format hari tidak valid.")
# return "[]"
# Contoh penggunaan fungsi baru
# print(retrieve_umkm("Produk UMKM terbaik?"))
# print(retrieve_berita("Berita ekonomi terbaru?"))
# print(retrieve_wisata("Tempat wisata terpopuler?"))