Spaces:
Sleeping
Sleeping
| from typing import List, Dict, Any | |
| from langchain_pinecone import PineconeVectorStore | |
| from langchain_openai import OpenAIEmbeddings | |
| from pinecone import Pinecone | |
| import json | |
| import calendar | |
| from datetime import datetime, timedelta | |
| from langchain_community.tools import WikipediaQueryRun | |
| from langchain_community.utilities import WikipediaAPIWrapper | |
| import os | |
# API keys are read from the environment; os.getenv returns None when a
# variable is not set. OPENAI_API_KEY is not passed to anything in this
# section (OpenAIEmbeddings() below is built without arguments).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# Pinecone client, a handle to the embeddings index, and the vector store
# used by the retrieval functions in this module.
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index("mage-x-embeddings-all")
docsearch = PineconeVectorStore(index=index, embedding=OpenAIEmbeddings())

# Wikipedia API wrapper (top_k_results=1, doc_content_chars_max=1000) and the
# query tool built on top of it.
api_wrapper_wiki = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=1000)
wiki = WikipediaQueryRun(api_wrapper=api_wrapper_wiki)
def _search_collection(query: str, collection_type: str) -> str:
    """Search one collection of the vector store and serialize the hits.

    Args:
        query: Free-text query passed to ``docsearch.similarity_search``.
        collection_type: Value that the ``collection_type`` metadata field
            must match (for example ``"data_wisata"``).

    Returns:
        A JSON array (2-space indent) with one object per hit. Each object
        has ``"content"`` (the hit's ``page_content``) and ``"metadata"``
        (the hit's ``firebase_id`` metadata value, or ``""`` when the key is
        missing). An empty result set serializes to ``"[]"``.
    """
    hits = docsearch.similarity_search(
        query, filter={"collection_type": collection_type}
    )
    payload = [
        {
            "content": hit.page_content,
            # Only the firebase_id is exposed, under the "metadata" key.
            "metadata": hit.metadata.get("firebase_id", ""),
        }
        for hit in hits
    ]
    return json.dumps(payload, indent=2)


def retrieve_wisata(query: str) -> str:
    """Retrieve entries relevant to *query* from the ``data_wisata`` collection.

    Args:
        query: Free-text search query.

    Returns:
        JSON string: a list of ``{"content": ..., "metadata": ...}`` objects,
        where ``"metadata"`` holds the entry's ``firebase_id`` (or ``""``).
    """
    return _search_collection(query, "data_wisata")


def retrieve_berita(query: str) -> str:
    """Retrieve entries relevant to *query* from the ``data_berita`` collection.

    Args:
        query: Free-text search query.

    Returns:
        JSON string: a list of ``{"content": ..., "metadata": ...}`` objects,
        where ``"metadata"`` holds the entry's ``firebase_id`` (or ``""``).
    """
    return _search_collection(query, "data_berita")


def retrieve_umkm(query: str) -> str:
    """Retrieve entries relevant to *query* from the ``data_umkm`` collection.

    Args:
        query: Free-text search query.

    Returns:
        JSON string: a list of ``{"content": ..., "metadata": ...}`` objects,
        where ``"metadata"`` holds the entry's ``firebase_id`` (or ``""``).
    """
    return _search_collection(query, "data_umkm")
def retrieve_wikipedia_info(query: str) -> str:
    """Look up *query* with the module-level Wikipedia query tool.

    Args:
        query: Search text handed to ``wiki.run``.

    Returns:
        Whatever ``wiki.run`` returns, or the fixed fallback message when
        that result is empty/falsy.
    """
    # Run the search through the WikipediaQueryRun tool.
    answer = wiki.run(query)
    if not answer:
        return "Tidak ditemukan hasil di Wikipedia."
    return answer
def get_current_time(*args, **kwargs) -> str:
    """Return the current date and time in a human-readable form.

    Any positional or keyword arguments are accepted and ignored.

    Returns:
        The value of ``datetime.now()`` formatted as ``YYYY-MM-DD HH:MM:SS``.
    """
    return f"{datetime.now():%Y-%m-%d %H:%M:%S}"
| # def retrieve_berita(query: str, hari: str = None) -> str: | |
| # """ | |
| # Mengambil berita berdasarkan query dan filter hari. | |
| # """ | |
| # try: | |
| # if hari is None: | |
| # hari = datetime.now().strftime("%A") # Dapatkan hari ini (misalnya: "Monday") | |
| # # Tentukan tanggal target berdasarkan hari | |
| # today = datetime.now() | |
| # target_day = today + timedelta(days=(list(calendar.day_name).index(hari) - today.weekday()) % 7) | |
| # target_date_str = target_day.strftime("%Y-%m-%d") | |
| # # Konversi tanggal target ke timestamp | |
| # target_timestamp = int(target_day.timestamp() * 1000) | |
| # retrieved_texts = [] | |
| # max_days_back = 7 # Maksimum mundur 7 hari dari tanggal target | |
| # for i in range(max_days_back): | |
| # filter_date = target_day - timedelta(days=i) | |
| # filter_timestamp = int(filter_date.timestamp() * 1000) | |
| # search_results = docsearch.similarity_search( | |
| # query, | |
| # filter={ | |
| # 'collection_type': 'data_berita', | |
| # 'created_at': {'$gte': filter_timestamp} # Gunakan filter_timestamp | |
| # } | |
| # ) | |
| # for result in search_results: | |
| # text = result.page_content | |
| # metadata = result.metadata.get("firebase_id", "") | |
| # created_at = datetime.fromtimestamp(result.metadata['created_at']/1000).strftime("%Y-%m-%d") | |
| # if created_at == target_date_str: # Pastikan berita sesuai dengan tanggal target | |
| # retrieved_texts.append({"content": text, "metadata": metadata}) | |
| # if retrieved_texts: # Hentikan loop jika sudah mendapatkan data | |
| # break | |
| # return json.dumps(retrieved_texts, indent=2) | |
| # except ValueError: | |
| # print("Format hari tidak valid.") | |
| # return "[]" | |
# Example usage of the new functions
# print(retrieve_umkm("Produk UMKM terbaik?"))
# print(retrieve_berita("Berita ekonomi terbaru?"))
# print(retrieve_wisata("Tempat wisata terpopuler?"))