Spaces:
Sleeping
Sleeping
| # --- Impor Library --- | |
| import os | |
| import chromadb | |
| from google import genai | |
| from google.genai import types | |
| from google.genai.types import GenerateContentConfig | |
| import gradio as gr | |
| import json | |
| import re | |
# --- Path and collection-name configuration ---
DB_PATH = "./chroma_db"
COLLECTION_NAME = "knowledge_base"

# --- Gemini API key configuration ---
# The key is read from the environment. A missing key is only reported with a
# warning here; no exception is raised at import time.
gemini_api_key = os.environ.get("GEMINI_API_KEY")
if not gemini_api_key:
    print(
        "Peringatan: GEMINI_API_KEY environment variable tidak diatur.",
        "Aplikasi tidak akan dapat terhubung ke Gemini API.",
        sep="\n",
    )
# --- ChromaDB database initialisation ---
# `client` and `collection` stay None when initialisation fails, and
# `db_status_message` always holds the latest human-readable status.
client = collection = None
db_status_message = f"Initializing database from {DB_PATH}..."

try:
    client = chromadb.PersistentClient(path=DB_PATH)
    db_status_message = f"ChromaDB client initialized from path: {DB_PATH}."

    # Open the collection, creating it when it does not exist yet.
    collection = client.get_or_create_collection(name=COLLECTION_NAME)
    db_status_message = (
        db_status_message
        + f" Collection '{COLLECTION_NAME}' loaded or created. Total documents: {collection.count()}."
    )
    print(db_status_message)
except Exception as init_error:
    # Any failure (client creation, collection lookup, count) lands here:
    # record the reason and reset both handles to None.
    db_status_message = f"Error initializing or loading ChromaDB from {DB_PATH}: {init_error}"
    print(db_status_message)
    client, collection = None, None
| # --- Fungsi Retrieval --- | |
def retrieve_documents(query, top_k=3):
    """Fetch the documents most relevant to *query* from the vector database.

    Args:
        query (str): Query text.
        top_k (int): Number of top-ranked documents to request.

    Returns:
        list: The documents returned for the query, or an empty list when the
            collection is unavailable, nothing was found, or the lookup
            raised an exception (the error is printed, not re-raised).
    """
    if collection is None:
        print("Database collection is not available.")
        return []

    try:
        # One query text is sent, so only the first result sub-list matters.
        query_result = collection.query(query_texts=[query], n_results=top_k)
        per_query_docs = query_result.get("documents", [[]])
        if not per_query_docs:
            return []
        first_query_docs = per_query_docs[0]
        if not first_query_docs:
            return []
        return first_query_docs
    except Exception as e:
        print(f"Error retrieving documents: {e}")
        return []
| # --- Fungsi Generasi Respons --- | |
def _field_as_text(value, fallback):
    """Return a parsed JSON field as display text, or *fallback* when it is missing or null."""
    if value is None:
        return fallback
    if isinstance(value, str):
        return value
    # Numbers, lists and objects are serialized so the UI always receives a string.
    return json.dumps(value, ensure_ascii=False)


def _parse_model_reply(reply_text, query, context_source_message):
    """Extract the JSON object from the model reply and build the result tuple."""
    cleaned = reply_text.strip()
    # Greedy match from the first "{" to the last "}" tolerates stray text
    # around the JSON object.
    json_match = re.search(r'\{.*\}', cleaned, re.DOTALL)
    if not json_match:
        print(f"Could not find a valid JSON object in the response: {cleaned}")
        return {}, query, "Error: Invalid JSON response format from AI.", ""

    json_string = json_match.group(0)
    try:
        parsed_response = json.loads(json_string)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON response from Gemini API: {e}")
        print(f"Raw response text (attempted parse): {json_string}")
        return {}, query, f"Error: Failed to parse AI response as JSON. Details: {e}", ""

    # dict.get(key, default) would let an explicit JSON null (or a list/object)
    # through; normalize every field to text with a sensible fallback instead.
    question = _field_as_text(parsed_response.get("Question"), query)
    answer = _field_as_text(
        parsed_response.get("Answer"),
        "Maaf, saya tidak dapat menemukan jawaban yang relevan di buku.",
    )
    context_in_book = _field_as_text(
        parsed_response.get("Konteks dalam buku"), context_source_message
    )
    return parsed_response, question, answer, context_in_book


def generate_response(query, retrieved_docs):
    """Generate an answer with the Gemini API from the query and retrieved documents.

    Args:
        query (str): The original query text.
        retrieved_docs (list): Relevant document strings; may be empty.
            Entries that are None are skipped.

    Returns:
        tuple: (parsed_response_dict, question_str, answer_str, context_str).
            parsed_response_dict is the dictionary parsed from the model's
            JSON reply; the three strings are taken from its "Question",
            "Answer" and "Konteks dalam buku" keys, with fallbacks when a key
            is missing or null. On any error the dict is empty, the question
            is the original query, the answer carries an error message and
            the context is an empty string.
    """
    if not gemini_api_key:
        print("Gemini API key tidak tersedia. Tidak dapat menghasilkan respons.")
        return {}, query, "Error: Gemini API key is not set. Please configure it in settings.", ""

    # Drop None entries so the join below cannot raise TypeError.
    usable_docs = [str(doc) for doc in (retrieved_docs or []) if doc is not None]

    # Build the context from the retrieved documents.
    if not usable_docs:
        print("No relevant documents found for the query.")
        context = "Tidak ada informasi relevan dari buku yang ditemukan."
        context_source_message = "(Tidak ada konteks dari buku)"
    else:
        # Use a clear separator between documents inside the context.
        context = "\n---\n".join(usable_docs)
        context_source_message = "(Berdasarkan konteks dari buku Farr's)"

    # Build the prompt for Gemini.
    prompt = f"Context:\n---\n{context}\n---\n\nQuestion: {query}\n\nAnswer (dalam format JSON):\n"

    try:
        # Named gemini_client so it does not shadow the module-level ChromaDB `client`.
        gemini_client = genai.Client(api_key=gemini_api_key)
        response = gemini_client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[prompt],
            config=GenerateContentConfig(
                temperature=0.2,
                top_p=0.2,
                system_instruction=[
                    "Anda adalah seorang asisten fisikawan medis yang sedang diperintahkan untuk membimbing calon fisikawan medis mempelajari buku Farr's Physics for Medical Imaging.",
                    "Jawab setiap pertanyaan berdasarkan dokumen yang diberikan yang bersumber dari buku tersebut. Anda dapat memberikan wawasan tambahan di luar context sebagai pendukung namun tidak terlalu menyimpang.",
                    "Interaksi hanya berlangsung dalam Bahasa Indonesia.",
                    '''Berikan response sebagai jawaban dalam format dict/JSON berikut:\n{"Question":"...", "Answer":"...", "Konteks dalam buku":"..."}''',
                    "OUTPUT HANYA BERUPA JSON, TANPA TEXT, MARKDOWN, ATAU KARAKTER TAMBAHAN SEBELUM ATAU SESUDAH JSON.",
                    "Jangan menyertakan blok kode markdown JSON (```json ... ```) di sekitar output JSON.",
                    "Jika pertanyaan tidak ada dalam konteks, jawab saja sebatas wawasan seorang fisikawan medis yang berpengalaman lebih dari 20 tahun"
                ],
                response_mime_type="application/json",
            )
        )

        if not hasattr(response, 'text') or not response.text:
            print("Gemini API returned empty response text.")
            return {}, query, "Error: Empty response from AI.", ""

        return _parse_model_reply(response.text, query, context_source_message)
    except Exception as e:
        # Boundary handler: any SDK/network failure becomes a user-visible message.
        print(f"Error generating response from Gemini API: {e}")
        return {}, query, f"Error: An API error occurred. Details: {e}", ""
| # --- Proses Utama Chatbot --- | |
def main_process(input_text):
    """Run the RAG pipeline: retrieve documents, generate a response, return display fields.

    Args:
        input_text (str): The user's query as entered in the UI. None and
            whitespace-only input are treated as "no question".

    Returns:
        tuple: (question_str, answer_str, context_in_book_str) for the Gradio
            outputs. When the database collection is unavailable, the answer
            is an error message and the context slot carries the database
            status message. When no question was entered, the answer is a
            prompt asking the user to enter one.
    """
    if collection is None:
        return input_text, "Database knowledge base tidak tersedia. Mohon periksa log untuk detail error.", db_status_message

    # Normalize first: blank input must not trigger retrieval or an API call.
    query = (input_text or "").strip()
    if not query:
        return "", "Silakan masukkan pertanyaan Anda.", ""

    print(f"Processing query: '{query}'")

    # Step 1: retrieve the relevant documents.
    context_docs = retrieve_documents(query, top_k=3)
    if not context_docs:
        print("No relevant documents found in the database.")

    # Step 2: generate the response with Gemini; the parsed dict is not needed here.
    _, question, answer, context_in_book = generate_response(query, context_docs)

    # Step 3: hand the three display strings back to Gradio.
    return question, answer, context_in_book
# --- Gradio interface ---
# Components are created in display order inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("# Ask to Farr's Physics for Medical Imaging Book with Gemini Assistant")

    # Show the database status message in the UI.
    db_status_markdown = gr.Markdown(db_status_message)

    input_textbox = gr.Textbox(
        lines=2,
        type="text",
        label="Ask Me Anything 😊",
        placeholder="Insert your question here",
    )
    run_btn = gr.Button("💬 Ask")

    gr.Markdown("## Response")
    output_question = gr.Textbox(lines=2, label="Your Question:", interactive=False)
    output_answer = gr.Textbox(lines=10, label="The Answer:", interactive=False)
    output_context = gr.Textbox(lines=5, label="Context From Book:", interactive=False)

    # Pressing the button feeds the input box to main_process and spreads its
    # three return values over the output boxes.
    run_btn.click(
        main_process,
        inputs=[input_textbox],
        outputs=[output_question, output_answer, output_context],
    )

demo.launch(debug=True)