"""Gradio demo: retrieval-augmented legal assistant for Indonesian law.

Pipeline: embed legal texts with Indo-LegalBERT, index them in FAISS,
retrieve the nearest passages for a user query, and ask an OpenAI chat
model to answer using those passages as context.
"""

import os
import pickle

import faiss
import gradio as gr
import numpy as np
import torch
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from openai import OpenAI
from transformers import AutoTokenizer, AutoModel

# Pull OPENAI_API_KEY from a .env file / the process environment.
load_dotenv()
api = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api)

# Load IndoLegalBERT
tokenizer = AutoTokenizer.from_pretrained("archi-ai/Indo-LegalBERT")
model = AutoModel.from_pretrained("archi-ai/Indo-LegalBERT")

# Analysis modes offered in the UI; shared by the Radio widget and the
# dispatcher so the two cannot drift apart.
ANALYSIS_TYPES = (
    "Analisis Syarat Sah Perjanjian",
    "Deteksi Klausul Bermasalah",
    "Risiko Hukum Pihak Tertentu",
)


def get_embedding(text):
    """Return the mean-pooled Indo-LegalBERT embedding of ``text``.

    The input is tokenized with truncation at 512 tokens and padded to that
    length; padded positions are excluded from the average through the
    attention mask.

    Args:
        text: The string to embed.

    Returns:
        A NumPy array holding the pooled hidden state with size-1
        dimensions squeezed out.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding="max_length",
    )
    with torch.no_grad():
        outputs = model(**inputs)
        last_hidden = outputs.last_hidden_state
        # Broadcast the attention mask over the hidden dimension so padded
        # tokens contribute zero to the sum.
        mask = inputs["attention_mask"].unsqueeze(-1).expand(last_hidden.size()).float()
        masked = last_hidden * mask
        summed = torch.sum(masked, 1)
        # Clamp guards against division by zero for an all-padding input.
        counts = torch.clamp(mask.sum(1), min=1e-9)
        mean_pooled = summed / counts
    return mean_pooled.squeeze().numpy()


# Generate all embeddings.
# NOTE(review): `texts` and `titles` are not defined anywhere in this file;
# they must be provided (e.g. loaded from the legal corpus) before this
# point -- confirm the intended data source.
embeddings = np.array([get_embedding(text) for text in texts])

# Store the embeddings in a flat L2 FAISS index.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Persist the FAISS index and its metadata (the document titles).
faiss.write_index(index, "legal_index.faiss")
with open("legal_metadata.pkl", "wb") as f:
    pickle.dump(titles, f)

# Load the FAISS index and metadata back from disk.
index = faiss.read_index("legal_index.faiss")
with open("legal_metadata.pkl", "rb") as f:
    # The pickle read here is the file written just above by this script.
    metadata = pickle.load(f)


def search_laws(query, top_k=3):
    """Retrieve the legal passages closest to ``query``.

    Args:
        query: Free-text question or contract excerpt.
        top_k: Number of nearest neighbours requested from the index.

    Returns:
        A list of strings, one per hit, each formatted as
        ``"- <title>\\n<text>"``. May contain fewer than ``top_k`` entries.
    """
    vec = get_embedding(query).reshape(1, -1)
    _distances, neighbours = index.search(vec, top_k)
    results = []
    for i in neighbours[0]:
        # FAISS fills missing neighbours with -1 when the index holds fewer
        # than top_k vectors; a bare `i < len(metadata)` would accept -1 and
        # silently return the last document.
        if 0 <= i < len(metadata):
            results.append(f"- {metadata[i]}\n{texts[i]}")
    return results


def build_prompt(query, contexts):
    """Build the user prompt sent to the chat model.

    Args:
        query: The user's request.
        contexts: Retrieved passages, as returned by ``search_laws``.

    Returns:
        The prompt string embedding the query and the joined contexts.
    """
    context_text = "\n\n".join(contexts)
    return f"""
Anda adalah asisten hukum berbasis hukum Indonesia.

Permintaan pengguna:
\"{query}\"

Gunakan konteks hukum berikut:
{context_text}

Berikan penjelasan hukum yang sistematis dan profesional. Sebutkan pasal hukum jika ada.
"""


def ask_llm(query):
    """Answer ``query`` with GPT-3.5 Turbo, grounded in retrieved passages.

    The module-level ``client`` (configured from OPENAI_API_KEY) is used;
    no key is hard-coded here.

    Args:
        query: The user's request.

    Returns:
        The text content of the first completion choice.
    """
    contexts = search_laws(query)
    prompt = build_prompt(query, contexts)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "Anda adalah ahli hukum Indonesia."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,
    )
    return response.choices[0].message.content


def rag_legal_analysis(document_text, issue_type):
    """Gradio callback: run the RAG legal analysis on a contract text.

    Args:
        document_text: Contract text entered by the user.
        issue_type: The analysis mode selected in the UI, or ``None`` when
            nothing is selected.

    Returns:
        The model's answer, or a message asking the user to pick an
        analysis type when ``issue_type`` is not a known mode.
    """
    # NOTE(review): issue_type only gates the call; it is not forwarded to
    # the prompt, so every mode currently produces the same request.
    if issue_type in ANALYSIS_TYPES:
        return ask_llm(document_text)
    return "Silakan pilih jenis analisis hukum yang ingin dilakukan."


# Gradio UI
with gr.Blocks(title="Naraya Smart Legal Assitant") as demo:
    gr.Markdown("# 🤖 Naraya Smart Legal Assitant")
    gr.Markdown("Masukkan isi perjanjian atau kontrak, lalu pilih jenis analisis hukum.")

    # A gr.MultimodalTextbox (interactive=True, same label/lines/placeholder)
    # was previously tried here as an alternative input widget.
    document_input = gr.Textbox(
        label="Isi Dokumen Kontrak",
        lines=10,
        placeholder="Masukkan isi kontrak di sini atau upload dokumen",
    )

    issue_type = gr.Radio(
        label="Jenis Analisis Hukum",
        choices=list(ANALYSIS_TYPES),
    )

    output = gr.Textbox(label="Hasil Analisis Hukum", lines=20)

    analyze_button = gr.Button("🔍 Analisa Sekarang")
    analyze_button.click(
        fn=rag_legal_analysis,
        inputs=[document_input, issue_type],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()