# app.py — Naraya Smart Legal Assistant (author: jhprasetyo, commit a0b40ec)
import gradio as gr
from huggingface_hub import InferenceClient
import pickle
import faiss
import numpy as np
import torch
import os
from transformers import AutoTokenizer, AutoModel
from openai import OpenAI
from dotenv import load_dotenv
# Read the OpenAI key from the environment (.env files supported via
# python-dotenv) and build one shared API client for the whole app.
load_dotenv()
api = os.getenv("OPENAI_API_KEY")  # None if unset — TODO confirm deployment provides it
client = OpenAI(api_key=api)
# Load IndoLegalBERT (Indonesian legal-domain BERT) once at startup; it is
# used by get_embedding() below for both corpus indexing and query embedding.
tokenizer = AutoTokenizer.from_pretrained("archi-ai/Indo-LegalBERT")
model = AutoModel.from_pretrained("archi-ai/Indo-LegalBERT")
def get_embedding(text):
    """Embed *text* with Indo-LegalBERT using attention-masked mean pooling.

    Returns a 1-D numpy vector (hidden_size,) for one input string; inputs
    longer than 512 tokens are truncated.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding="max_length",
    )
    with torch.no_grad():
        hidden = model(**encoded).last_hidden_state
    # Zero out padding positions, then average over the real tokens only.
    attn = encoded["attention_mask"].unsqueeze(-1).float()
    token_sum = (hidden * attn).sum(dim=1)
    token_count = attn.sum(dim=1).clamp(min=1e-9)  # avoid division by zero
    return (token_sum / token_count).squeeze().numpy()
# Build the FAISS index and metadata once, then persist them to disk.
# NOTE(review): `texts` (corpus bodies) and `titles` (corpus titles) are never
# defined in this file — the corpus-loading step appears to be missing.
# Guarding on the saved artifacts lets the app boot from a pre-built index
# instead of crashing with a NameError on every start.
if not (os.path.exists("legal_index.faiss") and os.path.exists("legal_metadata.pkl")):
    # Embed every corpus document with the same model used for queries.
    embeddings = np.array([get_embedding(text) for text in texts])
    dimension = embeddings.shape[1]  # hidden size of the embedding model
    index = faiss.IndexFlatL2(dimension)  # exact (brute-force) L2 search
    index.add(embeddings)
    # Persist both the vector index and the parallel title metadata.
    faiss.write_index(index, "legal_index.faiss")
    with open("legal_metadata.pkl", "wb") as f:
        pickle.dump(titles, f)
# Load the persisted FAISS index and its parallel metadata (titles) so the
# search function below can map result ids back to documents.
# NOTE(review): pickle.load is only safe because this file is produced
# locally above — never point it at untrusted input.
index = faiss.read_index("legal_index.faiss")
with open("legal_metadata.pkl", "rb") as f:
    metadata = pickle.load(f)
def search_laws(query, top_k=3):
    """Return up to *top_k* law snippets most similar to *query*.

    Embeds the query with get_embedding(), runs a FAISS L2 search, and
    formats each hit as "- <title>\\n<full text>".
    """
    vec = get_embedding(query).reshape(1, -1)
    D, I = vec_distances, vec_ids = index.search(vec, top_k)
    results = []
    for i in vec_ids[0]:
        # FAISS pads missing neighbours with -1; the original `i < len(...)`
        # check accepted -1, which silently wrapped around to the LAST entry.
        # Reject negative ids explicitly.
        if 0 <= i < len(metadata):
            # NOTE(review): `texts` is not defined anywhere in this file —
            # the corpus-loading step appears to be missing; verify upstream.
            results.append(f"- {metadata[i]}\n{texts[i]}")
    return results
def build_prompt(query, contexts):
    """Assemble the Indonesian-law analysis prompt sent to the LLM.

    *contexts* is a list of retrieved law snippets; they are joined with a
    blank line and embedded between fixed instruction text.
    """
    joined = "\n\n".join(contexts)
    prompt = (
        "\nAnda adalah asisten hukum berbasis hukum Indonesia.\n"
        "Permintaan pengguna:\n"
        f"\"{query}\"\n"
        "Gunakan konteks hukum berikut:\n"
        f"{joined}\n"
        "Berikan penjelasan hukum yang sistematis dan profesional. "
        "Sebutkan pasal hukum jika ada.\n"
    )
    return prompt
# The OpenAI client is already configured from OPENAI_API_KEY above. The
# previous `openai.api_key = "YOUR_OPENAI_API_KEY"` line referenced an
# undefined name (`openai` is never imported — only `OpenAI` is) and would
# crash the whole app with a NameError at import time, so it was removed.
def ask_llm(query):
    """Answer *query* with GPT-3.5 Turbo grounded on retrieved law articles.

    Runs the RAG pipeline: retrieve via search_laws(), build the prompt via
    build_prompt(), then return the model's text response.
    """
    contexts = search_laws(query)
    prompt = build_prompt(query, contexts)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "Anda adalah ahli hukum Indonesia."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,  # low temperature: favour consistent legal phrasing
    )
    return response.choices[0].message.content
# Gradio callback: dispatch the contract text to the RAG legal agent.
def rag_legal_analysis(document_text, issue_type):
    """Run the selected legal analysis over the contract text.

    All three supported analysis types currently share the same RAG
    pipeline; an unrecognized selection yields a help message instead.
    """
    supported = (
        "Analisis Syarat Sah Perjanjian",
        "Deteksi Klausul Bermasalah",
        "Risiko Hukum Pihak Tertentu",
    )
    if issue_type in supported:
        return ask_llm(document_text)
    return "Silakan pilih jenis analisis hukum yang ingin dilakukan."
# Gradio UI: contract text in, analysis-type selection, LLM analysis out.
# Fixed the user-facing typo "Assitant" -> "Assistant" and removed the
# commented-out MultimodalTextbox dead code.
with gr.Blocks(title="Naraya Smart Legal Assistant") as demo:
    gr.Markdown("# 🤖 Naraya Smart Legal Assistant")
    gr.Markdown("Masukkan isi perjanjian atau kontrak, lalu pilih jenis analisis hukum.")
    document_input = gr.Textbox(
        label="Isi Dokumen Kontrak",
        lines=10,
        placeholder="Masukkan isi kontrak di sini atau upload dokumen",
    )
    # Choices must match the strings checked in rag_legal_analysis().
    issue_type = gr.Radio(
        label="Jenis Analisis Hukum",
        choices=[
            "Analisis Syarat Sah Perjanjian",
            "Deteksi Klausul Bermasalah",
            "Risiko Hukum Pihak Tertentu",
        ],
    )
    output = gr.Textbox(label="Hasil Analisis Hukum", lines=20)
    analyze_button = gr.Button("🔍 Analisa Sekarang")
    analyze_button.click(
        fn=rag_legal_analysis,
        inputs=[document_input, issue_type],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()