|
|
import gradio as gr |
|
|
import os |
|
|
import spaces |
|
|
import torch |
|
|
|
|
|
from transformers import GemmaTokenizer, AutoModelForCausalLM |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer |
|
|
from threading import Thread |
|
|
|
|
|
|
|
|
# HTML banner rendered above the chat interface.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Indonesian Legal Question and Answer</h1>
</div>
'''

# Footer markdown shown below the chat (attribution for the base model).
LICENSE = """
<p/>
---
Built with Qwen2.5
"""

# HTML shown inside the empty chatbot panel before the first message.
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Qwen2.5 1.5B Instruct</h1>
<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Finetuned with Indonesian Legal</p>
</div>
"""

# Custom CSS injected into gr.Blocks: centers headings and styles the
# (optional) duplicate-space button used on Hugging Face Spaces.
css = """
h1 {
  text-align: center;
  display: block;
}
#duplicate-button {
  margin: auto;
  color: white;
  background: #1565c0;
  border-radius: 100vh;
}
"""

# Hugging Face Hub repo of the fine-tuned checkpoint loaded below.
model_name = 'Azzindani/Qwen2.5_1.5B_IT_ID_Legal'
|
|
|
|
|
# Load tokenizer and model once at import time (module-level singletons
# shared by every chat request).
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code = True)
# NOTE(review): float16 weights are intended for GPU inference; on a
# CPU-only host this still runs but fp16 CPU kernels can be slow — confirm
# the deployment target always has CUDA available.
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code = True, torch_dtype = torch.float16)
model.to('cuda' if torch.cuda.is_available() else 'cpu')
|
|
|
|
|
@spaces.GPU()
def chat(message, history, temperature = 0.7, max_new_tokens = 1024, show_thinking = True):
    """Stream a legal-assistant reply for ``message`` given the chat ``history``.

    Generator consumed by ``gr.ChatInterface``: each ``yield`` replaces the
    text currently shown in the chat bubble. Generation runs on a background
    thread via ``TextIteratorStreamer``; this function parses the model's
    ``<think>``/``<answer>`` tags on the fly and either shows the reasoning
    live (``show_thinking=True``, collapsed into a <details> block at the end)
    or streams only the final answer.

    Parameters:
        message: the new user message.
        history: prior turns. NOTE(review): assumes Gradio's tuple-style
            history ``[(user, assistant), ...]`` — confirm against the
            installed Gradio version's ChatInterface message format.
        temperature: sampling temperature; 0 switches to greedy decoding.
        max_new_tokens: generation budget.
        show_thinking: whether to surface the <think> content in the UI.
    """
    SYSTEM_PROMPT = f"""
Anda adalah asisten AI yang ahli di bidang hukum Indonesia. Tugas Anda adalah membantu menganalisis masalah hukum secara logis, terstruktur, dan berdasarkan peraturan perundang-undangan yang relevan.
Gunakan langkah-langkah berikut saat menjawab:
<think>
## 1. Apa Masalahnya?
- Jelaskan inti permasalahan hukum dari pertanyaan
- Apa tujuan hukum atau kepentingan yang ingin dicapai?
- Apakah ada pelanggaran, sengketa, atau kewajiban yang dipertanyakan?
## 2. Apa Saja yang Perlu Dipahami?
- Identifikasi undang-undang, peraturan, atau yurisprudensi yang relevan (contoh: KUHP, KUHPerdata, UU Perlindungan Konsumen, dsb.)
- Apakah ada informasi yang kurang dari sisi fakta atau dokumen?
- Apakah ada sudut pandang atau interpretasi hukum yang mungkin berbeda?
## 3. Bagaimana Analisisnya?
- Pecah masalah menjadi bagian yang lebih kecil
- Hubungkan fakta dengan norma hukum
- Evaluasi kekuatan atau kelemahan argumen berdasarkan pasal atau aturan yang berlaku
## 4. Apa Saja Solusinya?
- Berikan beberapa alternatif penyelesaian atau pendapat hukum
- Sertakan rujukan pasal atau dasar hukum untuk masing-masing opsi
- Bandingkan konsekuensi atau risikonya
## 5. Apakah Solusinya Kuat?
- Uji kekuatan argumen dengan aturan hukum dan potensi risiko
- Periksa apakah solusi bisa dipertahankan secara hukum di forum yang relevan (pengadilan, arbitrase, mediasi, dsb.)
- Pertimbangkan preseden, bukti, dan beban pembuktian
## 6. Apa Rekomendasinya?
- Tunjukkan solusi terbaik dan alasan hukumnya
- Ringkas hasil analisis dan referensi hukum yang mendasarinya
- Sebutkan hal yang masih belum pasti dan faktor risiko
- Sarankan langkah praktis selanjutnya yang dapat dilakukan pengguna
</think>
<answer>
Tuliskan jawaban akhir secara jelas, ringkas, dan profesional. Gunakan bahasa hukum yang mudah dipahami. Sertakan referensi hukum Indonesia yang relevan (misalnya: Pasal 1365 KUHPerdata, Pasal 378 KUHP, UU No. 8 Tahun 1999 tentang Perlindungan Konsumen, dst).
</answer>
"""

    # Rebuild the full conversation (system prompt + prior turns + new message)
    # for the tokenizer's chat template.
    conversation = [{'role' : 'system', 'content' : SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        conversation.append({'role' : 'user', 'content' : user_msg})
        conversation.append({'role' : 'assistant', 'content' : assistant_msg})
    conversation.append({'role' : 'user', 'content' : message})

    try:
        input_ids = tokenizer.apply_chat_template(
            conversation,
            tokenize = True,
            add_generation_prompt = True,
            return_tensors = 'pt'
        ).to(model.device)
    except Exception as e:
        # BUG FIX: this function is a generator (it contains `yield`), so a
        # plain `return value` only sets StopIteration.value — the error text
        # was silently dropped and the user saw an empty reply. Yield it so
        # it actually appears in the chat, then stop the generator.
        yield f"Error preparing input: {str(e)}"
        return

    # Stream decoded text from the generation thread as it is produced.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt = True, skip_special_tokens = True)
    generate_kwargs = {
        'input_ids' : input_ids,
        'streamer' : streamer,
        'max_new_tokens' : max_new_tokens,
        # temperature == 0 means greedy decoding; do_sample must be False then,
        # and the placeholder temperature of 1.0 is ignored by generate().
        'do_sample' : temperature > 0,
        'temperature' : temperature if temperature > 0 else 1.0,
        'top_p' : 0.9,
        'repetition_penalty' : 1.1
    }

    # Run generate() off the main thread so we can consume the streamer here.
    thread = Thread(target = model.generate, kwargs = generate_kwargs)
    thread.start()

    # Parser state for the <think>/<answer> protocol.
    thinking_content = []        # raw text seen inside <think>...</think>
    final_answer = []            # raw text seen inside <answer>...</answer>
    live_output = []             # everything shown while streaming (show_thinking mode)
    in_thinking_block = False
    in_answer_block = False
    saw_think_tag = False
    saw_answer_tag = False
    thinking_header_shown = False

    has_started_output = False
    accumulated_text = ''

    for new_text in streamer:
        accumulated_text += new_text
        has_started_output = True

        # Tag transitions. Each tag is stripped from the visible text; the
        # state flags decide which buffer the remaining characters go to.
        if '<think>' in new_text:
            in_thinking_block = True
            saw_think_tag = True
            new_text = new_text.replace('<think>', '')
            if show_thinking and not thinking_header_shown:
                live_output.append('\n🧠 **Thinking...**\n')
                thinking_header_shown = True

        if '</think>' in new_text:
            in_thinking_block = False
            new_text = new_text.replace('</think>', '')

        if '<answer>' in new_text:
            in_thinking_block = False
            in_answer_block = True
            saw_answer_tag = True
            new_text = new_text.replace('<answer>', '')
            if show_thinking:
                live_output.append('\n\n-----\n✅ **Answer:**\n')

        if '</answer>' in new_text:
            in_answer_block = False
            new_text = new_text.replace('</answer>', '')

        if saw_think_tag or saw_answer_tag:
            # Model is following the tag protocol: route text by current state.
            if in_thinking_block:
                thinking_content.append(new_text)
                if show_thinking:
                    live_output.append(new_text)
            elif in_answer_block or (saw_answer_tag and not in_thinking_block):
                final_answer.append(new_text)
                if show_thinking:
                    live_output.append(new_text)
                else:
                    yield ''.join(final_answer)
        else:
            # No tags yet. After a short grace window (20 chars) assume the
            # model skipped the protocol and treat everything as the answer.
            if len(accumulated_text) > 20 and not saw_think_tag and not saw_answer_tag:
                if not thinking_header_shown and show_thinking:
                    live_output.append("\n⏭️ **I can't think right now**\n\n")
                    thinking_header_shown = True
                final_answer.append(new_text)
                if show_thinking:
                    live_output.append(new_text)
                else:
                    yield "".join(final_answer)
            elif show_thinking:
                # Still within the grace window: show text but don't commit it.
                live_output.append(new_text)

        if show_thinking:
            yield ''.join(live_output)

    # Generation finished once the streamer is exhausted; reap the worker
    # thread so it doesn't linger past the request.
    thread.join()

    # Final render: collapse the reasoning into a <details> block, or emit
    # the bare answer when thinking is hidden.
    if show_thinking and thinking_content:
        final_output = (
            '<details><summary>🧠 <b>Thinking Process (click to collapse)</b></summary>\n\n'
            + ''.join(thinking_content) +
            '\n</details>\n\n'
            + '-----\n✅ **Answer:**\n'
            + ''.join(final_answer)
        )
        yield final_output
    elif not show_thinking:
        yield ''.join(final_answer)

    # Fallback: the model produced output but nothing was classified as an
    # answer — surface the raw accumulated text rather than an empty bubble.
    if not final_answer and has_started_output:
        final_answer = [accumulated_text]
        if not show_thinking:
            yield accumulated_text

    # NOTE(review): Gradio manages history itself; this append is likely
    # redundant (mutates the local copy) — confirm before relying on it.
    history.append((message, ''.join(final_answer)))
    return None
|
|
|
|
|
|
|
|
chatbot = gr.Chatbot(height = 500, placeholder = PLACEHOLDER, label = 'Gradio ChatInterface') |
|
|
|
|
|
# Assemble the page: banner, chat interface with tuning controls, footer.
with gr.Blocks(fill_height = True, css = css) as demo:

    gr.Markdown(DESCRIPTION)

    gr.ChatInterface(
        fn = chat,
        chatbot = chatbot,
        fill_height = True,
        # Extra controls are collapsed into an accordion; render = False
        # defers rendering until ChatInterface places them.
        additional_inputs_accordion = gr.Accordion(label = '⚙️ Parameters', open = False, render = False),
        # Order must match chat()'s extra parameters:
        # (temperature, max_new_tokens, show_thinking).
        additional_inputs = [
            gr.Slider(minimum = 0,
                      maximum = 1,
                      step = 0.1,
                      value = 0.6,
                      label = 'Temperature',
                      render = False),
            gr.Slider(minimum = 128,
                      maximum = 4096,
                      step = 1,
                      value = 1024,
                      label = 'Max new tokens',
                      render = False),
            gr.Checkbox(
                label = 'Show thinking process',
                info = "Display the model's reasoning process with <think> tags",
                value = True,
                render = False),
        ],
        # Sample Indonesian legal questions shown as clickable examples.
        examples = [
            ['Bagaimana syarat dan mekanisme penerbitan Sertifikat Hak Milik atas Satuan Rumah Susun/SHM sarusun oleh BPN?'],
            ['Saya ingin bertanya, apakah ada pajak dari mahar/mas kawin berupa uang? Terima kasih.'],
            ['Apa yang dimaksud dengan bank perantara dan bank kustodian?'],
            ['Saya ingin bertanya, bagaimana bentuk perlindungan terhadap nasabah bank syariah jika dilihat dari UU Perbankan Syariah? Apa bedanya dengan perlindungan terhadap nasabah bank konvensional?'],
            ['Akhir-akhir ini masyarakat tengah dihebohkan dengan rapat RUU TNI yang dilakukan oleh DPR di salah satu hotel di Jakarta. Rapat ini dinilai tertutup dan tidak transparan oleh netizen. Saya mau bertanya, bagaimana hukumnya DPR rapat di hotel?']
        ],
        # Examples would otherwise trigger a model run at startup to cache
        # their outputs; disabled to keep launch fast.
        cache_examples = False,
    )

    gr.Markdown(LICENSE)
|
|
|
|
|
# Script entry point: start the Gradio server (queueing/sharing left at defaults).
if __name__ == '__main__':
    demo.launch()