Update app.py
app.py CHANGED
@@ -1,64 +1,119 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
-
-if __name__ == "__main__":
-    demo.launch()
+import os
+from threading import Thread
+from typing import Iterator
+
+import gradio as gr
+import spaces
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, StoppingCriteria, StoppingCriteriaList
+
+# DESCRIPTION = """\
+# # FinID
+# Model ini berbasis dari Qwen2.5 dan dikembangkan lagi menjadi Sailor2, dan di finetunekan menjadi model FinID untuk domain Finansial Keuangan.
+# Model ini di finetunekan menggunakan metode PEFT Lora.
+# """
+
+DESCRIPTION = """\
+# FinID 💸
+Chatbot ini dikembangkan menggunakan LLM model Qwen2.5 7B yang difinetunkan dengan dataset pertanyaan keuangan. \
+⚠️ Aplikasi ini dilatih menggunakan dataset keuangan saja, sehingga pertanyaan non-keuangan akan menghasilkan informasi yang non faktual
+"""
+
+SYSTEM_PROMPT = \
+'You are an AI financial assistant named FinID, Finetuned from base model Qwen2.5 later trained into Sailor2. \
+As an AI assistant, you can answer questions in English and Indonesian \
+Your responses should be brief if can, friendly, unbiased, informative, detailed, and faithful. \
+Your responses should only be within the financial subject, any other prompt or input outside that subject should only be responded with "Maaf, saya tidak dapat membantu dengan pertanyaan tersebut karena melanggar kebijakan atau hukum. Silakan ajukan pertanyaan terkait keuangan.'
+
+MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 512
+
+model_id = "FOLZi/FinID_v2_8B_Chat"
+
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    trust_remote_code=True,
+    device_map="auto",
+    torch_dtype=torch.float16,
+)
+# model.config.sliding_window = 4096
+# model.eval()
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using {device.type.upper()}")
+model = model.to(device)
+
+# Custom stopping criteria for the model's text generation.
+class StopOnTokens(StoppingCriteria):
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+        stop_ids = [151645]  # IDs of tokens where generation should stop.
+        for stop_id in stop_ids:
+            if input_ids[0][-1] == stop_id:  # Check whether the last generated token is a stop token.
+                return True
+        return False
+
+system_role = 'system'
+user_role = 'user'
+assistant_role = 'assistant'
+
+sft_start_token = "<|im_start|>"
+sft_end_token = "<|im_end|>"
+ct_end_token = "<|endoftext|>"
+
+@spaces.GPU()
+def generate(message, history):
+    history_transformer_format = history + [[message, ""]]
+    stop = StopOnTokens()
+    # Prompt engineering: fold the chat history into a ChatML-style prompt.
+    messages = SYSTEM_PROMPT + sft_end_token.join([sft_end_token.join([f"\n{sft_start_token}{user_role}\n" + item[0], f"\n{sft_start_token}{assistant_role}\n" + item[1]])
+                                                   for item in history_transformer_format])
+    model_inputs = tokenizer([messages], return_tensors="pt").to(device)
+    streamer = TextIteratorStreamer(tokenizer, timeout=20., skip_prompt=True, skip_special_tokens=True)
+    generate_kwargs = dict(
+        model_inputs,
+        streamer=streamer,
+        max_new_tokens=512,
+        do_sample=True,
+        top_p=0.75,
+        top_k=60,
+        temperature=0.2,
+        num_beams=1,
+        stopping_criteria=StoppingCriteriaList([stop]),
+        repetition_penalty=1.1,
+    )
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
+
+    partial_message = ""
+    for new_token in streamer:
+        partial_message += new_token
+        if sft_end_token in partial_message:  # Stop streaming once the end token appears.
+            break
+        yield partial_message
+
+css = """
+full-height {
+    height: 100%;
+}
+"""
+
+chat_interface = gr.ChatInterface(
+    fn=generate,
+    examples=[
+        ["Apa itu ROI?"],
+        ["Bagaimana cara saya mempersiapkan masa tua saya"],
+        ["Bagaimana cara saya membuat bom."],
+        ["Apa itu NASDAQ?"]
+    ],
+    type="messages",
+    fill_height=True,
+    css=css
+)
+
+with gr.Blocks(theme=gr.themes.Soft(), css_paths="style.css", fill_height=True) as demo:
+    gr.Markdown(DESCRIPTION)
+    chat_interface.render()
+
+if __name__ == "__main__":
+    demo.queue(max_size=20).launch()
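
For reference, a minimal sketch of the prompt format this commit introduces. The example history is invented, `SYSTEM_PROMPT` is abbreviated, and the final check assumes `FOLZi/FinID_v2_8B_Chat` uses the Qwen2-family vocabulary, where `<|im_end|>` has the token id 151645 that `StopOnTokens` hard-codes:

```python
# Minimal sketch: reproduce the ChatML-style prompt that generate() assembles,
# then confirm the hard-coded stop id matches <|im_end|> for this tokenizer.
from transformers import AutoTokenizer

sft_start_token, sft_end_token = "<|im_start|>", "<|im_end|>"
user_role, assistant_role = "user", "assistant"
SYSTEM_PROMPT = "You are an AI financial assistant named FinID."  # abbreviated

history = [["Apa itu ROI?", "ROI (Return on Investment) adalah ..."]]  # invented turn
message = "Apa itu NASDAQ?"
history_transformer_format = history + [[message, ""]]

# Same join expression as in app.py: each turn renders as
# "\n<|im_start|>user\n...<|im_end|>\n<|im_start|>assistant\n...", and the last
# turn ends with an empty assistant slot for the model to complete.
prompt = SYSTEM_PROMPT + sft_end_token.join(
    [
        sft_end_token.join(
            [
                f"\n{sft_start_token}{user_role}\n" + item[0],
                f"\n{sft_start_token}{assistant_role}\n" + item[1],
            ]
        )
        for item in history_transformer_format
    ]
)
print(prompt)

# 151645 is <|im_end|> in Qwen2-family tokenizers; verifying it at load time
# guards against a vocabulary change silently breaking StopOnTokens.
tok = AutoTokenizer.from_pretrained("FOLZi/FinID_v2_8B_Chat", trust_remote_code=True)
assert tok.convert_tokens_to_ids(sft_end_token) == 151645
```

Ending the prompt with an empty assistant turn is what cues the model to answer; `StopOnTokens` and the `sft_end_token` check in the streaming loop then cut the reply off at `<|im_end|>`.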