import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
import os
# import subprocess  # not used in the current code, so it can be removed
import torch
from huggingface_hub import login
# Get the token from an environment variable
token = os.environ.get("HF_TOKEN")  # conventionally set as "HF_TOKEN"
if token:
    login(token)
else:
    print("The HF_TOKEN environment variable is not set. Model downloads may fail.")
# ---------- STEP 1: Fine-tuned model info ----------
repo_id = "DMID23/MachineToolAgent"  # model repository ID

# ---------- STEP 2: Quantization config and model loading ----------
# 8-bit quantization config (intended to be usable in a CPU environment too).
# The load_in_8bit=True option by itself is enough for transformers to build
# and apply a BitsAndBytesConfig; the quantization is mainly float32 -> int8.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
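# Caveat (assumption, not from the original code): bitsandbytes 8-bit loading
# generally requires a CUDA GPU. If this Space runs on CPU only and the load
# below fails, a plain full-precision load is one fallback sketch:
#   model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.float32)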
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    quantization_config=quantization_config,  # apply the quantization config
    torch_dtype=torch.float32,  # even with 8-bit loading, compute may run in
                                # float32 or mixed precision internally, but
                                # weight memory stays at the 8-bit footprint
    device_map="auto",  # place each layer on the best device (CPU/GPU) automatically;
                        # with CPU only, everything loads on the CPU
)
print("Model loaded successfully.")
# If the DMID23/MachineToolAgent repository does not include a tokenizer, change repo_id here.
tokenizer = AutoTokenizer.from_pretrained(repo_id)
# When creating the pipe, device=-1 (CPU) can be specified explicitly; it is
# omitted here since the model was already placed via device_map="auto".
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
# ---------- STEP 3: Gradio function definition ----------
# (this part is unchanged)
def generate_response(prompt, max_length=256, temperature=0.7):
    # cap max_length to keep generation fast
    outputs = pipe(
        prompt,
        max_length=max_length,
        temperature=temperature,
        do_sample=True,
        top_p=0.9,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )
    return outputs[0]["generated_text"]
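# Example call for local debugging (hypothetical prompt, safe to delete):
# print(generate_response("Describe a CNC tool change sequence", max_length=128))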
# ---------- STEP 4: Gradio UI ----------
# (this part is unchanged)
with gr.Blocks() as demo:
    gr.Markdown("# Fine-tuned Mistral-7B (CPU Optimized)")
    with gr.Row():
        prompt_input = gr.Textbox(label="Input Prompt", placeholder="Type your prompt here...", lines=4)
    with gr.Row():
        max_len_slider = gr.Slider(64, 512, value=256, step=16, label="Max Length (lower = faster)")
        temp_slider = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
    generate_button = gr.Button("Generate")
    output_box = gr.Textbox(label="Generated Output", lines=10)
    generate_button.click(
        fn=generate_response,
        inputs=[prompt_input, max_len_slider, temp_slider],
        outputs=output_box,
    )
# ---------- STEP 5: Launch ----------
demo.launch()
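# On Hugging Face Spaces the default launch() arguments are sufficient; for a
# local run, demo.launch(server_name="0.0.0.0", server_port=7860) is a common
# alternative (added note, not part of the original script).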