# Hugging Face Spaces page-status residue ("Spaces: Sleeping") from web
# extraction — kept as a comment so the file parses as Python.
import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

# --- Hugging Face authentication --------------------------------------------
# Read the access token from the environment (set HF_TOKEN in the Space
# secrets). Logging in is required for gated/private model downloads.
token = os.environ.get("HF_TOKEN")
if token:
    login(token)
else:
    # Best-effort warning only: public models can still be downloaded.
    print("HF_TOKEN environment variable is not set; model download may fail.")
# ---------- STEP 1: fine-tuned model location ----------
repo_id = "DMID23/MachineToolAgent"  # Hub repository with the fine-tuned weights

# ---------- STEP 2: quantization config and model load ----------
# NOTE(review): bitsandbytes 8-bit quantization requires a CUDA GPU — the
# original comments claimed it also works on CPU, which it does not; on a
# CPU-only Space the 8-bit load raises at startup. Guard on GPU availability
# and fall back to a plain float32 CPU load so the app starts either way.
if torch.cuda.is_available():
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        quantization_config=quantization_config,
        device_map="auto",  # place layers on the available GPU(s)
    )
else:
    quantization_config = None  # no quantization possible without CUDA
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        torch_dtype=torch.float32,
        device_map="auto",  # resolves to CPU when no accelerator exists
    )
print("Model loaded successfully.")

# Tokenizer is loaded from the same repo as the fine-tuned model; change
# repo_id if the tokenizer lives elsewhere.
tokenizer = AutoTokenizer.from_pretrained(repo_id)

# The pipeline reuses the already-placed model, so no explicit device arg
# is needed (device_map="auto" above already decided placement).
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
# ---------- STEP 3: Gradio callback ----------
def generate_response(prompt, max_length=256, temperature=0.7):
    """Generate a completion for *prompt* using the module-level pipeline.

    Args:
        prompt: User-supplied input text.
        max_length: Upper bound on total tokens (prompt + generation).
            Kept small to keep CPU inference fast. Gradio sliders deliver
            floats, so the value is coerced to int before use.
        temperature: Sampling temperature; higher values are more random.

    Returns:
        The generated text. The pipeline's "generated_text" field includes
        the original prompt as a prefix.
    """
    outputs = pipe(
        prompt,
        max_length=int(max_length),  # pipeline requires int; sliders send floats
        temperature=float(temperature),
        do_sample=True,
        top_p=0.9,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,  # avoid missing-pad-token warning
    )
    return outputs[0]["generated_text"]
# ---------- STEP 4: Gradio UI ----------
with gr.Blocks() as demo:
    gr.Markdown("# π Fine-tuned Mistral-7B (CPU Optimized)")

    # Input row: free-form prompt text.
    with gr.Row():
        user_prompt = gr.Textbox(
            label="Input Prompt",
            placeholder="Type your prompt here...",
            lines=4,
        )

    # Controls row: output length cap and sampling temperature.
    with gr.Row():
        length_control = gr.Slider(
            64, 512, value=256, step=16, label="Max Length (lower = faster)"
        )
        temperature_control = gr.Slider(
            0.1, 1.5, value=0.7, step=0.1, label="Temperature"
        )

    run_button = gr.Button("Generate")
    result_box = gr.Textbox(label="Generated Output", lines=10)

    # Wire the button to the generation callback defined above.
    run_button.click(
        fn=generate_response,
        inputs=[user_prompt, length_control, temperature_control],
        outputs=result_box,
    )

# ---------- STEP 5: Launch ----------
demo.launch()