DMID23 committed on
Commit
e3fc73a
·
verified ·
1 Parent(s): 38f42d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -13
app.py CHANGED
@@ -1,30 +1,47 @@
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
  import os
4
- import subprocess
5
  import torch
6
  from huggingface_hub import login
7
 
8
- token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
9
- login(token)
 
 
 
 
10
 
11
 
12
- # ---------- STEP 1: Fine-tuned 모델 Git에서 clone ----------
13
- repo_url = "DMID23/MachineToolAgent"
14
 
15
 
16
- # ---------- STEP 2: Tokenizer와 모델 로드 ----------
17
- #base_model = "mistralai/Mistral-7B-v0.1"
18
- #tokenizer = AutoTokenizer.from_pretrained(base_model)
 
 
 
 
19
  model = AutoModelForCausalLM.from_pretrained(
20
- repo_url,
21
- torch_dtype=torch.float32,
 
 
 
 
22
  )
23
- model = model.to("cpu")
 
 
 
24
 
 
25
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
26
 
27
  # ---------- STEP 3: Gradio 함수 정의 ----------
 
28
  def generate_response(prompt, max_length=256, temperature=0.7):
29
  # max_length를 제한하여 속도를 빠르게 함
30
  outputs = pipe(
@@ -39,6 +56,7 @@ def generate_response(prompt, max_length=256, temperature=0.7):
39
  return outputs[0]["generated_text"]
40
 
41
  # ---------- STEP 4: Gradio UI ----------
 
42
  with gr.Blocks() as demo:
43
  gr.Markdown("# 🚀 Fine-tuned Mistral-7B (CPU Optimized)")
44
 
@@ -59,4 +77,4 @@ with gr.Blocks() as demo:
59
  )
60
 
61
  # ---------- STEP 5: Launch ----------
62
- demo.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
3
  import os
4
+ # import subprocess # 현재 코드에서 사용되지 않으므로 제거 가능
5
  import torch
6
  from huggingface_hub import login
7
 
8
+ # 환경 변수에서 토큰 가져오기
9
+ token = os.environ.get("HF_TOKEN") # 일반적으로 "HF_TOKEN"으로 설정됩니다.
10
+ if token:
11
+ login(token)
12
+ else:
13
+ print("HF_TOKEN 환경 변수가 설정되지 않았습니다. 모델 다운로드에 문제가 있을 수 있습니다.")
14
 
15
 
16
+ # ---------- STEP 1: Fine-tuned 모델 정보 ----------
17
+ repo_id = "DMID23/MachineToolAgent" # 모델 저장소 ID
18
 
19
 
20
+ # ---------- STEP 2: 양자화 설정 및 모델 로드 ----------
21
+
22
+ # 8bit 양자화 설정 (CPU 환경에서도 사용 가능)
23
+ # load_in_8bit=True 옵션만으로도 BitsAndBytesConfig 객체를 자동으로 생성하여 적용합니다.
24
+ # CPU에서는 float32 -> int8 양자화가 주로 일어납니다.
25
+ quantization_config = BitsAndBytesConfig(load_in_8bit=True)
26
+
27
  model = AutoModelForCausalLM.from_pretrained(
28
+ repo_id,
29
+ quantization_config=quantization_config, # 양자화 설정 적용
30
+ torch_dtype=torch.float32, # 8비트 로드 시에도 내부적으로 float32로 처리되거나 혼합 정밀도로 작동할 수 있습니다.
31
+ # 하지만 실제 메모리는 8비트만큼만 사용됩니다.
32
+ device_map="auto" # 모델의 각 레이어를 자동으로 최적의 장치(CPU/GPU)에 분배
33
+ # CPU만 있다면 CPU로 로드됩니다.
34
  )
35
+ print("Model loaded successfully.")
36
+
37
+ # 만약 DMID23/MachineToolAgent 저장소에 토크나이저가 있다면 repo_id로 바꾸세요.
38
+ tokenizer = AutoTokenizer.from_pretrained(repo_id)
39
 
40
+ # pipe 설정 시, device=-1 (CPU) 명시
41
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
42
 
43
  # ---------- STEP 3: Gradio 함수 정의 ----------
44
+ # (이 부분은 변경 없음)
45
  def generate_response(prompt, max_length=256, temperature=0.7):
46
  # max_length를 제한하여 속도를 빠르게 함
47
  outputs = pipe(
 
56
  return outputs[0]["generated_text"]
57
 
58
  # ---------- STEP 4: Gradio UI ----------
59
+ # (이 부분은 변경 없음)
60
  with gr.Blocks() as demo:
61
  gr.Markdown("# 🚀 Fine-tuned Mistral-7B (CPU Optimized)")
62
 
 
77
  )
78
 
79
  # ---------- STEP 5: Launch ----------
80
+ demo.launch()