bookdabang committed on
Commit
12c8869
·
verified ·
1 Parent(s): aab9213

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -36
app.py CHANGED
@@ -1,42 +1,56 @@
1
- from googletrans import Translator
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
- import gradio as gr
5
-
6
- # ๋ฒˆ์—ญ๊ธฐ ์ดˆ๊ธฐํ™”
7
- translator = Translator()
8
-
9
- # PetBERT ๋ชจ๋ธ ๋กœ๋”ฉ
10
- tokenizer = AutoTokenizer.from_pretrained("SAVSNET/PetBERT_ICD")
11
- model = AutoModelForSequenceClassification.from_pretrained("SAVSNET/PetBERT_ICD")
12
-
13
- def predict_korean(text_kr):
14
- # ํ•œ๊ธ€ โ†’ ์˜์–ด ๋ฒˆ์—ญ
15
- translated = translator.translate(text_kr, src='ko', dest='en').text
16
- print(f"๐Ÿ” ๋ฒˆ์—ญ๋œ ๋ฌธ์žฅ: {translated}")
17
-
18
- # ๋ชจ๋ธ ์ž…๋ ฅ ์ฒ˜๋ฆฌ
19
- inputs = tokenizer(translated, return_tensors="pt", truncation=True, padding=True)
20
- with torch.no_grad():
21
- logits = model(**inputs).logits
22
- probs = torch.sigmoid(logits)
23
-
24
- threshold = 0.3 # ๋‚ฎ์€ ์ž„๊ณ„๊ฐ’
25
- predicted = (probs > threshold).nonzero(as_tuple=True)[1]
26
-
27
- if len(predicted) == 0:
28
- return {"์˜ˆ์ธก๋œ ์งˆ๋ณ‘ ์—†์Œ": "๐Ÿถ"}
29
-
30
- result = {model.config.id2label[idx.item()]: f"{probs[0][idx].item():.2f}" for idx in predicted}
31
- return result
32
-
33
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  demo = gr.Interface(
35
- fn=predict_korean,
36
- inputs=gr.Textbox(label="๋ฐ˜๋ ค๋™๋ฌผ ์ฆ์ƒ ์ž…๋ ฅ", placeholder="์˜ˆ: ๊ฐ•์•„์ง€๊ฐ€ ์ž์ฃผ ๊ธฐ์นจํ•ด์š”."),
37
- outputs="json",
38
- title="๐Ÿพ PetBERT ICD ์ˆ˜์˜์‚ฌ ์˜ˆ์ธก๊ธฐ (ํ•œ๊ธ€ ์ง€์›)",
39
- description="ํ•œ๊ธ€๋กœ ๋ฐ˜๋ ค๋™๋ฌผ ์ฆ์ƒ์„ ์ž…๋ ฅํ•˜๋ฉด ์˜์–ด๋กœ ๋ฒˆ์—ญํ•œ ๋’ค, AI๊ฐ€ ์งˆ๋ณ‘์„ ์˜ˆ์ธกํ•ฉ๋‹ˆ๋‹ค."
40
  )
41
 
 
42
  demo.launch()
 
1
+ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
+ from deep_translator import GoogleTranslator
5
+
6
+ # 1. ๋ชจ๋ธ ๋ฐ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
7
+ model_name = "SAVSNET/PetBERT_ICD"
8
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
9
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
10
+
11
# 2. Human-readable label names for the first few class indices.
#    NOTE(review): example mapping only — confirm against the model card's
#    id2label on the HuggingFace hub before trusting these names.
_LABEL_NAMES = [
    "์†Œํ™”๊ธฐ ์งˆํ™˜",
    "ํ˜ธํก๊ธฐ ์งˆํ™˜",
    "ํ”ผ๋ถ€ ์งˆํ™˜",
    "์ •์‹  ํ–‰๋™ ์ด์ƒ",
    "๊ฐ์—ผ์„ฑ ์งˆํ™˜",
    "๊ธฐํƒ€ ์งˆ๋ณ‘",
]
LABELS = dict(enumerate(_LABEL_NAMES))
20
+
21
# 3. Prediction pipeline: translate the Korean input to English, score it
#    with PetBERT, and report the top label probabilities.
def predict(text):
    """Translate *text* to English, classify it with the PetBERT model,
    and return a dict mapping label names to probability strings.

    Returns the top-3 labels above 10% probability, a "no prediction"
    placeholder dict when nothing clears the threshold, or an error dict
    if translation/inference raises.
    """
    try:
        # The model was trained on English clinical notes, so translate first.
        translated = GoogleTranslator(source='auto', target='en').translate(text)

        # Tokenize and run inference; gradients are unnecessary here.
        inputs = tokenizer(translated, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            logits = model(**inputs).logits
        # NOTE(review): softmax assumes exactly one correct label per input.
        # The previous revision of this app used torch.sigmoid (multi-label);
        # confirm which regime the PetBERT_ICD head was trained for.
        probs = torch.softmax(logits, dim=1).squeeze()

        # Top-k extraction; guard k so models with <3 classes don't raise.
        k = min(3, probs.numel())
        topk = torch.topk(probs, k)
        result = {
            # int(idx) in the fallback too — f"Label {idx}" on a tensor would
            # render as "Label tensor(7)" instead of "Label 7".
            LABELS.get(int(idx), f"Label {int(idx)}"): f"{float(prob):.2%}"
            for idx, prob in zip(topk.indices, topk.values)
            if float(prob) > 0.1  # only show predictions above 10%
        }

        return result if result else {"์˜ˆ์ธก๋œ ์งˆ๋ณ‘ ์—†์Œ": "๐Ÿซฅ"}
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the app.
        return {"์˜ค๋ฅ˜ ๋ฐœ์ƒ": str(e)}
45
+
46
# 4. Assemble the Gradio UI: one textbox in, a JSON panel of predictions out.
_interface_config = dict(
    fn=predict,
    inputs=gr.Textbox(label="๋ฐ˜๋ ค๋™๋ฌผ ์ฆ์ƒ ์ž…๋ ฅ", placeholder="์˜ˆ: ๊ฐ•์•„์ง€๊ฐ€ ์ž์ฃผ ๊ธฐ์นจํ•ด"),
    outputs=gr.JSON(label="์˜ˆ์ธก ์งˆ๋ณ‘ ๋ชฉ๋ก"),
    title="๐Ÿพ PetBERT ICD ์ˆ˜์˜์‚ฌ ์˜ˆ์ธก๊ธฐ",
    description="๋ฐ˜๋ ค๋™๋ฌผ์˜ ์ฆ์ƒ ๋ฌธ์žฅ์„ ์ž…๋ ฅํ•˜๋ฉด AI๊ฐ€ ์งˆ๋ณ‘ ๊ฐ€๋Šฅ์„ฑ์„ ์˜ˆ์ธกํ•ด๋“œ๋ฆฝ๋‹ˆ๋‹ค.",
)
demo = gr.Interface(**_interface_config)

# 5. Serve the app.
demo.launch()