amis5895 commited on
Commit
dbc5a3f
·
1 Parent(s): d2bd607

Fix permission issue - use /tmp for log file

Browse files
Files changed (2) hide show
  1. app.py +68 -23
  2. app_fixed_permissions.py +354 -0
app.py CHANGED
@@ -1,6 +1,6 @@
1
  #!/usr/bin/env python3
2
  """
3
- ์‹ค์ œ AutoTrain์„ ์‚ฌ์šฉํ•œ EXAONE Fine-tuning Space FastAPI ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜
4
  """
5
 
6
  import os
@@ -34,7 +34,7 @@ training_status = {
34
  "total_epochs": 3,
35
  "loss": 0.0,
36
  "status": "idle",
37
- "log_file": "/app/training.log"
38
  }
39
 
40
  class TrainingRequest(BaseModel):
@@ -115,9 +115,14 @@ async def run_real_training(request: TrainingRequest):
115
 
116
  logger.info("All files found, starting real AutoTrain training...")
117
 
118
- # ๋กœ๊ทธ ํŒŒ์ผ ์ดˆ๊ธฐํ™”
119
  log_file = Path(training_status["log_file"])
120
- log_file.write_text("Starting AutoTrain training...\n", encoding="utf-8")
 
 
 
 
 
121
 
122
  # AutoTrain ๋ช…๋ น์–ด ์‹คํ–‰
123
  cmd = [
@@ -147,9 +152,14 @@ async def run_real_training(request: TrainingRequest):
147
  logger.info(f"Running command: {' '.join(cmd)}")
148
 
149
  # ๋กœ๊ทธ ํŒŒ์ผ์— ๋ช…๋ น์–ด ๊ธฐ๋ก
150
- with open(log_file, "a", encoding="utf-8") as f:
151
- f.write(f"Command: {' '.join(cmd)}\n")
152
- f.write("=" * 50 + "\n")
 
 
 
 
 
153
 
154
  # AutoTrain ํ”„๋กœ์„ธ์Šค ์‹คํ–‰
155
  process = subprocess.Popen(
@@ -167,8 +177,13 @@ async def run_real_training(request: TrainingRequest):
167
  logger.info(line.strip())
168
 
169
  # ๋กœ๊ทธ ํŒŒ์ผ์— ๊ธฐ๋ก
170
- with open(log_file, "a", encoding="utf-8") as f:
171
- f.write(line)
 
 
 
 
 
172
 
173
  # ์ง„ํ–‰๋ฅ  ํŒŒ์‹ฑ
174
  if "epoch" in line.lower() and "/" in line:
@@ -210,9 +225,14 @@ async def run_real_training(request: TrainingRequest):
210
  logger.info("Training completed successfully!")
211
 
212
  # ์™„๋ฃŒ ๋กœ๊ทธ ๊ธฐ๋ก
213
- with open(log_file, "a", encoding="utf-8") as f:
214
- f.write("\n" + "=" * 50 + "\n")
215
- f.write("Training completed successfully!\n")
 
 
 
 
 
216
  else:
217
  training_status.update({
218
  "is_running": False,
@@ -221,9 +241,14 @@ async def run_real_training(request: TrainingRequest):
221
  logger.error("Training failed!")
222
 
223
  # ์‹คํŒจ ๋กœ๊ทธ ๊ธฐ๋ก
224
- with open(log_file, "a", encoding="utf-8") as f:
225
- f.write("\n" + "=" * 50 + "\n")
226
- f.write(f"Training failed with return code: {process.returncode}\n")
 
 
 
 
 
227
 
228
  except Exception as e:
229
  logger.error(f"Training error: {str(e)}")
@@ -234,8 +259,13 @@ async def run_real_training(request: TrainingRequest):
234
  })
235
 
236
  # ์˜ค๋ฅ˜ ๋กœ๊ทธ ๊ธฐ๋ก
237
- with open(log_file, "a", encoding="utf-8") as f:
238
- f.write(f"\nError: {str(e)}\n")
 
 
 
 
 
239
 
240
  @app.get("/status")
241
  async def get_status():
@@ -247,9 +277,16 @@ async def get_logs():
247
  """๋กœ๊ทธ ์กฐํšŒ"""
248
  log_file = Path(training_status["log_file"])
249
  if log_file.exists():
250
- with open(log_file, "r", encoding="utf-8") as f:
251
- logs = f.read()
252
- return {"logs": logs}
 
 
 
 
 
 
 
253
  else:
254
  return {"logs": "No logs available"}
255
 
@@ -259,9 +296,17 @@ async def stream_logs():
259
  def generate_logs():
260
  log_file = Path(training_status["log_file"])
261
  if log_file.exists():
262
- with open(log_file, "r", encoding="utf-8") as f:
263
- for line in f:
264
- yield f"data: {line}\\n\\n"
 
 
 
 
 
 
 
 
265
  else:
266
  yield "data: No logs available\\n\\n"
267
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ ๊ถŒํ•œ ๋ฌธ์ œ๋ฅผ ํ•ด๊ฒฐํ•œ EXAONE Fine-tuning Space FastAPI ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜
4
  """
5
 
6
  import os
 
34
  "total_epochs": 3,
35
  "loss": 0.0,
36
  "status": "idle",
37
+ "log_file": "/tmp/training.log" # ๊ถŒํ•œ ๋ฌธ์ œ ํ•ด๊ฒฐ์„ ์œ„ํ•ด /tmp ์‚ฌ์šฉ
38
  }
39
 
40
  class TrainingRequest(BaseModel):
 
115
 
116
  logger.info("All files found, starting real AutoTrain training...")
117
 
118
+ # ๋กœ๊ทธ ํŒŒ์ผ ์ดˆ๊ธฐํ™” (/tmp ์‚ฌ์šฉ)
119
  log_file = Path(training_status["log_file"])
120
+ try:
121
+ log_file.write_text("Starting AutoTrain training...\n", encoding="utf-8")
122
+ except Exception as e:
123
+ logger.warning(f"Could not write to log file: {e}")
124
+ # ๋กœ๊ทธ ํŒŒ์ผ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์œผ๋ฉด ๋ฉ”๋ชจ๋ฆฌ์— ์ €์žฅ
125
+ training_status["log_content"] = "Starting AutoTrain training...\n"
126
 
127
  # AutoTrain ๋ช…๋ น์–ด ์‹คํ–‰
128
  cmd = [
 
152
  logger.info(f"Running command: {' '.join(cmd)}")
153
 
154
  # ๋กœ๊ทธ ํŒŒ์ผ์— ๋ช…๋ น์–ด ๊ธฐ๋ก
155
+ try:
156
+ with open(log_file, "a", encoding="utf-8") as f:
157
+ f.write(f"Command: {' '.join(cmd)}\n")
158
+ f.write("=" * 50 + "\n")
159
+ except:
160
+ if "log_content" not in training_status:
161
+ training_status["log_content"] = ""
162
+ training_status["log_content"] += f"Command: {' '.join(cmd)}\n" + "=" * 50 + "\n"
163
 
164
  # AutoTrain ํ”„๋กœ์„ธ์Šค ์‹คํ–‰
165
  process = subprocess.Popen(
 
177
  logger.info(line.strip())
178
 
179
  # ๋กœ๊ทธ ํŒŒ์ผ์— ๊ธฐ๋ก
180
+ try:
181
+ with open(log_file, "a", encoding="utf-8") as f:
182
+ f.write(line)
183
+ except:
184
+ if "log_content" not in training_status:
185
+ training_status["log_content"] = ""
186
+ training_status["log_content"] += line
187
 
188
  # ์ง„ํ–‰๋ฅ  ํŒŒ์‹ฑ
189
  if "epoch" in line.lower() and "/" in line:
 
225
  logger.info("Training completed successfully!")
226
 
227
  # ์™„๋ฃŒ ๋กœ๊ทธ ๊ธฐ๋ก
228
+ try:
229
+ with open(log_file, "a", encoding="utf-8") as f:
230
+ f.write("\n" + "=" * 50 + "\n")
231
+ f.write("Training completed successfully!\n")
232
+ except:
233
+ if "log_content" not in training_status:
234
+ training_status["log_content"] = ""
235
+ training_status["log_content"] += "\n" + "=" * 50 + "\nTraining completed successfully!\n"
236
  else:
237
  training_status.update({
238
  "is_running": False,
 
241
  logger.error("Training failed!")
242
 
243
  # ์‹คํŒจ ๋กœ๊ทธ ๊ธฐ๋ก
244
+ try:
245
+ with open(log_file, "a", encoding="utf-8") as f:
246
+ f.write("\n" + "=" * 50 + "\n")
247
+ f.write(f"Training failed with return code: {process.returncode}\n")
248
+ except:
249
+ if "log_content" not in training_status:
250
+ training_status["log_content"] = ""
251
+ training_status["log_content"] += "\n" + "=" * 50 + f"\nTraining failed with return code: {process.returncode}\n"
252
 
253
  except Exception as e:
254
  logger.error(f"Training error: {str(e)}")
 
259
  })
260
 
261
  # ์˜ค๋ฅ˜ ๋กœ๊ทธ ๊ธฐ๋ก
262
+ try:
263
+ with open(log_file, "a", encoding="utf-8") as f:
264
+ f.write(f"\nError: {str(e)}\n")
265
+ except:
266
+ if "log_content" not in training_status:
267
+ training_status["log_content"] = ""
268
+ training_status["log_content"] += f"\nError: {str(e)}\n"
269
 
270
  @app.get("/status")
271
  async def get_status():
 
277
  """๋กœ๊ทธ ์กฐํšŒ"""
278
  log_file = Path(training_status["log_file"])
279
  if log_file.exists():
280
+ try:
281
+ with open(log_file, "r", encoding="utf-8") as f:
282
+ logs = f.read()
283
+ return {"logs": logs}
284
+ except:
285
+ pass
286
+
287
+ # ํŒŒ์ผ์„ ์ฝ์„ ์ˆ˜ ์—†์œผ๋ฉด ๋ฉ”๋ชจ๋ฆฌ์—์„œ ๊ฐ€์ ธ์˜ค๊ธฐ
288
+ if "log_content" in training_status:
289
+ return {"logs": training_status["log_content"]}
290
  else:
291
  return {"logs": "No logs available"}
292
 
 
296
  def generate_logs():
297
  log_file = Path(training_status["log_file"])
298
  if log_file.exists():
299
+ try:
300
+ with open(log_file, "r", encoding="utf-8") as f:
301
+ for line in f:
302
+ yield f"data: {line}\\n\\n"
303
+ except:
304
+ pass
305
+
306
+ # ํŒŒ์ผ์„ ์ฝ์„ ์ˆ˜ ์—†์œผ๋ฉด ๋ฉ”๋ชจ๋ฆฌ์—์„œ ๊ฐ€์ ธ์˜ค๊ธฐ
307
+ if "log_content" in training_status:
308
+ for line in training_status["log_content"].split('\n'):
309
+ yield f"data: {line}\\n\\n"
310
  else:
311
  yield "data: No logs available\\n\\n"
312
 
app_fixed_permissions.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""
EXAONE Fine-tuning Space FastAPI application with the permission issue fixed:
the training log file lives in /tmp (writable) instead of /app.
"""

import os
import json
import subprocess
import asyncio
from pathlib import Path
from typing import Dict, Any
import logging

from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import uvicorn

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="EXAONE Fine-tuning",
    # Korean description restored from mojibake (UTF-8 bytes had been decoded as windows-874).
    description="EXAONE 4.0 1.2B 모델 파인튜닝 API",
    version="1.0.0"
)
28
+
# Global training state shared by all endpoints (single-worker Space, so a
# plain module-level dict is sufficient).
training_status = {
    "is_running": False,
    "progress": 0,
    "current_epoch": 0,
    "total_epochs": 3,
    "loss": 0.0,
    "status": "idle",
    "log_file": "/tmp/training.log"  # /tmp is writable in the container (fixes the /app permission issue)
}
39
+
class TrainingRequest(BaseModel):
    """Request body for POST /start_training."""
    # Hub repo the fine-tuned model is pushed to.
    model_name: str = "amis5895/exaone-1p2b-nutrition-kdri"
42
+
@app.get("/")
async def root():
    """Root endpoint: basic service metadata."""
    return {
        "message": "EXAONE Fine-tuning API",
        "status": "running",
        "version": "1.0.0"
    }
51
+
@app.post("/start_training")
async def start_training(request: TrainingRequest, background_tasks: BackgroundTasks):
    """Kick off a fine-tuning run in the background (one run at a time)."""
    global training_status

    # Reject concurrent runs.
    if training_status["is_running"]:
        raise HTTPException(status_code=400, detail="Training is already running")

    # Reset the shared state for a fresh run.
    training_status["is_running"] = True
    training_status["progress"] = 0
    training_status["current_epoch"] = 0
    training_status["status"] = "starting"

    # The actual AutoTrain process runs outside the request/response cycle.
    background_tasks.add_task(run_real_training, request)

    return {
        "message": "Training started",
        "status": "starting",
        "model_name": request.model_name
    }
75
+
def _append_log(log_file: Path, text: str) -> None:
    """Append *text* to the training log file; fall back to the in-memory
    buffer in training_status["log_content"] when the file is not writable."""
    try:
        with open(log_file, "a", encoding="utf-8") as f:
            f.write(text)
    except OSError:
        # /tmp should be writable, but keep the log in memory just in case.
        training_status["log_content"] = training_status.get("log_content", "") + text


def _parse_progress(line: str) -> None:
    """Best-effort extraction of epoch progress and loss from one output line."""
    # Epoch progress: look for "epoch N/M" tokens.
    if "epoch" in line.lower() and "/" in line:
        try:
            parts = line.split()
            for i, part in enumerate(parts):
                if part.lower() == "epoch" and i + 1 < len(parts):
                    epoch_info = parts[i + 1]
                    if "/" in epoch_info:
                        current, total = epoch_info.split("/")
                        training_status["current_epoch"] = int(current)
                        training_status["total_epochs"] = int(total)
                        training_status["progress"] = (int(current) / int(total)) * 100
                        break
        except (ValueError, IndexError):
            pass  # unparsable progress line — ignore
    # Loss: look for a "loss <float>" token pair.
    if "loss" in line.lower():
        try:
            parts = line.split()
            for i, part in enumerate(parts):
                if part.lower() == "loss" and i + 1 < len(parts):
                    training_status["loss"] = float(parts[i + 1])
                    break
        except (ValueError, IndexError):
            pass  # unparsable loss line — ignore


async def run_real_training(request: TrainingRequest) -> None:
    """Run the real AutoTrain fine-tuning job and mirror its progress into
    the shared training_status dict.

    NOTE(review): reading the subprocess output blocks the event loop;
    acceptable for a single-user Space, but a thread executor would be cleaner.
    """
    global training_status

    # Resolve the log path up front so the outer except handler can always
    # reference it (originally it could be unbound if an early step raised).
    log_file = Path(training_status["log_file"])

    try:
        logger.info("Starting real AutoTrain training process...")
        training_status["status"] = "running"

        # Required inputs baked into the Space image.
        checks = (
            (Path("/app/train.csv"), "Training"),
            (Path("/app/validation.csv"), "Validation"),
            (Path("/app/autotrain_ultra_low_final.yaml"), "Config"),
        )
        for path, label in checks:
            if not path.exists():
                logger.error(f"{label} file not found: {path}")
                training_status.update({
                    "is_running": False,
                    "status": "failed",
                    "error": f"{label} file not found"
                })
                return

        logger.info("All files found, starting real AutoTrain training...")

        # Initialise the log file (in /tmp, which is writable).
        try:
            log_file.write_text("Starting AutoTrain training...\n", encoding="utf-8")
        except OSError as e:
            logger.warning(f"Could not write to log file: {e}")
            # Log file unusable — keep logs in memory instead.
            training_status["log_content"] = "Starting AutoTrain training...\n"

        # AutoTrain command line (argv list, no shell).
        cmd = [
            "autotrain", "llm",
            "--train",
            "--project_name", "exaone-finetuning",
            "--model", "LGAI-EXAONE/EXAONE-4.0-1.2B",
            "--data_path", "/app",
            "--text_column", "text",
            "--use_peft",
            "--quantization", "int4",
            "--lora_r", "16",
            "--lora_alpha", "32",
            "--lora_dropout", "0.05",
            "--target_modules", "all-linear",
            "--epochs", "3",
            "--batch_size", "4",
            "--gradient_accumulation", "4",
            "--learning_rate", "2e-4",
            "--warmup_ratio", "0.03",
            "--mixed_precision", "fp16",
            "--push_to_hub",
            "--hub_model_id", request.model_name,
            "--username", "amis5895"
        ]

        logger.info(f"Running command: {' '.join(cmd)}")
        _append_log(log_file, f"Command: {' '.join(cmd)}\n" + "=" * 50 + "\n")

        # Launch AutoTrain; merge stderr into stdout for a single log stream.
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,  # universal_newlines was redundant with text=True
            bufsize=1,
            cwd="/app"
        )

        # Mirror each output line into the log and the status dict.
        for line in process.stdout:
            logger.info(line.strip())
            _append_log(log_file, line)
            _parse_progress(line)

        process.wait()

        if process.returncode == 0:
            training_status.update({
                "is_running": False,
                "progress": 100,
                "status": "completed"
            })
            logger.info("Training completed successfully!")
            _append_log(log_file, "\n" + "=" * 50 + "\n" + "Training completed successfully!\n")
        else:
            training_status.update({
                "is_running": False,
                "status": "failed"
            })
            logger.error("Training failed!")
            _append_log(log_file, "\n" + "=" * 50 + "\n" + f"Training failed with return code: {process.returncode}\n")

    except Exception as e:
        logger.error(f"Training error: {str(e)}")
        training_status.update({
            "is_running": False,
            "status": "error",
            "error": str(e)
        })
        _append_log(log_file, f"\nError: {str(e)}\n")
269
+
@app.get("/status")
async def get_status():
    """Return the shared training-status dict as-is."""
    return training_status
274
+
@app.get("/logs")
async def get_logs():
    """Return the accumulated training log.

    Prefers the on-disk log file; falls back to the in-memory buffer kept by
    run_real_training. Fixes two defects in the original: it returned None
    (HTTP 200 body "null") when the file existed but was unreadable and no
    memory buffer was set, and it never consulted the memory buffer when the
    file did not exist at all.
    """
    log_file = Path(training_status["log_file"])
    if log_file.exists():
        try:
            with open(log_file, "r", encoding="utf-8") as f:
                return {"logs": f.read()}
        except OSError:
            pass  # fall through to the in-memory buffer
    # File missing or unreadable: use the in-memory fallback if present.
    if "log_content" in training_status:
        return {"logs": training_status["log_content"]}
    return {"logs": "No logs available"}
292
+
@app.get("/logs/stream")
async def stream_logs():
    """Stream the training log as server-sent events.

    Fixes in this version: frames now end with real newlines ("\\n\\n" in the
    original source produced the literal two characters backslash-n, which is
    not valid SSE framing), and the in-memory buffer is no longer re-yielded
    after the file was already streamed successfully.
    """
    def generate_logs():
        log_file = Path(training_status["log_file"])
        streamed = False
        if log_file.exists():
            try:
                with open(log_file, "r", encoding="utf-8") as f:
                    for line in f:
                        text = line.rstrip("\n")
                        yield f"data: {text}\n\n"
                streamed = True
            except OSError:
                pass  # fall through to the in-memory buffer
        if not streamed:
            if "log_content" in training_status:
                for line in training_status["log_content"].split('\n'):
                    yield f"data: {line}\n\n"
            else:
                yield "data: No logs available\n\n"

    # NOTE(review): SSE clients usually expect media_type="text/event-stream";
    # kept as text/plain to match the original behavior — confirm with callers.
    return StreamingResponse(generate_logs(), media_type="text/plain")
314
+
@app.post("/stop_training")
async def stop_training():
    """Mark the current training run as stopped.

    NOTE(review): this only flips the status flags; the AutoTrain subprocess
    itself is not terminated here — confirm whether that is intended.
    """
    global training_status

    # Nothing to stop.
    if not training_status["is_running"]:
        raise HTTPException(status_code=400, detail="No training is running")

    training_status["is_running"] = False
    training_status["status"] = "stopped"

    return {"message": "Training stopped"}
329
+
@app.get("/health")
async def health_check():
    """Liveness probe.

    Fix: report the actual current UTC time instead of the original
    hard-coded placeholder "2024-01-01T00:00:00Z".
    """
    from datetime import datetime, timezone  # local import keeps this endpoint self-contained
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    return {"status": "healthy", "timestamp": now}
334
+
@app.get("/data_info")
async def get_data_info():
    """Report whether the bundled training inputs exist and how large they are."""
    train_path = Path("/app/train.csv")
    validation_path = Path("/app/validation.csv")
    config_path = Path("/app/autotrain_ultra_low_final.yaml")

    has_train = train_path.exists()
    has_validation = validation_path.exists()
    has_config = config_path.exists()

    # Key order matches the original response shape.
    return {
        "train_file_exists": has_train,
        "validation_file_exists": has_validation,
        "config_file_exists": has_config,
        "train_file_size": train_path.stat().st_size if has_train else 0,
        "validation_file_size": validation_path.stat().st_size if has_validation else 0,
        "config_file_size": config_path.stat().st_size if has_config else 0
    }
352
+
if __name__ == "__main__":
    # Hugging Face Spaces route traffic to port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)