seawolf2357 committed on
Commit
446771b
·
verified ·
1 Parent(s): e4fdbe6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -58
app.py CHANGED
@@ -882,9 +882,31 @@ def convert_hwp_to_markdown(input_path: str) -> tuple:
882
  return text, None
883
  return None, error
884
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885
  # ============== LLM API (Groq 라이브러리 사용) ==============
886
  def call_groq_api_stream(messages: List[Dict]) -> Generator[str, None, None]:
887
- """Groq API 스트리밍 호출 - openai/gpt-oss-120b 모델 사용"""
888
  if not GROQ_AVAILABLE:
889
  yield "❌ Groq 라이브러리가 설치되지 않았습니다. pip install groq"
890
  return
@@ -1107,12 +1129,9 @@ def chat_response(message: str, history: List[Dict], file: Optional[str],
1107
 
1108
  # 디버그 로그
1109
  print(f"\n🤖 [API 요청]")
1110
- print(f" - 모델: openai/gpt-oss-120b")
1111
  print(f" - 메시지 수: {len(api_messages)}")
1112
  print(f" - 파일 타입: {file_type}")
1113
  print(f" - 문서 길이: {len(file_content) if file_content else 0} 글자")
1114
- if file_content:
1115
- print(f" - 문서 미리보기: {file_content[:200]}...")
1116
 
1117
  # 응답 생성
1118
  full_response = ""
@@ -1143,34 +1162,26 @@ def load_session(session_id: str) -> tuple:
1143
  messages = get_session_messages(session_id, limit=50)
1144
  return [{"role": m["role"], "content": m["content"]} for m in messages], session_id
1145
 
1146
- # ============== HWP 변환기 ==============
1147
- def convert_to_odt_subprocess(input_path, output_dir):
1148
- output_path = os.path.join(output_dir, "output.odt")
1149
- try:
1150
- result = subprocess.run(['hwp5odt', '--output', output_path, input_path], capture_output=True, timeout=120)
1151
- if result.returncode == 0 and os.path.exists(output_path):
1152
- return output_path, None
1153
- except:
1154
- pass
1155
- return None, "ODT 변환 실패"
1156
-
1157
- def convert_to_xml_subprocess(input_path, output_dir):
1158
- output_path = os.path.join(output_dir, "output.xml")
1159
- try:
1160
- result = subprocess.run(['hwp5xml', input_path], capture_output=True, timeout=120)
1161
- if result.returncode == 0 and result.stdout:
1162
- with open(output_path, 'wb') as f:
1163
- f.write(result.stdout)
1164
- return output_path, None
1165
- except:
1166
- pass
1167
- return None, "XML 변환 실패"
1168
-
1169
  def convert_hwp(file, output_format, progress=gr.Progress()):
1170
- if not file:
 
1171
  return None, "❌ 파일을 업로드해주세요.", ""
1172
 
1173
- input_file = file.name if hasattr(file, 'name') else str(file)
 
 
 
 
 
 
 
 
 
 
 
 
 
1174
  ext_lower = Path(input_file).suffix.lower()
1175
 
1176
  if ext_lower not in ['.hwp', '.hwpx']:
@@ -1178,19 +1189,25 @@ def convert_hwp(file, output_format, progress=gr.Progress()):
1178
 
1179
  progress(0.1, desc="📖 파일 읽는 중...")
1180
  version, is_valid = check_hwp_version(input_file)
 
 
1181
  if not is_valid:
1182
  return None, f"❌ 지원하지 않는 파일: {version}", ""
1183
 
1184
- tmp_dir = tempfile.mkdtemp()
 
 
1185
 
1186
  try:
1187
  input_filename = os.path.basename(input_file)
1188
  input_path = os.path.join(tmp_dir, input_filename)
1189
- shutil.copy(input_file, input_path)
1190
 
1191
  progress(0.3, desc=f"🔄 {output_format}로 변환 중...")
1192
 
1193
- output_path, error, ext = None, None, ""
 
 
1194
 
1195
  if output_format == "HTML":
1196
  if ext_lower == '.hwpx':
@@ -1209,19 +1226,21 @@ def convert_hwp(file, output_format, progress=gr.Progress()):
1209
 
1210
  elif output_format == "TXT (텍스트)":
1211
  text, error = extract_text_from_hwp_or_hwpx(input_path)
1212
- if text:
1213
  output_path = os.path.join(tmp_dir, "output.txt")
1214
  with open(output_path, 'w', encoding='utf-8') as f:
1215
  f.write(text)
1216
- ext = ".txt"
 
1217
 
1218
  elif output_format == "⭐ MARKDOWN (추천)":
1219
  text, error = convert_hwp_to_markdown(input_path)
1220
- if text:
1221
  output_path = os.path.join(tmp_dir, "output.md")
1222
  with open(output_path, 'w', encoding='utf-8') as f:
1223
  f.write(text)
1224
- ext = ".md"
 
1225
 
1226
  elif output_format == "XML":
1227
  if ext_lower == '.hwpx':
@@ -1232,31 +1251,48 @@ def convert_hwp(file, output_format, progress=gr.Progress()):
1232
  if name.endswith('.xml'):
1233
  with zf.open(name) as f:
1234
  xml_contents.append(f"<!-- {name} -->\n{f.read().decode('utf-8', errors='ignore')}")
1235
- output_path = os.path.join(tmp_dir, "output.xml")
1236
- with open(output_path, 'w', encoding='utf-8') as f:
1237
- f.write('\n\n'.join(xml_contents))
 
 
1238
  except Exception as e:
1239
  error = f"HWPX XML 추출 실패: {e}"
1240
  else:
1241
  output_path, error = convert_to_xml_subprocess(input_path, tmp_dir)
1242
- ext = ".xml"
1243
 
 
1244
  if not output_path:
 
1245
  return None, f"❌ {error or '변환 실패'}", ""
1246
 
1247
  if not os.path.exists(output_path):
 
1248
  return None, "❌ 변환된 파일을 찾을 수 없습니다.", ""
1249
 
1250
  progress(0.8, desc="✅ 완료 중...")
1251
 
 
1252
  base_name = Path(input_filename).stem
1253
- final_output = os.path.join(tmp_dir, f"{base_name}{ext}")
 
 
 
1254
  if output_path != final_output:
1255
  shutil.copy2(output_path, final_output)
1256
 
 
 
 
 
1257
  file_size = os.path.getsize(final_output)
 
 
 
1258
  size_str = f"{file_size/1024:.1f} KB" if file_size > 1024 else f"{file_size} bytes"
1259
 
 
1260
  preview = ""
1261
  if ext in ['.txt', '.md', '.xml']:
1262
  try:
@@ -1264,24 +1300,35 @@ def convert_hwp(file, output_format, progress=gr.Progress()):
1264
  preview = f.read(5000)
1265
  if len(preview) >= 5000:
1266
  preview += "\n\n... (생략)"
1267
- except:
1268
- pass
1269
  elif ext == '.zip':
1270
  preview = "📦 HTML이 ZIP으로 압축되었습니다."
 
 
 
 
1271
 
1272
  progress(1.0, desc="🎉 완료!")
1273
- return final_output, f"✅ 변환 완료: {base_name}{ext} ({size_str})", preview
 
 
 
 
 
 
1274
 
1275
  except Exception as e:
1276
  import traceback
1277
  traceback.print_exc()
1278
  return None, f"❌ 오류: {str(e)}", ""
1279
 
 
1280
  # ============== Gradio UI ==============
1281
  with gr.Blocks(
1282
  title="HWPower AI 어시스턴트",
1283
  css=COMIC_CSS,
1284
- delete_cache=(60, 60),
1285
  head="""
1286
  <style>
1287
  header, .container.svelte-1kyws56, #huggingface-space-header,
@@ -1346,8 +1393,6 @@ setInterval(hideHfHeader, 300);
1346
  with gr.Tabs():
1347
  # Tab 1: AI 채팅
1348
  with gr.Tab("💬 AI 채팅"):
1349
- # Feature Box
1350
-
1351
  with gr.Row():
1352
  with gr.Column(scale=1):
1353
  gr.HTML("""
@@ -1366,7 +1411,7 @@ setInterval(hideHfHeader, 300);
1366
  refresh_btn = gr.Button("🔄 새로고침", size="sm")
1367
 
1368
  with gr.Column(scale=3):
1369
- chatbot = gr.Chatbot(label="💬 AI 대화", height=500)
1370
 
1371
  with gr.Row():
1372
  file_upload = gr.File(
@@ -1483,19 +1528,31 @@ setInterval(hideHfHeader, 300);
1483
  for r in chat_response(msg, hist, f, sid):
1484
  yield r[0], r[1], "", None
1485
 
1486
- submit_btn.click(on_submit, [msg_input, chatbot, file_upload, session_state],
1487
- [chatbot, session_state, msg_input, file_upload])
1488
- msg_input.submit(on_submit, [msg_input, chatbot, file_upload, session_state],
1489
- [chatbot, session_state, msg_input, file_upload])
 
 
 
 
 
 
1490
 
1491
- new_btn.click(lambda: ([], create_session(), None, ""), outputs=[chatbot, session_state, file_upload, msg_input])
1492
- clear_btn.click(lambda: ([], None, ""), outputs=[chatbot, file_upload, msg_input])
 
 
 
 
 
 
1493
 
1494
  def refresh():
1495
  sessions = get_all_sessions()
1496
  return [[s["session_id"][:8], s["title"] or "제목없음", s["updated_at"][:16] if s["updated_at"] else ""] for s in sessions]
1497
 
1498
- refresh_btn.click(refresh, outputs=[session_list])
1499
 
1500
  def select_session(evt: gr.SelectData, data):
1501
  if evt.index[0] < len(data):
@@ -1504,9 +1561,16 @@ setInterval(hideHfHeader, 300);
1504
  return load_session(s["session_id"])
1505
  return [], ""
1506
 
1507
- session_list.select(select_session, [session_list], [chatbot, session_state])
1508
- convert_btn.click(convert_hwp, [hwp_input, format_select], [file_out, status_out, preview_out])
1509
- demo.load(refresh, outputs=[session_list])
 
 
 
 
 
 
 
1510
 
1511
  if __name__ == "__main__":
1512
  demo.launch(ssr_mode=False)
 
882
  return text, None
883
  return None, error
884
 
885
def convert_to_odt_subprocess(input_path, output_dir):
    """Convert an HWP file to ODT by running the external ``hwp5odt`` tool.

    Args:
        input_path: Path of the source HWP file handed to ``hwp5odt``.
        output_dir: Directory in which ``output.odt`` is created.

    Returns:
        ``(output_path, None)`` when the tool exits with status 0 and the
        output file exists; otherwise ``(None, "ODT 변환 실패")``.
    """
    output_path = os.path.join(output_dir, "output.odt")
    try:
        result = subprocess.run(
            ['hwp5odt', '--output', output_path, input_path],
            capture_output=True,
            timeout=120,
        )
    except Exception as exc:
        # Conversion stays best-effort (missing binary, timeout, bad path all
        # map to the same failure tuple), but unlike the former bare
        # ``except`` this no longer swallows KeyboardInterrupt/SystemExit and
        # the reason is logged instead of being silently discarded.
        print(f" ❌ hwp5odt 실행 오류: {exc}")
        return None, "ODT 변환 실패"

    if result.returncode == 0 and os.path.exists(output_path):
        return output_path, None
    return None, "ODT 변환 실패"
894
+
895
def convert_to_xml_subprocess(input_path, output_dir):
    """Convert an HWP file to XML by running the external ``hwp5xml`` tool.

    ``hwp5xml`` is invoked with only the input path and its captured standard
    output is written to ``output.xml`` inside ``output_dir``.

    Args:
        input_path: Path of the source HWP file handed to ``hwp5xml``.
        output_dir: Directory in which ``output.xml`` is created.

    Returns:
        ``(output_path, None)`` when the tool exits with status 0 and produced
        non-empty output; otherwise ``(None, "XML 변환 실패")``.
    """
    output_path = os.path.join(output_dir, "output.xml")
    try:
        result = subprocess.run(
            ['hwp5xml', input_path],
            capture_output=True,
            timeout=120,
        )
        if result.returncode == 0 and result.stdout:
            # stdout is raw bytes, so write it in binary mode untouched.
            with open(output_path, 'wb') as f:
                f.write(result.stdout)
            return output_path, None
        if result.returncode != 0:
            detail = result.stderr.decode('utf-8', errors='replace').strip()
            print(f" ❌ hwp5xml 종료 코드 {result.returncode}: {detail}")
    except Exception as exc:
        # Conversion stays best-effort (missing binary, timeout, write error
        # all map to the same failure tuple), but unlike the former bare
        # ``except`` this no longer swallows KeyboardInterrupt/SystemExit and
        # the reason is logged instead of being silently discarded.
        print(f" ❌ hwp5xml 실행 오류: {exc}")
    return None, "XML 변환 실패"
906
+
907
  # ============== LLM API (Groq 라이브러리 사용) ==============
908
  def call_groq_api_stream(messages: List[Dict]) -> Generator[str, None, None]:
909
+ """Groq API 스트리밍 호출"""
910
  if not GROQ_AVAILABLE:
911
  yield "❌ Groq 라이브러리가 설치되지 않았습니다. pip install groq"
912
  return
 
1129
 
1130
  # 디버그 로그
1131
  print(f"\n🤖 [API 요청]")
 
1132
  print(f" - 메시지 수: {len(api_messages)}")
1133
  print(f" - 파일 타입: {file_type}")
1134
  print(f" - 문서 길이: {len(file_content) if file_content else 0} 글자")
 
 
1135
 
1136
  # 응답 생성
1137
  full_response = ""
 
1162
  messages = get_session_messages(session_id, limit=50)
1163
  return [{"role": m["role"], "content": m["content"]} for m in messages], session_id
1164
 
1165
+ # ============== HWP 변환기 (수정됨) ==============
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1166
  def convert_hwp(file, output_format, progress=gr.Progress()):
1167
+ """HWP/HWPX 파일을 다양한 형식으로 변환 (다운로드 문제 수정)"""
1168
+ if file is None:
1169
  return None, "❌ 파일을 업로드해주세요.", ""
1170
 
1171
+ # Gradio 버전에 따른 파일 경로 처리
1172
+ if isinstance(file, str):
1173
+ input_file = file
1174
+ elif hasattr(file, 'name'):
1175
+ input_file = file.name
1176
+ else:
1177
+ input_file = str(file)
1178
+
1179
+ print(f"\n🔄 [변환 시작] 입력 파일: {input_file}")
1180
+
1181
+ # 파일 존재 확인
1182
+ if not os.path.exists(input_file):
1183
+ return None, f"❌ 파일을 찾을 수 없습니다: {input_file}", ""
1184
+
1185
  ext_lower = Path(input_file).suffix.lower()
1186
 
1187
  if ext_lower not in ['.hwp', '.hwpx']:
 
1189
 
1190
  progress(0.1, desc="📖 파일 읽는 중...")
1191
  version, is_valid = check_hwp_version(input_file)
1192
+ print(f" 파일 버전: {version}, 유효: {is_valid}")
1193
+
1194
  if not is_valid:
1195
  return None, f"❌ 지원하지 않는 파일: {version}", ""
1196
 
1197
+ # 임시 디렉토리 생성
1198
+ tmp_dir = tempfile.mkdtemp(prefix="hwp_convert_")
1199
+ print(f" 임시 디렉토리: {tmp_dir}")
1200
 
1201
  try:
1202
  input_filename = os.path.basename(input_file)
1203
  input_path = os.path.join(tmp_dir, input_filename)
1204
+ shutil.copy2(input_file, input_path)
1205
 
1206
  progress(0.3, desc=f"🔄 {output_format}로 변환 중...")
1207
 
1208
+ output_path = None
1209
+ error = None
1210
+ ext = ""
1211
 
1212
  if output_format == "HTML":
1213
  if ext_lower == '.hwpx':
 
1226
 
1227
  elif output_format == "TXT (텍스트)":
1228
  text, error = extract_text_from_hwp_or_hwpx(input_path)
1229
+ if text and text.strip():
1230
  output_path = os.path.join(tmp_dir, "output.txt")
1231
  with open(output_path, 'w', encoding='utf-8') as f:
1232
  f.write(text)
1233
+ ext = ".txt"
1234
+ print(f" TXT 생성 완료: {len(text)} 글자")
1235
 
1236
  elif output_format == "⭐ MARKDOWN (추천)":
1237
  text, error = convert_hwp_to_markdown(input_path)
1238
+ if text and text.strip():
1239
  output_path = os.path.join(tmp_dir, "output.md")
1240
  with open(output_path, 'w', encoding='utf-8') as f:
1241
  f.write(text)
1242
+ ext = ".md"
1243
+ print(f" MD 생성 완료: {len(text)} 글자")
1244
 
1245
  elif output_format == "XML":
1246
  if ext_lower == '.hwpx':
 
1251
  if name.endswith('.xml'):
1252
  with zf.open(name) as f:
1253
  xml_contents.append(f"<!-- {name} -->\n{f.read().decode('utf-8', errors='ignore')}")
1254
+ if xml_contents:
1255
+ output_path = os.path.join(tmp_dir, "output.xml")
1256
+ with open(output_path, 'w', encoding='utf-8') as f:
1257
+ f.write('\n\n'.join(xml_contents))
1258
+ ext = ".xml"
1259
  except Exception as e:
1260
  error = f"HWPX XML 추출 실패: {e}"
1261
  else:
1262
  output_path, error = convert_to_xml_subprocess(input_path, tmp_dir)
1263
+ ext = ".xml"
1264
 
1265
+ # 변환 결과 확인
1266
  if not output_path:
1267
+ print(f" ❌ 변환 실패: {error}")
1268
  return None, f"❌ {error or '변환 실패'}", ""
1269
 
1270
  if not os.path.exists(output_path):
1271
+ print(f" ❌ 출력 파일 없음: {output_path}")
1272
  return None, "❌ 변환된 파일을 찾을 수 없습니다.", ""
1273
 
1274
  progress(0.8, desc="✅ 완료 중...")
1275
 
1276
+ # 최종 파일명 생성
1277
  base_name = Path(input_filename).stem
1278
+ final_filename = f"{base_name}{ext}"
1279
+ final_output = os.path.join(tmp_dir, final_filename)
1280
+
1281
+ # 파일명이 다르면 복사
1282
  if output_path != final_output:
1283
  shutil.copy2(output_path, final_output)
1284
 
1285
+ # 파일 검증
1286
+ if not os.path.exists(final_output):
1287
+ return None, "❌ 최종 파일 생성 실패", ""
1288
+
1289
  file_size = os.path.getsize(final_output)
1290
+ if file_size == 0:
1291
+ return None, "❌ 변환된 파일이 비어있습니다.", ""
1292
+
1293
  size_str = f"{file_size/1024:.1f} KB" if file_size > 1024 else f"{file_size} bytes"
1294
 
1295
+ # 미리보기 생성
1296
  preview = ""
1297
  if ext in ['.txt', '.md', '.xml']:
1298
  try:
 
1300
  preview = f.read(5000)
1301
  if len(preview) >= 5000:
1302
  preview += "\n\n... (생략)"
1303
+ except Exception as e:
1304
+ preview = f"미리보기 로드 실패: {e}"
1305
  elif ext == '.zip':
1306
  preview = "📦 HTML이 ZIP으로 압축되었습니다."
1307
+ elif ext == '.html':
1308
+ preview = "🌐 HTML 파일이 생성되었습니다."
1309
+ elif ext == '.odt':
1310
+ preview = "📄 ODT 파일이 생성되었습니다."
1311
 
1312
  progress(1.0, desc="🎉 완료!")
1313
+
1314
+ print(f" ✅ 변환 완료: {final_output}")
1315
+ print(f" 크기: {size_str}")
1316
+ print(f" 존재: {os.path.exists(final_output)}")
1317
+
1318
+ # 파일 경로 반환
1319
+ return final_output, f"✅ 변환 완료: {final_filename} ({size_str})", preview
1320
 
1321
  except Exception as e:
1322
  import traceback
1323
  traceback.print_exc()
1324
  return None, f"❌ 오류: {str(e)}", ""
1325
 
1326
+
1327
  # ============== Gradio UI ==============
1328
  with gr.Blocks(
1329
  title="HWPower AI 어시스턴트",
1330
  css=COMIC_CSS,
1331
+ delete_cache=(3600, 3600), # 1시간 캐시 유지
1332
  head="""
1333
  <style>
1334
  header, .container.svelte-1kyws56, #huggingface-space-header,
 
1393
  with gr.Tabs():
1394
  # Tab 1: AI 채팅
1395
  with gr.Tab("💬 AI 채팅"):
 
 
1396
  with gr.Row():
1397
  with gr.Column(scale=1):
1398
  gr.HTML("""
 
1411
  refresh_btn = gr.Button("🔄 새로고침", size="sm")
1412
 
1413
  with gr.Column(scale=3):
1414
+ chatbot = gr.Chatbot(label="💬 AI 대화", height=500, type="messages")
1415
 
1416
  with gr.Row():
1417
  file_upload = gr.File(
 
1528
  for r in chat_response(msg, hist, f, sid):
1529
  yield r[0], r[1], "", None
1530
 
1531
+ submit_btn.click(
1532
+ fn=on_submit,
1533
+ inputs=[msg_input, chatbot, file_upload, session_state],
1534
+ outputs=[chatbot, session_state, msg_input, file_upload]
1535
+ )
1536
+ msg_input.submit(
1537
+ fn=on_submit,
1538
+ inputs=[msg_input, chatbot, file_upload, session_state],
1539
+ outputs=[chatbot, session_state, msg_input, file_upload]
1540
+ )
1541
 
1542
+ new_btn.click(
1543
+ fn=lambda: ([], create_session(), None, ""),
1544
+ outputs=[chatbot, session_state, file_upload, msg_input]
1545
+ )
1546
+ clear_btn.click(
1547
+ fn=lambda: ([], None, ""),
1548
+ outputs=[chatbot, file_upload, msg_input]
1549
+ )
1550
 
1551
  def refresh():
1552
  sessions = get_all_sessions()
1553
  return [[s["session_id"][:8], s["title"] or "제목없음", s["updated_at"][:16] if s["updated_at"] else ""] for s in sessions]
1554
 
1555
+ refresh_btn.click(fn=refresh, outputs=[session_list])
1556
 
1557
  def select_session(evt: gr.SelectData, data):
1558
  if evt.index[0] < len(data):
 
1561
  return load_session(s["session_id"])
1562
  return [], ""
1563
 
1564
+ session_list.select(fn=select_session, inputs=[session_list], outputs=[chatbot, session_state])
1565
+
1566
+ # 변환 버튼 이벤트 (수정됨)
1567
+ convert_btn.click(
1568
+ fn=convert_hwp,
1569
+ inputs=[hwp_input, format_select],
1570
+ outputs=[file_out, status_out, preview_out]
1571
+ )
1572
+
1573
+ demo.load(fn=refresh, outputs=[session_list])
1574
 
1575
  if __name__ == "__main__":
1576
  demo.launch(ssr_mode=False)