seawolf2357 committed on
Commit
e950483
·
verified ·
1 Parent(s): 1765553

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +802 -0
app.py ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 기업마당 AI 분석기 - 메인 애플리케이션
3
+ """
4
+ import gradio as gr
5
+ import pandas as pd
6
+ import json
7
+ import os
8
+ import tempfile
9
+ from pathlib import Path
10
+ from typing import Optional, Tuple, List, Dict, Generator
11
+ from datetime import datetime
12
+
13
+ from utils import (
14
+ CATEGORY_CODES, REGION_LIST, SIDO_LIST, ORG_TYPE_OPTIONS, SORT_OPTIONS, STATUS_OPTIONS,
15
+ COMPANY_TYPE_OPTIONS, CORP_TYPE_OPTIONS, COMPANY_SIZE_OPTIONS, INDUSTRY_MAJOR_OPTIONS,
16
+ CORE_INDUSTRY_OPTIONS, NATIONAL_STRATEGIC_TECH, CREDIT_GRADE_OPTIONS, TCB_GRADE_OPTIONS,
17
+ ISO_CERT_OPTIONS, extract_region_from_text, extract_region_from_hashtags,
18
+ classify_org_type, parse_deadline, is_ongoing, calculate_age, calculate_company_age
19
+ )
20
+
21
+ from file_api import (
22
+ fetch_all_from_api, download_file, extract_text_from_file, extract_zip_files,
23
+ call_groq_api_stream, fetch_announcement_detail
24
+ )
25
+
26
+
27
+ # ============================================================
28
+ # 공고 조회 함수
29
+ # ============================================================
30
def _attachment_entry(url, filename):
    """Build one attachment record: download URL, file name and lower-cased extension."""
    suffix = Path(filename).suffix
    return {
        "url": url,
        "filename": filename,
        "type": suffix.lower()[1:] if suffix else "unknown",
    }


def fetch_announcements(keyword="", category="전체", region="전체(지역)", org_type="전체",
                        sort_by="등록일순", status_filter="진행중", page=1, rows=20) -> Tuple[pd.DataFrame, str]:
    """Fetch announcements from the API helper, filter/sort them and return one page.

    Args:
        keyword: Search keyword forwarded to ``fetch_all_from_api``.
        category: Support-category label forwarded to ``fetch_all_from_api``.
        region: Region label; forwarded to the API helper and also applied as an
            exact-match filter unless it is ``"전체(지역)"``.
        org_type: ``"중앙부처"`` or ``"지자체"`` to filter by organisation type;
            any other value disables the filter.
        sort_by: ``"등록일순"`` (newest first) or ``"마감일순"`` (earliest deadline first).
        status_filter: ``"진행중"`` keeps only rows flagged as ongoing.
        page: 1-based page number; values below 1 are treated as 1.
        rows: Page size; values below 1 are treated as 1.

    Returns:
        ``(page_df, status_message)``. ``page_df`` is an empty DataFrame when
        nothing can be shown (API error, no match, page past the end, or an
        unexpected exception); the message explains which case occurred.
    """
    import re  # function-scope import: ``re`` is not imported at the top of this file

    try:
        # Normalise paging so page 0 / negative values cannot produce a bogus slice.
        page = max(1, int(page))
        rows = max(1, int(rows))

        items, error = fetch_all_from_api(category, region, keyword)
        if error and not items:
            return pd.DataFrame(), error
        if not items:
            return pd.DataFrame(), "⚠️ 검색 결과가 없습니다."

        html_tag = re.compile(r'<[^>]+>')  # compiled once, reused for every item
        data = []
        for item in items:
            if not isinstance(item, dict):
                continue
            author = item.get("author", "") or item.get("jrsdInsttNm", "")
            title = item.get("title", "") or item.get("pblancNm", "")
            exec_org = item.get("excInsttNm", "") or ""
            hash_tags = item.get("hashTags", "")

            # Region fallback chain: hashtags first, then the title, then the author.
            item_region = (
                extract_region_from_hashtags(hash_tags)
                or extract_region_from_text(title)
                or extract_region_from_text(author)
            )

            item_org_type = classify_org_type(author)
            req_dt = item.get("reqstDt", "") or item.get("reqstBeginEndDe", "")
            item_ongoing = is_ongoing(req_dt)
            pub_date = item.get("pubDate", "") or item.get("creatPnttm", "") or ""
            if pub_date and len(str(pub_date)) >= 10:
                pub_date = str(pub_date)[:10]  # keep the first 10 characters only

            link = item.get("link", "") or item.get("pblancUrl", "")
            pblanc_id = item.get("seq", "") or item.get("pblancId", "")

            attachments = []
            file_url = item.get("flpthNm", "")
            file_name = item.get("fileNm", "")
            if file_url and file_name:
                attachments.append(_attachment_entry(file_url, file_name))

            # Secondary ("print") attachment, skipped when it repeats the main URL.
            print_url = item.get("printFlpthNm", "")
            print_name = item.get("printFileNm", "")
            if print_url and print_name and all(att["url"] != print_url for att in attachments):
                attachments.append(_attachment_entry(print_url, print_name))

            # ``or ""`` guards against a None summary, which would otherwise make
            # ``len(description)`` raise and abort the whole search.
            description = item.get("description", "") or item.get("bsnsSumryCn", "") or ""
            if description:
                description = html_tag.sub('', description).strip()

            deadline = parse_deadline(req_dt)
            data.append({
                "지원분야": item.get("lcategory", "") or item.get("pldirSportRealmLclasCodeNm", ""),
                "지원사업명": title, "신청기간": req_dt, "소관부처": author,
                "수행기관": exec_org, "등록일": pub_date, "조회수": item.get("inqireCo", "") or "",
                "상세링크": link, "공고ID": pblanc_id,
                "사업개요": description[:200] + "..." if len(description) > 200 else description,
                "첨부파일": attachments, "지원대상": item.get("trgetNm", ""),
                "문의처": item.get("refrncNm", ""), "신청URL": item.get("rceptEngnHmpgUrl", ""),
                # Underscore-prefixed columns are internal and dropped before returning.
                "_org_type": item_org_type, "_ongoing": item_ongoing, "_deadline": deadline,
                "_pub_date": pub_date, "_region": item_region,
            })

        if not data:
            return pd.DataFrame(), "⚠️ 검색 결과가 없습니다."

        df = pd.DataFrame(data)
        total_before_filter = len(df)

        if org_type in ("중앙부처", "지자체"):
            df = df[df["_org_type"] == org_type]
        if region and region != "전체(지역)":
            df = df[df["_region"] == region]
        if status_filter == "진행중":
            # ``.eq(True)`` keeps the original ``== True`` semantics for non-bool flags.
            df = df[df["_ongoing"].eq(True)]

        if sort_by == "등록일순":
            df = df.sort_values(by="_pub_date", ascending=False)
        elif sort_by == "마감일순":
            df = df.sort_values(by="_deadline", ascending=True, na_position='last')

        total_filtered = len(df)
        if total_filtered == 0:
            return pd.DataFrame(), f"⚠️ 필터 조건에 맞는 결과가 없습니다. (전체 {total_before_filter}건 중)"

        start_idx = (page - 1) * rows
        df_page = df.iloc[start_idx:start_idx + rows].copy()
        if df_page.empty:
            # Requested page lies past the last one: say so instead of "✅ 0건 표시".
            return pd.DataFrame(), f"⚠️ 페이지 {page}에 표시할 결과가 없습니다. (필터 결과: {total_filtered}건)"

        # Descending running number: the first row of page 1 gets ``total_filtered``.
        first_no = total_filtered - start_idx
        df_page.insert(0, "번호", range(first_no, first_no - len(df_page), -1))
        internal_cols = [c for c in df_page.columns if c.startswith("_")]
        df_page = df_page.drop(columns=internal_cols)

        status = f"✅ {len(df_page)}건 표시 (페이지 {page}) | 필터 결과: {total_filtered}건 | API 수집: {total_before_filter}건"
        return df_page, status
    except Exception as e:
        # UI boundary: report the failure as a status message instead of raising.
        return pd.DataFrame(), f"❌ 오류: {str(e)[:80]}"
133
+
134
+
135
+ # ============================================================
136
+ # AI 분석 함수
137
+ # ============================================================
138
def analyze_announcement(detail_url, project_name, api_attachments=None, api_description="", progress=gr.Progress()):
    """Download an announcement's attachments, extract their text and stream an AI analysis.

    This is a generator: every ``yield`` is the full Markdown report accumulated
    so far, so a caller can overwrite its output with each yielded value.

    Args:
        detail_url: Detail link of the selected announcement; only checked for
            being non-empty.
        project_name: Announcement title used in the report header and the prompt.
        api_attachments: Optional list of dicts with ``"url"`` and ``"filename"`` keys.
        api_description: Optional summary text included in the report and the prompt.
        progress: Gradio progress callback.

    Yields:
        The cumulative Markdown report.
    """
    if not detail_url:
        yield "❌ 분석할 공고를 선택해주세요."
        return

    output = f"# 📄 {project_name}\n\n---\n\n"
    attachments = api_attachments or []
    all_text = f"## 공고명: {project_name}\n\n"

    if api_description:
        all_text += f"### 사업개요:\n{api_description}\n\n"
        output += f"📋 **사업개요**\n{api_description}\n\n"
        yield output

    output += f"📎 **첨부파일 {len(attachments)}개 발견**\n\n"
    yield output

    extracted_texts = []
    if attachments:
        # All downloads live in one temporary directory that is removed on exit.
        with tempfile.TemporaryDirectory() as tmp_dir:
            for i, att in enumerate(attachments):
                progress(0.2 + (0.4 * i / len(attachments)), desc=f"파일 다운로드 중... ({i+1}/{len(attachments)})")
                output += f"📥 다운로드 중: `{att['filename']}`\n"
                yield output

                file_path, error = download_file(att['url'], tmp_dir, att['filename'])
                if error:
                    output += f" - ⚠️ {error}\n"
                    yield output
                    continue
                if not file_path:
                    continue

                # A zip archive is expanded and each member is read; anything
                # else is read directly.
                if file_path.lower().endswith('.zip'):
                    candidates = extract_zip_files(file_path, tmp_dir)
                else:
                    candidates = [file_path]

                for candidate in candidates:
                    text, err = extract_text_from_file(candidate)
                    if text:
                        extracted_texts.append({"filename": os.path.basename(candidate), "text": text})
                        output += f" - ✅ 텍스트 추출 성공 ({len(text):,} 글자)\n"
                    elif err:
                        # Surface extraction failures instead of dropping them silently.
                        output += f" - ⚠️ 텍스트 추출 실패: {err}\n"
                    yield output

    if extracted_texts:
        all_text += "### 첨부파일 내용:\n\n"
        for ext in extracted_texts:
            # At most 5000 characters per file go into the prompt material.
            all_text += f"#### 📄 {ext['filename']}\n{ext['text'][:5000]}\n\n"

    if len(all_text) < 100 and not extracted_texts:
        output += "\n❌ **분석할 내용이 충분하지 않습니다.**\n"
        yield output
        return

    output += f"\n📊 **분석 준비 완료** (총 {len(all_text):,}자)\n\n---\n\n## 🤖 AI 분석 결과\n\n"
    yield output

    progress(0.7, desc="AI 분석 중...")
    system_prompt = """당신은 정부 지원사업 공고 분석 전문가입니다.
주어진 공고 내용을 분석하여 다음 항목을 명확하게 정리해주세요:
- 사업명, 주관기관, 지원 목적
- 신청 자격 요건, 제외 대상
- 지원 금액/규모, 지원 항목/내용
- 신청 기간, 신청 방법, 제출 서류
- 중요 유의사항, 제한 사항
- 이 사업의 핵심 포인트를 3줄로 요약"""

    messages = [
        {"role": "system", "content": system_prompt},
        # The combined text is capped at 15000 characters before being sent.
        {"role": "user", "content": f"다음 지원사업 공고를 분석해주세요:\n\n{all_text[:15000]}"}
    ]

    for chunk in call_groq_api_stream(messages):
        output += chunk
        yield output

    output += "\n\n---\n✅ **분석 완료**"
    yield output
220
+
221
+
222
+ # ============================================================
223
+ # 맞춤 과제 매칭 함수
224
+ # ============================================================
225
def analyze_uploaded_documents(files, progress=gr.Progress()):
    """Extract text from uploaded documents and stream an AI summary of company data.

    This is a generator: every ``yield`` is the full Markdown report accumulated so far.

    Args:
        files: Uploaded files; each entry is either an object exposing a ``name``
            attribute that holds its path, or the path itself.
        progress: Gradio progress callback.

    Yields:
        The cumulative Markdown report.
    """
    if not files:
        yield "❌ 분석할 파일을 업로드해주세요."
        return

    output = "# 📄 업로드 문서 분석 결과\n\n"
    all_extracted_text = []

    for i, file in enumerate(files):
        progress((i + 1) / len(files), desc=f"파일 분석 중... ({i+1}/{len(files)})")

        # Resolve the on-disk path once; entries may be upload objects or bare paths.
        path = file.name if hasattr(file, 'name') else file
        if isinstance(path, (str, os.PathLike)):
            # Use the real file name for bare paths too, with the numbered
            # placeholder only as a last resort.
            filename = os.path.basename(path) or f"파일_{i+1}"
        else:
            filename = f"파일_{i+1}"

        output += f"## 📎 {filename}\n\n"
        yield output

        try:
            text, error = extract_text_from_file(path)
            if text:
                all_extracted_text.append({"filename": filename, "text": text})
                preview = text[:500] + "..." if len(text) > 500 else text
                output += f"✅ 텍스트 추출 성공 ({len(text):,}자)\n\n```\n{preview}\n```\n\n"
            else:
                output += f"⚠️ 텍스트 추출 실패: {error}\n\n"
        except Exception as e:
            # Best effort: one unreadable file must not abort the remaining ones.
            output += f"❌ 오류: {str(e)}\n\n"
        yield output

    if all_extracted_text:
        output += "---\n\n## 🤖 AI 기업정보 추출\n\n"
        yield output

        # Each document contributes at most 3000 characters to the prompt.
        combined_text = "\n\n".join(
            f"[{item['filename']}]\n{item['text'][:3000]}" for item in all_extracted_text
        )

        system_prompt = """당신은 기업 서류 분석 전문가입니다.
주어진 문서들에서 다음 정보를 추출해주세요:
1. 사업자 정보 (사업자등록번호, 법인등록번호, 상호, 대표자, 설립일, 주소, 업종)
2. 재무 정보 (자본금, 매출액, 영업이익, 당기순이익)
3. 인력 정보 (상시근로자 수, 4대보험 가입자 수)
4. 인증/등록 정보 (부설연구소, 벤처기업 인증 등)
5. 기타 특이사항
JSON 형식으로 정리해주세요."""

        messages = [
            {"role": "system", "content": system_prompt},
            # The combined text is capped at 12000 characters before being sent.
            {"role": "user", "content": f"다음 기업 서류들을 분석해주세요:\n\n{combined_text[:12000]}"}
        ]

        for chunk in call_groq_api_stream(messages):
            output += chunk
            yield output

    output += "\n\n---\n✅ **문서 분석 완료**"
    yield output
278
+
279
+
280
def match_announcements_with_profile(profile_data, announcements_df, progress=gr.Progress()):
    """Stream an AI assessment of which announcements fit the given company profile.

    This is a generator: every ``yield`` is the full Markdown report accumulated so far.

    Args:
        profile_data: Company profile (JSON-serialisable mapping).
        announcements_df: DataFrame of announcements; only the first 20 rows are used.
        progress: Gradio progress callback (not called in this function).

    Yields:
        The cumulative Markdown report.
    """
    if not profile_data:
        yield "❌ 기업 프로필을 먼저 입력해주세요."
        return

    is_frame = isinstance(announcements_df, pd.DataFrame)
    if announcements_df is None or (is_frame and announcements_df.empty):
        yield "❌ 매칭할 공고 데이터가 없습니다. 먼저 공고를 검색해주세요."
        return

    # The profile echoed back to the user is capped at 2000 characters.
    profile_pretty = json.dumps(profile_data, ensure_ascii=False, indent=2)[:2000]
    output = "".join([
        "# 🎯 맞춤 과제 매칭 결과\n\n",
        "## 📋 입력된 기업 프로필\n\n",
        f"```json\n{profile_pretty}\n```\n\n",
        "---\n\n## 🔍 AI 매칭 분석 중...\n\n",
    ])
    yield output

    # Anything that is not a DataFrame is treated as an empty announcement list.
    candidates = announcements_df if is_frame else pd.DataFrame()
    entries = []
    for _, row in candidates.head(20).iterrows():
        entries.append(f"""
### {row.get('지원사업명', '')}
- 지원분야: {row.get('지원분야', '')}
- 소관부처: {row.get('소관부처', '')}
- 신청기간: {row.get('신청기간', '')}
- 지원대상: {row.get('지원대상', '')}
---
""")
    announcements_text = "".join(entries)

    system_prompt = """당신은 정부 지원사업 매칭 전문가입니다.
기업 프로필과 공고 목록을 분석하여 신청 가능한 과제를 추천해주세요.
각 공고에 대해:
- ✅ 적합: 신청 자격 충족
- ⚠️ 확인필요: 일부 조건 확인 필요
- ❌ 부적합: 자격 미달
추천 순위와 이유를 설명해주세요."""

    profile_json = json.dumps(profile_data, ensure_ascii=False)
    user_prompt = f"기업 프로필:\n{profile_json}\n\n공고 목록:\n{announcements_text[:8000]}"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    for chunk in call_groq_api_stream(messages):
        output += chunk
        yield output

    output += "\n\n---\n✅ **매칭 분석 완료**"
    yield output
327
+
328
+
329
+ # ============================================================
330
+ # CSS 스타일
331
+ # ============================================================
332
+ CUSTOM_CSS = """
333
+ @import url('https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@400;500;700&display=swap');
334
+ .gradio-container {
335
+ font-family: 'Noto Sans KR', sans-serif !important;
336
+ background: linear-gradient(135deg, #f5f7fa 0%, #e8eef5 100%) !important;
337
+ max-width: 1600px !important;
338
+ margin: 0 auto !important;
339
+ }
340
+ .header-banner {
341
+ background: linear-gradient(135deg, #1a5cb0 0%, #0d4a94 100%);
342
+ color: white;
343
+ padding: 24px 28px;
344
+ border-radius: 16px;
345
+ margin-bottom: 20px;
346
+ box-shadow: 0 4px 20px rgba(26, 92, 176, 0.3);
347
+ }
348
+ .section-header {
349
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
350
+ color: white;
351
+ padding: 12px 16px;
352
+ border-radius: 8px;
353
+ margin: 16px 0 12px 0;
354
+ font-weight: 600;
355
+ }
356
+ .feature-badge {
357
+ display: inline-block;
358
+ background: rgba(255,255,255,0.2);
359
+ padding: 4px 12px;
360
+ border-radius: 20px;
361
+ font-size: 12px;
362
+ margin: 4px 4px 0 0;
363
+ }
364
+ .analyze-btn {
365
+ background: linear-gradient(135deg, #10B981 0%, #059669 100%) !important;
366
+ border: none !important;
367
+ color: white !important;
368
+ font-weight: 600 !important;
369
+ }
370
+ .analyze-btn:hover {
371
+ background: linear-gradient(135deg, #059669 0%, #047857 100%) !important;
372
+ }
373
+ .match-btn {
374
+ background: linear-gradient(135deg, #8B5CF6 0%, #7C3AED 100%) !important;
375
+ border: none !important;
376
+ color: white !important;
377
+ font-weight: 600 !important;
378
+ }
379
+ .match-btn:hover {
380
+ background: linear-gradient(135deg, #7C3AED 0%, #6D28D9 100%) !important;
381
+ }
382
+ .analysis-output {
383
+ background: white !important;
384
+ border: 2px solid #e5e7eb !important;
385
+ border-radius: 12px !important;
386
+ padding: 20px !important;
387
+ min-height: 500px !important;
388
+ max-height: 700px !important;
389
+ overflow-y: auto !important;
390
+ font-size: 14px !important;
391
+ line-height: 1.8 !important;
392
+ }
393
+ .filter-info {
394
+ background: #FEF3C7;
395
+ border: 1px solid #F59E0B;
396
+ border-radius: 8px;
397
+ padding: 12px;
398
+ margin-bottom: 16px;
399
+ font-size: 13px;
400
+ color: #92400E;
401
+ }
402
+ .footer-text {
403
+ text-align: center;
404
+ color: #868e96;
405
+ font-size: 13px;
406
+ margin-top: 16px;
407
+ }
408
+ .profile-section {
409
+ border: 1px solid #e5e7eb;
410
+ border-radius: 12px;
411
+ padding: 16px;
412
+ margin-bottom: 16px;
413
+ background: white;
414
+ }
415
+ """
416
+
417
+
418
+ # ============================================================
419
+ # 메인 인터페이스
420
+ # ============================================================
421
+ def create_interface():
422
+ with gr.Blocks(title="기업마당 AI 분석기", css=CUSTOM_CSS) as demo:
423
+ gr.HTML("""
424
+ <div class="header-banner">
425
+ <div style="display: flex; align-items: center; gap: 16px;">
426
+ <div style="width: 56px; height: 56px; background: white; border-radius: 14px; display: flex; align-items: center; justify-content: center; font-size: 28px;">🏢</div>
427
+ <div>
428
+ <h1 style="margin: 0; font-size: 24px;">기업마당 지원사업 AI 분석기</h1>
429
+ <p style="margin: 4px 0 0 0; opacity: 0.9;">공고 검색 · 첨부파일 자동 분석 · AI 요약 · 맞춤 과제 추출</p>
430
+ <div style="margin-top: 8px;">
431
+ <span class="feature-badge">📄 HWP/HWPX 지원</span>
432
+ <span class="feature-badge">🤖 AI 분석</span>
433
+ <span class="feature-badge">🎯 맞춤 매칭</span>
434
+ </div>
435
+ </div>
436
+ </div>
437
+ </div>
438
+ """)
439
+
440
+ # 상태 변수
441
+ selected_url = gr.State("")
442
+ selected_name = gr.State("")
443
+ selected_attachments = gr.State([])
444
+ selected_description = gr.State("")
445
+ current_df = gr.State(value=pd.DataFrame())
446
+ company_profile = gr.State(value={})
447
+
448
+ with gr.Tabs():
449
+ # 탭 1: 공고 검색
450
+ with gr.Tab("🔍 공고 검색"):
451
+ with gr.Row():
452
+ keyword_input = gr.Textbox(label="🔍 검색어", placeholder="예: AI, 스타트업, R&D", scale=3)
453
+ category_dropdown = gr.Dropdown(label="📂 지원분야", choices=list(CATEGORY_CODES.keys()), value="전체", scale=1)
454
+ region_dropdown = gr.Dropdown(label="📍 지역", choices=REGION_LIST, value="전체(지역)", scale=1)
455
+ with gr.Row():
456
+ org_type_dropdown = gr.Dropdown(label="🏛️ 기관유형", choices=ORG_TYPE_OPTIONS, value="전체", scale=1)
457
+ sort_dropdown = gr.Dropdown(label="📊 정렬", choices=SORT_OPTIONS, value="등록일순", scale=1)
458
+ status_dropdown = gr.Dropdown(label="📌 공고상태", choices=STATUS_OPTIONS, value="진행중", scale=1)
459
+ with gr.Row():
460
+ page_input = gr.Number(label="📄 페이지", value=1, minimum=1, scale=1)
461
+ rows_dropdown = gr.Dropdown(label="📊 표시개수", choices=[10, 15, 20, 30, 50], value=20, scale=1)
462
+ search_btn = gr.Button("🔎 검색", variant="primary", scale=2)
463
+
464
+ status_output = gr.Textbox(label="📊 조회 결과", interactive=False)
465
+ results_output = gr.Dataframe(label="📋 공고 목록 (행 클릭으로 선택)", wrap=True, interactive=False)
466
+ with gr.Row():
467
+ prev_btn = gr.Button("◀️ 이전", size="sm")
468
+ next_btn = gr.Button("다음 ▶️", size="sm")
469
+ export_btn = gr.Button("📥 CSV 내보내기", size="sm", variant="secondary")
470
+ csv_output = gr.File(label="📁 다운로드", visible=True)
471
+
472
+ # 탭 2: AI 분석
473
+ with gr.Tab("🤖 AI 분석"):
474
+ with gr.Row():
475
+ with gr.Column(scale=1):
476
+ selected_info = gr.Textbox(label="📌 선택된 공고", placeholder="공고 검색 탭에서 선택", lines=3, interactive=False)
477
+ analyze_btn = gr.Button("🚀 AI 분석 시작", variant="primary", size="lg", elem_classes=["analyze-btn"])
478
+ with gr.Column(scale=2):
479
+ analysis_output = gr.Markdown(value="### 📊 분석 결과\n\n*공고를 선택하고 분석 버튼을 클릭하세요*", height=500)
480
+
481
+ # 탭 3: 맞춤 과제 추출
482
+ with gr.Tab("🎯 맞춤 과제 추출"):
483
+ gr.HTML("""
484
+ <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 12px; margin-bottom: 20px;">
485
+ <h2 style="margin: 0 0 8px 0;">🎯 나만의 맞춤 과제 추출</h2>
486
+ <p style="margin: 0; opacity: 0.9;">기업 정보를 입력하고 문서를 업로드하면 AI가 신청 가능한 과제를 자동으로 매칭해드립니다.</p>
487
+ </div>
488
+ """)
489
+
490
+ with gr.Tabs():
491
+ # 서브탭 1: 기업 기본정보
492
+ with gr.Tab("1️⃣ 기업 기본정보"):
493
+ with gr.Row():
494
+ with gr.Column():
495
+ gr.HTML('<div class="section-header">📋 사업자 정보</div>')
496
+ biz_number = gr.Textbox(label="사업자등록번호", placeholder="000-00-00000")
497
+ corp_number = gr.Textbox(label="법인등록번호", placeholder="000000-0000000")
498
+ company_name = gr.Textbox(label="상호/법인명", placeholder="(주)회사명")
499
+ establish_date = gr.Textbox(label="설립일자", placeholder="YYYY-MM-DD")
500
+ company_type = gr.Dropdown(label="기업형태", choices=COMPANY_TYPE_OPTIONS, value="법인사업자")
501
+ corp_type = gr.Dropdown(label="법인 종류", choices=CORP_TYPE_OPTIONS, value="주식회사")
502
+ company_size = gr.Dropdown(label="기업규모", choices=COMPANY_SIZE_OPTIONS, value="소기업")
503
+
504
+ with gr.Column():
505
+ gr.HTML('<div class="section-header">🏆 인증 현황</div>')
506
+ venture_cert = gr.Checkbox(label="벤처기업 인증")
507
+ innobiz_cert = gr.Checkbox(label="이노비즈 인증")
508
+ mainbiz_cert = gr.Checkbox(label="메인비즈 인증")
509
+ sme_cert = gr.Checkbox(label="중소기업확인서 보유")
510
+ small_biz_cert = gr.Checkbox(label="소상공인확인서 보유")
511
+ social_venture = gr.Checkbox(label="소셜벤처")
512
+ startup_flag = gr.Checkbox(label="스타트업")
513
+
514
+ with gr.Column():
515
+ gr.HTML('<div class="section-header">📍 소재지 정보</div>')
516
+ hq_sido = gr.Dropdown(label="본사 소재지 (시/도)", choices=SIDO_LIST, value="서울특별시")
517
+ hq_sigungu = gr.Textbox(label="본사 소재지 (시/군/구)", placeholder="예: 강남구")
518
+ innovation_city = gr.Checkbox(label="혁신도시 입주")
519
+ industrial_complex = gr.Checkbox(label="산업단지 입주")
520
+ free_zone = gr.Checkbox(label="규제자유특구 소재")
521
+ non_capital = gr.Checkbox(label="비수도권 (지방기업)")
522
+
523
+ gr.HTML('<div class="section-header">📊 업종 정보</div>')
524
+ industry_major = gr.Dropdown(label="주업종 (대분류)", choices=INDUSTRY_MAJOR_OPTIONS, value="C. 제조업")
525
+ is_manufacturing = gr.Checkbox(label="제조업 여부")
526
+ is_knowledge_service = gr.Checkbox(label="지식서비스업 여부")
527
+
528
+ # 서브탭 2: 대표자/인력 정보
529
+ with gr.Tab("2️⃣ 대표자/인력 정보"):
530
+ with gr.Row():
531
+ with gr.Column():
532
+ gr.HTML('<div class="section-header">👤 대표자 정보</div>')
533
+ ceo_name = gr.Textbox(label="대표자명", placeholder="홍길동")
534
+ ceo_gender = gr.Radio(label="대표자 성별", choices=["남성", "여성"], value="남성")
535
+ ceo_birthdate = gr.Textbox(label="대표자 생년월일", placeholder="YYYY-MM-DD")
536
+ youth_ceo = gr.Checkbox(label="청년창업자 (만39세 미만)")
537
+ senior_ceo = gr.Checkbox(label="시니어창업자 (만40세 이상)")
538
+ women_company = gr.Checkbox(label="여성기업확인서 보유")
539
+ disabled_company = gr.Checkbox(label="장애인기업확인서 보유")
540
+
541
+ with gr.Column():
542
+ gr.HTML('<div class="section-header">👥 고용 현황</div>')
543
+ insurance_employees = gr.Number(label="4대보험 가입자 수", value=0, minimum=0)
544
+ regular_employees = gr.Number(label="상시근로자 수", value=0, minimum=0)
545
+ youth_employees = gr.Number(label="청년고용 인원", value=0, minimum=0)
546
+ female_ratio = gr.Slider(label="여성고용 비율 (%)", minimum=0, maximum=100, value=0)
547
+ new_hire_plan = gr.Number(label="신규채용 계획 (명)", value=0, minimum=0)
548
+
549
+ with gr.Column():
550
+ gr.HTML('<div class="section-header">🔬 연구인력/역량</div>')
551
+ rd_personnel = gr.Number(label="연구인력 수", value=0, minimum=0)
552
+ phd_researchers = gr.Number(label="박사급 연구원", value=0, minimum=0)
553
+ research_center = gr.Checkbox(label="기업부설연구소 등록")
554
+ rd_dept = gr.Checkbox(label="연구개발전담부서 등록")
555
+ patent_count = gr.Number(label="보유 특허 수", value=0, minimum=0)
556
+
557
+ # 서브탭 3: 재무 정보
558
+ with gr.Tab("3️⃣ 재무 정보"):
559
+ with gr.Row():
560
+ with gr.Column():
561
+ gr.HTML('<div class="section-header">💰 매출 및 수익</div>')
562
+ revenue_current = gr.Number(label="최근년도 매출액 (백만원)", value=0, minimum=0)
563
+ revenue_prev = gr.Number(label="전년도 매출액 (백만원)", value=0, minimum=0)
564
+ operating_profit = gr.Number(label="영업이익 (백만원)", value=0)
565
+ net_income = gr.Number(label="당기순이익 (백만원)", value=0)
566
+ export_amount = gr.Number(label="수출액 (천달러)", value=0, minimum=0)
567
+
568
+ with gr.Column():
569
+ gr.HTML('<div class="section-header">📊 재무건전성</div>')
570
+ capital = gr.Number(label="자본금 (백만원)", value=0, minimum=0)
571
+ total_assets = gr.Number(label="자산총계 (백만원)", value=0, minimum=0)
572
+ debt_ratio = gr.Slider(label="부채비율 (%)", minimum=0, maximum=500, value=0)
573
+ credit_grade = gr.Dropdown(label="신용등급", choices=CREDIT_GRADE_OPTIONS, value="미평가")
574
+ tcb_grade = gr.Dropdown(label="TCB 등급", choices=TCB_GRADE_OPTIONS, value="미평가")
575
+ capital_impairment = gr.Checkbox(label="자본잠식 여부")
576
+
577
+ with gr.Column():
578
+ gr.HTML('<div class="section-header">🔬 R&D 투자</div>')
579
+ rd_investment = gr.Number(label="연간 R&D 투자액 (백만원)", value=0, minimum=0)
580
+ gov_project_exp = gr.Checkbox(label="정부과제 수행 경험")
581
+ gov_support_3yr = gr.Number(label="최근 3년 정부지원금 (백만원)", value=0, minimum=0)
582
+
583
+ # 서브탭 4: 기술분야/제한사항
584
+ with gr.Tab("4️⃣ 기술분야/제한사항"):
585
+ with gr.Row():
586
+ with gr.Column():
587
+ gr.HTML('<div class="section-header">🔬 기술 분야</div>')
588
+ core_industry = gr.CheckboxGroup(label="10대 핵심산업", choices=CORE_INDUSTRY_OPTIONS)
589
+ strategic_tech = gr.CheckboxGroup(label="12대 국가전략기술", choices=NATIONAL_STRATEGIC_TECH)
590
+ green_tech = gr.Checkbox(label="녹색기술 분야")
591
+ digital_transform = gr.Checkbox(label="디지털전환 분야")
592
+ defense_industry = gr.Checkbox(label="국방/방산 분야")
593
+
594
+ with gr.Column():
595
+ gr.HTML('<div class="section-header">📜 인증/ISO</div>')
596
+ iso_certs = gr.CheckboxGroup(label="ISO 인증", choices=ISO_CERT_OPTIONS)
597
+ gmp_cert = gr.Checkbox(label="GMP 인증")
598
+
599
+ with gr.Column():
600
+ gr.HTML('<div class="section-header">⚠️ 결격사유 확인</div>')
601
+ tax_delinquent = gr.Checkbox(label="국세 체납")
602
+ local_tax_delinquent = gr.Checkbox(label="지방세 체납")
603
+ gov_project_fail = gr.Checkbox(label="정부과제 불성실")
604
+ bankruptcy = gr.Checkbox(label="휴/폐업 이력")
605
+ financial_default = gr.Checkbox(label="금융기관 연체")
606
+
607
+ # 서브탭 5: 문서 업로드 및 매칭
608
+ with gr.Tab("5️⃣ 문서 업로드 & 매칭"):
609
+ gr.HTML("""
610
+ <div style="background: #EBF5FF; border: 1px solid #3B82F6; border-radius: 8px; padding: 16px; margin-bottom: 16px;">
611
+ <h3 style="margin: 0 0 8px 0; color: #1E40AF;">📁 문서 업로드</h3>
612
+ <p style="margin: 0; color: #1E3A8A;">사업자등록증, 등기부등본, 재무제표, 중소기업확인서 등을 업로드하면 AI가 자동으로 정보를 추출합니다.</p>
613
+ </div>
614
+ """)
615
+
616
+ with gr.Row():
617
+ with gr.Column(scale=1):
618
+ file_upload = gr.File(
619
+ label="📎 문서 업로드 (HWP, PDF, TXT, XLSX)",
620
+ file_count="multiple",
621
+ file_types=[".hwp", ".hwpx", ".pdf", ".txt", ".xlsx", ".xls"]
622
+ )
623
+ analyze_docs_btn = gr.Button("📄 문서 분석", variant="secondary", size="lg")
624
+
625
+ with gr.Column(scale=2):
626
+ doc_analysis_output = gr.Markdown(value="### 📄 문서 분석 결과\n\n*문서를 업로드하고 분석 버튼을 클릭하세요*", height=400)
627
+
628
+ gr.HTML('<hr style="margin: 24px 0;">')
629
+
630
+ with gr.Row():
631
+ save_profile_btn = gr.Button("💾 프로필 저장", variant="secondary", size="lg")
632
+ match_btn = gr.Button("🎯 맞춤 과제 매칭 시작", variant="primary", size="lg", elem_classes=["match-btn"])
633
+
634
+ profile_status = gr.Textbox(label="프로필 저장 상태", interactive=False)
635
+ match_output = gr.Markdown(value="### 🎯 매칭 결과\n\n*프로필을 저장하고 매칭 버튼을 클릭하세요*", height=500)
636
+
637
+ # ============================================================
638
+ # 이벤트 핸들러
639
+ # ============================================================
640
def _run_search(keyword, category, region, org_type, sort_by, status_filter, page, rows):
    """Fetch one page of announcements and derive the trimmed table for the grid.

    Empty or ``None`` filter values fall back to the same defaults the widgets
    start with. Returns ``(display_df, status, full_df)``: ``display_df`` holds
    only the columns shown in the results grid, ``full_df`` keeps every column.
    """
    df, status = fetch_announcements(
        keyword or "", category or "전체", region or "전체(지역)",
        org_type or "전체", sort_by or "등록일순",
        status_filter or "진행중", page, int(rows) if rows else 20)
    display_cols = ["번호", "지원분야", "지원사업명", "신청기간", "소관부처", "등록일", "지원대상"]
    display_df = df[[c for c in display_cols if c in df.columns]] if not df.empty else df
    return display_df, status, df


def search_fn(keyword, category, region, org_type, sort_by, status_filter, page, rows):
    """Run a search for the requested page (treated as page 1 when empty or below 1)."""
    current_page = max(1, int(page)) if page else 1
    return _run_search(keyword, category, region, org_type, sort_by, status_filter, current_page, rows)


def prev_fn(page, keyword, category, region, org_type, sort_by, status_filter, rows):
    """Step back one page (never below 1); also returns the new page number."""
    new_page = max(1, int(page) - 1) if page else 1
    return (*_run_search(keyword, category, region, org_type, sort_by, status_filter, new_page, rows), new_page)


def next_fn(page, keyword, category, region, org_type, sort_by, status_filter, rows):
    """Step forward one page; also returns the new page number."""
    new_page = int(page) + 1 if page else 2
    return (*_run_search(keyword, category, region, org_type, sort_by, status_filter, new_page, rows), new_page)
665
+
666
def export_to_csv(df):
    """Write the given announcements to a fresh CSV file and return its path.

    Each call writes to its own temporary file, so concurrent sessions cannot
    overwrite each other's export. The file is not deleted here.

    Args:
        df: DataFrame to export; columns whose name starts with ``_`` are skipped.

    Returns:
        Path of the written CSV (UTF-8 with BOM), or ``None`` when ``df`` is
        missing, not a DataFrame, or empty.
    """
    if not isinstance(df, pd.DataFrame) or df.empty:
        return None

    export_cols = [c for c in df.columns if not str(c).startswith("_")]

    # delete=False: the file must outlive this function so it can be served.
    with tempfile.NamedTemporaryFile(prefix="bizinfo_announcements_", suffix=".csv", delete=False) as handle:
        filepath = handle.name
    df[export_cols].to_csv(filepath, index=False, encoding='utf-8-sig')
    return filepath
673
+
674
def on_row_select(evt: gr.SelectData, df):
    """Turn a click on the results grid into the "selected announcement" state.

    Args:
        evt: Gradio selection event; ``evt.index[0]`` is read as the row position.
        df: Full DataFrame of the page currently shown.

    Returns:
        ``(url, name, attachments, description, info_text)``; all-empty values
        when there is no usable DataFrame or the row position is out of range.
    """
    nothing_selected = ("", "", [], "", "")

    # The state may hold no usable table yet (e.g. before the first search).
    if not isinstance(df, pd.DataFrame) or df.empty:
        return nothing_selected

    index = evt.index
    row_idx = index[0] if isinstance(index, (list, tuple)) else index
    if not isinstance(row_idx, int) or not 0 <= row_idx < len(df):
        return nothing_selected

    row = df.iloc[row_idx]
    url = row.get("상세링크", "")
    name = row.get("지원사업명", "")
    description = row.get("사업개요", "")
    attachments = row.get("첨부파일", [])
    if not isinstance(attachments, list):
        # A missing or NaN cell must not reach ``len()`` or the loop below.
        attachments = []

    att_info = ""
    if attachments:
        att_info = f"\n\n📎 첨부파일 {len(attachments)}개:"
        for att in attachments:
            att_info += f"\n - {att.get('filename', '파일')}"

    info = f"📌 {name}\n\n🔗 {url}{att_info}"
    return url, name, attachments, description, info
689
+
690
def save_profile_fn(biz_num, corp_num, comp_name, est_date, comp_type, corp_tp, comp_size,
                    venture, innobiz, mainbiz, sme, small_biz, social, startup,
                    sido, sigungu, innov_city, ind_complex, free_z, non_cap,
                    ind_major, is_manu, is_know,
                    ceo_nm, ceo_gen, ceo_birth, youth, senior, women, disabled,
                    ins_emp, reg_emp, youth_emp, fem_ratio, new_hire,
                    rd_per, phd, res_ctr, rd_dep, patent,
                    rev_cur, rev_prev, op_profit, net_inc, export,
                    cap, assets, debt, credit, tcb, impair,
                    rd_inv, gov_exp, gov_sup,
                    core_ind, strat_tech, green, digital, defense,
                    iso, gmp,
                    tax_del, local_tax, gov_fail, bankrupt, fin_def):
    """Collect the profile form values into one nested dictionary.

    The 66 positional parameters must stay in this exact order, because the
    save button passes its input widgets positionally. Values are stored as
    received, with no validation or conversion.

    Returns:
        ``(profile, status_message)`` where ``profile`` has twelve sections,
        each a dict of field label to value.
    """
    profile = {
        "사업자정보": {
            "사업자등록번호": biz_num, "법인등록번호": corp_num, "상호": comp_name,
            "설립일자": est_date, "기업형태": comp_type, "법인종류": corp_tp, "기업규모": comp_size
        },
        "인증현황": {
            "벤처기업": venture, "이노비즈": innobiz, "메인비즈": mainbiz,
            "중소기업확인서": sme, "소상공인확인서": small_biz, "소셜벤처": social, "스타트업": startup
        },
        "소재지": {
            "시도": sido, "시군구": sigungu, "혁신도시": innov_city,
            "산업단지": ind_complex, "규제자유특구": free_z, "비수도권": non_cap
        },
        "업종": {"대분류": ind_major, "제조업": is_manu, "지식서비스업": is_know},
        "대표자": {
            "이름": ceo_nm, "성별": ceo_gen, "생년월일": ceo_birth,
            "청년창업자": youth, "시니어창업자": senior, "여성기업": women, "장애인기업": disabled
        },
        "고용현황": {
            "4대보험가입자": ins_emp, "상시근로자": reg_emp, "청년고용": youth_emp,
            "여성비율": fem_ratio, "신규채용계획": new_hire
        },
        "연구역량": {
            "연구인력": rd_per, "박사급": phd, "부설연구소": res_ctr,
            "전담부서": rd_dep, "특허수": patent
        },
        "재무정보": {
            # "매출액_전년" repairs a mis-encoded key; it pairs with "매출액_당해".
            "매출액_당해": rev_cur, "매출액_전년": rev_prev, "영업이익": op_profit,
            "당기순이익": net_inc, "수출액": export, "자본금": cap, "자산총계": assets,
            "부채비율": debt, "신용등급": credit, "TCB등급": tcb, "자본잠식": impair
        },
        "R&D투자": {"연간투자액": rd_inv, "정부과제경험": gov_exp, "최근3년지원금": gov_sup},
        "기술분야": {
            "핵심산업": core_ind, "국가전략기술": strat_tech,
            "녹색기술": green, "디지털전환": digital, "국방방산": defense
        },
        "인증": {"ISO": iso, "GMP": gmp},
        "결격사유": {
            "국세체납": tax_del, "지방세체납": local_tax, "불성실이력": gov_fail,
            "휴폐업": bankrupt, "금융연체": fin_def
        }
    }
    return profile, "✅ 프로필이 저장되었습니다."
747
+
748
+ # 이벤트 연결
749
+ search_btn.click(fn=search_fn, inputs=[keyword_input, category_dropdown, region_dropdown, org_type_dropdown,
750
+ sort_dropdown, status_dropdown, page_input, rows_dropdown],
751
+ outputs=[results_output, status_output, current_df])
752
+
753
+ keyword_input.submit(fn=search_fn, inputs=[keyword_input, category_dropdown, region_dropdown, org_type_dropdown,
754
+ sort_dropdown, status_dropdown, page_input, rows_dropdown],
755
+ outputs=[results_output, status_output, current_df])
756
+
757
+ # 페이지네이션 이벤트
758
+ prev_btn.click(fn=prev_fn, inputs=[page_input, keyword_input, category_dropdown, region_dropdown,
759
+ org_type_dropdown, sort_dropdown, status_dropdown, rows_dropdown],
760
+ outputs=[results_output, status_output, current_df, page_input])
761
+
762
+ next_btn.click(fn=next_fn, inputs=[page_input, keyword_input, category_dropdown, region_dropdown,
763
+ org_type_dropdown, sort_dropdown, status_dropdown, rows_dropdown],
764
+ outputs=[results_output, status_output, current_df, page_input])
765
+
766
+ # CSV 내보내기
767
+ export_btn.click(fn=export_to_csv, inputs=[current_df], outputs=[csv_output])
768
+
769
+ results_output.select(fn=on_row_select, inputs=[current_df],
770
+ outputs=[selected_url, selected_name, selected_attachments, selected_description, selected_info])
771
+
772
+ analyze_btn.click(fn=analyze_announcement, inputs=[selected_url, selected_name, selected_attachments, selected_description],
773
+ outputs=[analysis_output])
774
+
775
+ analyze_docs_btn.click(fn=analyze_uploaded_documents, inputs=[file_upload], outputs=[doc_analysis_output])
776
+
777
+ save_profile_btn.click(
778
+ fn=save_profile_fn,
779
+ inputs=[biz_number, corp_number, company_name, establish_date, company_type, corp_type, company_size,
780
+ venture_cert, innobiz_cert, mainbiz_cert, sme_cert, small_biz_cert, social_venture, startup_flag,
781
+ hq_sido, hq_sigungu, innovation_city, industrial_complex, free_zone, non_capital,
782
+ industry_major, is_manufacturing, is_knowledge_service,
783
+ ceo_name, ceo_gender, ceo_birthdate, youth_ceo, senior_ceo, women_company, disabled_company,
784
+ insurance_employees, regular_employees, youth_employees, female_ratio, new_hire_plan,
785
+ rd_personnel, phd_researchers, research_center, rd_dept, patent_count,
786
+ revenue_current, revenue_prev, operating_profit, net_income, export_amount,
787
+ capital, total_assets, debt_ratio, credit_grade, tcb_grade, capital_impairment,
788
+ rd_investment, gov_project_exp, gov_support_3yr,
789
+ core_industry, strategic_tech, green_tech, digital_transform, defense_industry,
790
+ iso_certs, gmp_cert,
791
+ tax_delinquent, local_tax_delinquent, gov_project_fail, bankruptcy, financial_default],
792
+ outputs=[company_profile, profile_status]
793
+ )
794
+
795
+ match_btn.click(fn=match_announcements_with_profile, inputs=[company_profile, current_df], outputs=[match_output])
796
+
797
+ return demo
798
+
799
+
800
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on all interfaces at
    # port 7860 with the ssr_mode flag switched off.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860, "ssr_mode": False}
    demo = create_interface()
    demo.launch(**launch_options)