scipious committed on
Commit
b47fa88
·
verified ·
1 Parent(s): 5eaa26e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +263 -252
app.py CHANGED
@@ -1,253 +1,264 @@
1
- # app.py
2
- from flask import Flask, render_template, jsonify, request
3
- from flask_socketio import SocketIO
4
- import threading
5
- import os
6
- import sqlite3
7
- import gc
8
- import time
9
- import re
10
-
11
- # --- 외부 모듈 임포트 ---
12
- import reg_embedding_system
13
- import leximind_prompts
14
-
15
- # --- Together AI SDK ---
16
- from together import Together
17
-
18
- # --- eventlet monkey patch (Gunicorn + SocketIO 필수!) ---
19
- import eventlet
20
- eventlet.monkey_patch()
21
-
22
- # --- Flask & SocketIO 설정 ---
23
- app = Flask(__name__)
24
- socketio = SocketIO(app, cors_allowed_origins="*", async_mode='eventlet')
25
-
26
- # --- 전역 변수 ---
27
- connected_clients = 0
28
- search_document_number = 30
29
- Filtered_search = False
30
- filters = {"regulation_part": []}
31
-
32
- # --- 경로 설정 ---
33
- current_dir = os.path.dirname(os.path.abspath(__file__))
34
- ResultFile_FolderAddress = os.path.join(current_dir, 'result.txt')
35
-
36
- # --- RAG 데이터 경로 ---
37
- region_paths = {
38
- "국내": "/app/data/KMVSS_RAG",
39
- "북미": "/app/data/FMVSS_RAG",
40
- "유럽": "/app/data/EUR_RAG"
41
- }
42
-
43
- # --- 프롬프트 ---
44
- lexi_prompts = leximind_prompts.PromptLibrary()
45
-
46
- # --- RAG 객체 ---
47
- region_rag_objects = {}
48
-
49
- # --- Together AI 클라이언트 ---
50
- TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
51
- if not TOGETHER_API_KEY:
52
- raise EnvironmentError("TOGETHER_API_KEY가 설정되지 않았습니다. Hugging Face Secrets에 추가하세요.")
53
- client = Together(api_key=TOGETHER_API_KEY)
54
-
55
- # --- RAG 로딩 ---
56
- def load_rag_objects():
57
- global region_rag_objects
58
- for region, path in region_paths.items():
59
- if not os.path.exists(path):
60
- msg = f"[{region}] 경로 없음: {path}"
61
- socketio.emit('message', {'message': msg})
62
- print(msg)
63
- continue
64
-
65
- try:
66
- socketio.emit('message', {'message': f"[{region}] RAG 로딩 중..."})
67
- ensemble_retriever, vectorstore, sqlite_conn = reg_embedding_system.load_embedding_from_faiss(path)
68
- sqlite_conn.close()
69
- db_path = os.path.join(path, "metadata_mapping.db")
70
- new_conn = sqlite3.connect(db_path, check_same_thread=False)
71
-
72
- region_rag_objects[region] = {
73
- "ensemble_retriever": ensemble_retriever,
74
- "vectorstore": vectorstore,
75
- "sqlite_conn": new_conn
76
- }
77
- socketio.emit('message', {'message': f"[{region}] 로딩 완료"})
78
- print(f"[{region}] RAG 로딩 완료")
79
-
80
- except Exception as e:
81
- error_msg = f"[{region}] 로딩 실패: {str(e)}"
82
- print(error_msg)
83
- socketio.emit('message', {'message': error_msg})
84
-
85
- socketio.emit('message', {'message': "Ready to Search"})
86
- print("Ready to Search")
87
-
88
- # --- ---
89
- @app.route('/')
90
- def index():
91
- return render_template('chat.html')
92
-
93
- # --- 메시지 ---
94
- @app.route('/get_message', methods=['POST'])
95
- def get_message():
96
- global Filtered_search, filters
97
- data = request.get_json()
98
- query = data.get('query', '').strip()
99
- regions = data.get('regions', [])
100
- selected_regulations = data.get('selectedRegulations', [])
101
-
102
- filters = {"regulation_part": []}
103
- Filtered_search = bool(selected_regulations)
104
- if selected_regulations:
105
- for reg in selected_regulations:
106
- title = reg.get('title', '')
107
- if title:
108
- filters["regulation_part"].append(title)
109
-
110
- Rag_Results = search_DB_from_multiple_regions(query, regions, region_rag_objects)
111
- AImessage = RegAI(query, Rag_Results, ResultFile_FolderAddress)
112
-
113
- return jsonify(message=AImessage)
114
-
115
- # --- 법규 리스트 ---
116
- @app.route('/get_reg_list', methods=['POST'])
117
- def get_reg_list():
118
- data = request.get_json()
119
- selected_regions = data.get('regions', []) or ["국내", "북미", "유럽"]
120
-
121
- all_reg_list_part = []
122
- for region in selected_regions:
123
- rag = region_rag_objects.get(region)
124
- if not rag:
125
- continue
126
- try:
127
- conn = rag["sqlite_conn"]
128
- parts = reg_embedding_system.get_unique_metadata_values(conn, "regulation_part")
129
- all_reg_list_part.extend(parts)
130
- except Exception as e:
131
- print(f"[{region}] 법규 로드 실패: {e}")
132
-
133
- unique_parts = sorted(set(all_reg_list_part), key=reg_embedding_system.natural_sort_key)
134
- return jsonify(reg_list_part="\n".join(unique_parts))
135
-
136
- # --- SocketIO ---
137
- @socketio.on('connect')
138
- def handle_connect():
139
- global connected_clients
140
- connected_clients += 1
141
- print(f"클라이언트 연결: {connected_clients}명")
142
-
143
- @socketio.on('disconnect')
144
- def handle_disconnect():
145
- global connected_clients
146
- connected_clients -= 1
147
- print(f"연결 해제: {connected_clients}명")
148
- if connected_clients <= 0:
149
- cleanup_connections()
150
- print("서버 종료")
151
- os._exit(0)
152
-
153
- def cleanup_connections():
154
- for region, rag in region_rag_objects.items():
155
- try:
156
- rag["sqlite_conn"].close()
157
- print(f"[{region}] DB 연결 종료")
158
- except:
159
- pass
160
-
161
- # --- Together AI 분석 ---
162
- def Gemma3_AI_analysis(query_txt, content_txt):
163
- content_txt = "\n".join(doc.page_content for doc in content_txt) if isinstance(content_txt, list) else str(content_txt)
164
- query_txt = str(query_txt)
165
- prompt = lexi_prompts.use_prompt(lexi_prompts.AI_system_prompt, query_txt=query_txt, content_txt=content_txt)
166
-
167
- response = client.chat.completions.create(
168
- model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
169
- messages=[{"role": "user", "content": prompt}],
170
- max_tokens=1024,
171
- temperature=0.7
172
- )
173
- return response.choices[0].message.content
174
-
175
- # --- Together AI 번역 ---
176
- def Gemma3_AI_Translate(query_txt):
177
- query_txt = str(query_txt)
178
- prompt = lexi_prompts.use_prompt(lexi_prompts.query_translator, query_txt=query_txt)
179
-
180
- response = client.chat.completions.create(
181
- model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
182
- messages=[{"role": "user", "content": prompt}],
183
- max_tokens=512,
184
- temperature=0.3
185
- )
186
- return response.choices[0].message.content
187
-
188
- # --- 검색 ---
189
- def search_DB_from_multiple_regions(query, selected_regions, region_rag_objects):
190
- selected_regions = selected_regions or list(region_rag_objects.keys())
191
- query = Gemma3_AI_Translate(query)
192
- print(f"번역된 쿼리: {query}")
193
-
194
- combined_results = []
195
- for region in selected_regions:
196
- rag = region_rag_objects.get(region)
197
- if not rag:
198
- continue
199
-
200
- retriever = rag["ensemble_retriever"]
201
- vectorstore = rag["vectorstore"]
202
- sqlite_conn = rag["sqlite_conn"]
203
-
204
- if Filtered_search:
205
- results = reg_embedding_system.search_with_metadata_filter(
206
- ensemble_retriever=retriever,
207
- vectorstore=vectorstore,
208
- query=query,
209
- k=search_document_number,
210
- metadata_filter=filters,
211
- sqlite_conn=sqlite_conn
212
- )
213
- else:
214
- results = reg_embedding_system.smart_search_vectorstore(
215
- retriever=retriever,
216
- query=query,
217
- k=search_document_number,
218
- vectorstore=vectorstore,
219
- sqlite_conn=sqlite_conn,
220
- enable_detailed_search=True
221
- )
222
- print(f"[{region}] 검색: {len(results)}건")
223
- combined_results.extend(results)
224
-
225
- return combined_results
226
-
227
- # --- 최종 AI ---
228
- def RegAI(query, Rag_Results, ResultFile_FolderAddress):
229
- gc.collect()
230
- AI_Result = "검색 결과가 없습니다." if not Rag_Results else Gemma3_AI_analysis(query, Rag_Results)
231
-
232
- with open(ResultFile_FolderAddress, 'w', encoding='utf-8') as f:
233
- print("검색된 문서:", file=f)
234
- for i, doc in enumerate(Rag_Results):
235
- print(f"문서 {i+1}: {doc.page_content[:200]}... (메타: {doc.metadata})", file=f)
236
- print("\n답변:", file=f)
237
- print(AI_Result, file=f)
238
-
239
- return AI_Result
240
-
241
- # --- 실행 ---
242
- if __name__ == '__main__':
243
- # 로컬 개발용
244
- threading.Thread(target=load_rag_objects, daemon=True).start()
245
- time.sleep(2)
246
- socketio.emit('message', {'message': '데이터 로딩 시작...'})
247
- socketio.run(app, host='0.0.0.0', port=7860, debug=False)
248
- else:
249
- # Gunicorn용: 워커 시작 후 로딩
250
- import atexit
251
- loading_thread = threading.Thread(target=load_rag_objects, daemon=True)
252
- loading_thread.start()
 
 
 
 
 
 
 
 
 
 
 
253
  atexit.register(cleanup_connections)
 
1
+ # app.py
2
# --- eventlet monkey patch (required for Gunicorn + SocketIO) ---
# Patch the standard library BEFORE anything else is imported. Modules
# imported earlier (HTTP clients, Flask-SocketIO, ...) may bind socket / ssl /
# threading primitives at import time and would keep the un-patched versions.
# The late imports below are therefore intentional (linters: E402).
import eventlet
eventlet.monkey_patch()

from flask import Flask, render_template, jsonify, request
from flask_socketio import SocketIO
import threading
import os
import sqlite3
import gc
import time
import re

# --- Project-local modules ---
import reg_embedding_system
import leximind_prompts

# --- Together AI SDK ---
from together import Together

# --- Flask & SocketIO setup ---
app = Flask(__name__)
# NOTE(review): cors_allowed_origins="*" accepts Socket.IO connections from any
# origin - confirm this is intended for the deployed service.
socketio = SocketIO(app, cors_allowed_origins="*", async_mode='eventlet')

# --- Module-level state ---
connected_clients = 0              # number of currently connected Socket.IO clients
search_document_number = 30        # `k` handed to the retrieval helpers
Filtered_search = False            # True when the latest request selected regulations
filters = {"regulation_part": []}  # metadata filter built from the latest request

# --- Paths ---
current_dir = os.path.dirname(os.path.abspath(__file__))
ResultFile_FolderAddress = os.path.join(current_dir, 'result.txt')

# --- RAG data directories, keyed by the region label sent by the client ---
region_paths = {
    "국내": "/app/data/KMVSS_RAG",
    "북미": "/app/data/FMVSS_RAG",
    "유럽": "/app/data/EUR_RAG"
}

# --- Prompt templates ---
lexi_prompts = leximind_prompts.PromptLibrary()

# --- Loaded RAG objects, filled in by load_rag_objects() ---
region_rag_objects = {}

# --- Together AI client ---
# Fail fast at import time when the API key is missing: every request needs it.
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
if not TOGETHER_API_KEY:
    raise EnvironmentError("TOGETHER_API_KEY가 설정되지 않았습니다. Hugging Face Secrets에 추가하세요.")
client = Together(api_key=TOGETHER_API_KEY)
54
+
55
+ # --- RAG 로딩 ---
56
def load_rag_objects():
    """Load the retrieval objects of every configured region.

    For each ``(region, path)`` in ``region_paths`` this loads the retriever
    and vector store through ``reg_embedding_system.load_embedding_from_faiss``
    and opens a fresh SQLite connection to ``<path>/metadata_mapping.db``.
    The three objects are stored in ``region_rag_objects[region]``.

    Progress and failures are printed and broadcast as Socket.IO ``message``
    events. A region whose directory is missing or whose loading raises is
    skipped; the remaining regions are still processed. ``"Ready to Search"``
    is always emitted at the end.
    """
    total = len(region_paths)
    loaded = 0

    for idx, (region, path) in enumerate(region_paths.items(), 1):
        if not os.path.exists(path):
            msg = f"[{region}] 경로 없음: {path} ({idx}/{total})"
            socketio.emit('message', {'message': msg})
            print(msg)
            continue

        try:
            socketio.emit('message', {'message': f"[{region}] RAG 로딩 중... ({idx}/{total})"})
            print(f"[{region}] 로딩 시작: {path}")

            # Load the FAISS-backed retriever.
            ensemble_retriever, vectorstore, sqlite_conn = reg_embedding_system.load_embedding_from_faiss(path)
            # The loader's own connection is dropped and replaced below by one
            # opened with check_same_thread=False (presumably because the
            # loader's connection is bound to this thread - TODO confirm).
            sqlite_conn.close()

            # Re-open SQLite so the connection can be used from request handlers.
            db_path = os.path.join(path, "metadata_mapping.db")
            if not os.path.exists(db_path):
                raise FileNotFoundError(f"DB 없음: {db_path}")
            new_conn = sqlite3.connect(db_path, check_same_thread=False)

            region_rag_objects[region] = {
                "ensemble_retriever": ensemble_retriever,
                "vectorstore": vectorstore,
                "sqlite_conn": new_conn
            }
            loaded += 1

            socketio.emit('message', {'message': f"[{region}] 로딩 완료 ({idx}/{total})"})
            print(f"[{region}] 로딩 완료")

        except Exception as e:
            # Best effort: report the failure and continue with the next region.
            error_msg = f"[{region}] 로딩 실패: {str(e)} ({idx}/{total})"
            print(error_msg)
            socketio.emit('message', {'message': error_msg})

    socketio.emit('message', {'message': "Ready to Search"})
    # Report how many regions actually loaded instead of claiming full success.
    print(f"=== 모든 RAG 로딩 완료 === ({loaded}/{total})")
98
+
99
+ # --- ---
100
@app.route('/')
def index():
    """Serve the chat front-end page (``chat.html``)."""
    page = render_template('chat.html')
    return page
103
+
104
+ # --- 메시지 ---
105
@app.route('/get_message', methods=['POST'])
def get_message():
    """Answer a user question with retrieval-augmented generation.

    Reads a JSON body with the keys ``query`` (text), ``regions`` (list of
    region labels) and ``selectedRegulations`` (list of objects that carry a
    ``title``). The selected titles become the ``regulation_part`` metadata
    filter stored in the module globals ``filters`` / ``Filtered_search``.

    Returns:
        A JSON response ``{"message": <answer text>}``.
    """
    global Filtered_search, filters

    # A missing, malformed or non-object body is treated as an empty request
    # instead of failing with an AttributeError on ``None``.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    query = str(data.get('query') or '').strip()
    regions = data.get('regions') or []
    selected_regulations = data.get('selectedRegulations') or []

    if not query:
        # Nothing to translate or search: skip the LLM round-trips entirely.
        return jsonify(message="질문을 입력해 주세요.")

    # Keep only well-formed entries that actually carry a title.
    titles = [
        reg['title']
        for reg in selected_regulations
        if isinstance(reg, dict) and reg.get('title')
    ]
    # Rebind a fresh dict on every request rather than mutating the old one.
    filters = {"regulation_part": titles}
    Filtered_search = bool(selected_regulations)

    Rag_Results = search_DB_from_multiple_regions(query, regions, region_rag_objects)
    AImessage = RegAI(query, Rag_Results, ResultFile_FolderAddress)

    return jsonify(message=AImessage)
125
+
126
+ # --- 법규 리스트 ---
127
@app.route('/get_reg_list', methods=['POST'])
def get_reg_list():
    """Return the distinct ``regulation_part`` values of the requested regions.

    Reads a JSON body with an optional ``regions`` list. When it is missing or
    empty, every region configured in ``region_paths`` is used. Regions that
    are not loaded are skipped, and a region whose lookup raises is logged and
    skipped.

    Returns:
        A JSON response ``{"reg_list_part": <newline-joined, sorted values>}``.
    """
    # Tolerate a missing / malformed / non-object JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    # Default to the configured regions instead of a duplicated hard-coded list.
    selected_regions = data.get('regions') or list(region_paths)

    all_reg_list_part = []
    for region in selected_regions:
        rag = region_rag_objects.get(region)
        if not rag:
            continue
        try:
            conn = rag["sqlite_conn"]
            parts = reg_embedding_system.get_unique_metadata_values(conn, "regulation_part")
            all_reg_list_part.extend(parts or [])
        except Exception as e:
            # Best effort: one failing region must not hide the others.
            print(f"[{region}] 법규 로드 실패: {e}")

    unique_parts = sorted(set(all_reg_list_part), key=reg_embedding_system.natural_sort_key)
    return jsonify(reg_list_part="\n".join(unique_parts))
146
+
147
+ # --- SocketIO ---
148
@socketio.on('connect')
def handle_connect():
    """Count a newly connected Socket.IO client and log the new total."""
    global connected_clients
    connected_clients = connected_clients + 1
    print(f"클라이언트 연결: {connected_clients}명")
153
+
154
@socketio.on('disconnect')
def handle_disconnect():
    """Count a disconnected Socket.IO client; stop a local server when none remain.

    The client counter is clamped at zero so it cannot go negative. The hard
    process exit only happens when this file runs as a script (local
    development). When the module is imported by a WSGI server such as
    Gunicorn, the process keeps serving: exiting there would kill the worker
    whenever the last browser tab closes or merely reloads.
    """
    global connected_clients
    connected_clients = max(connected_clients - 1, 0)
    print(f"연결 해제: {connected_clients}명")

    if connected_clients > 0:
        return
    if __name__ != '__main__':
        # Imported by a server process: keep the DB connections and stay up.
        return

    cleanup_connections()
    print("서버 종료")
    # os._exit skips normal interpreter shutdown, hence the explicit cleanup above.
    os._exit(0)
163
+
164
def cleanup_connections():
    """Close the SQLite connection of every loaded region.

    Closing is best effort: a failure for one region is logged and the
    remaining regions are still processed.
    """
    # Iterate over a snapshot: the loader thread may still be adding regions.
    for region, rag in list(region_rag_objects.items()):
        try:
            rag["sqlite_conn"].close()
        except Exception as e:
            # Report the failure instead of silently swallowing it.
            print(f"[{region}] DB 연결 종료 실패: {e}")
        else:
            print(f"[{region}] DB 연결 종료")
171
+
172
+ # --- Together AI 분석 ---
173
def Gemma3_AI_analysis(query_txt, content_txt):
    """Ask the Together AI chat model to answer a query from retrieved content.

    Args:
        query_txt: The user question; converted with ``str``.
        content_txt: Either a list of objects exposing ``page_content`` (their
            texts are joined with newlines) or any other value, which is
            converted with ``str``.

    Returns:
        The content of the first choice of the chat completion.
    """
    if isinstance(content_txt, list):
        context = "\n".join(doc.page_content for doc in content_txt)
    else:
        context = str(content_txt)
    question = str(query_txt)

    prompt = lexi_prompts.use_prompt(
        lexi_prompts.AI_system_prompt,
        query_txt=question,
        content_txt=context,
    )
    user_message = {"role": "user", "content": prompt}

    completion = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        messages=[user_message],
        max_tokens=1024,
        temperature=0.7,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
185
+
186
+ # --- Together AI 번역 ---
187
def Gemma3_AI_Translate(query_txt):
    """Run the user query through the ``query_translator`` prompt.

    Args:
        query_txt: The user question; converted with ``str``.

    Returns:
        The content of the first choice of the chat completion.
    """
    question = str(query_txt)
    prompt = lexi_prompts.use_prompt(lexi_prompts.query_translator, query_txt=question)
    user_message = {"role": "user", "content": prompt}

    completion = client.chat.completions.create(
        model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
        messages=[user_message],
        max_tokens=512,
        temperature=0.3,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
198
+
199
+ # --- 검색 ---
200
def search_DB_from_multiple_regions(query, selected_regions, region_rag_objects):
    """Translate the query and search every selected region's RAG store.

    Args:
        query: The user question; it is passed through ``Gemma3_AI_Translate``
            before searching.
        selected_regions: Region labels to search. When empty or ``None``,
            every key of ``region_rag_objects`` is searched.
        region_rag_objects: Mapping of region label to a dict with the keys
            ``ensemble_retriever``, ``vectorstore`` and ``sqlite_conn``.

    Returns:
        A list with the results of all searched regions, in region order.
        Regions missing from ``region_rag_objects`` are skipped.
    """
    # Snapshot the request-scoped globals first. get_message rebinds them on
    # every request, and the remote translation call below may give another
    # request the chance to run and overwrite them before they are read.
    use_filter = Filtered_search
    active_filters = filters

    selected_regions = selected_regions or list(region_rag_objects.keys())
    query = Gemma3_AI_Translate(query)
    print(f"번역된 쿼리: {query}")

    combined_results = []
    for region in selected_regions:
        rag = region_rag_objects.get(region)
        if not rag:
            continue

        retriever = rag["ensemble_retriever"]
        vectorstore = rag["vectorstore"]
        sqlite_conn = rag["sqlite_conn"]

        if use_filter:
            results = reg_embedding_system.search_with_metadata_filter(
                ensemble_retriever=retriever,
                vectorstore=vectorstore,
                query=query,
                k=search_document_number,
                metadata_filter=active_filters,
                sqlite_conn=sqlite_conn
            )
        else:
            results = reg_embedding_system.smart_search_vectorstore(
                retriever=retriever,
                query=query,
                k=search_document_number,
                vectorstore=vectorstore,
                sqlite_conn=sqlite_conn,
                enable_detailed_search=True
            )
        print(f"[{region}] 검색: {len(results)}건")
        combined_results.extend(results)

    return combined_results
237
+
238
+ # --- 최종 AI ---
239
def RegAI(query, Rag_Results, ResultFile_FolderAddress):
    """Produce the final answer for a query and log it to a result file.

    Args:
        query: The user question.
        Rag_Results: Retrieved documents (objects with ``page_content`` and
            ``metadata``). When empty or ``None`` no model call is made.
        ResultFile_FolderAddress: Path of the text file that receives the
            retrieved documents and the answer; it is overwritten on each call.

    Returns:
        The answer from ``Gemma3_AI_analysis``, or a fixed "no results" message
        when nothing was retrieved.
    """
    gc.collect()
    AI_Result = "검색 결과가 없습니다." if not Rag_Results else Gemma3_AI_analysis(query, Rag_Results)

    # The result file is only a debugging aid: a write failure (missing
    # directory, read-only file system, ...) must not discard the answer.
    try:
        with open(ResultFile_FolderAddress, 'w', encoding='utf-8') as f:
            print("검색된 문서:", file=f)
            for i, doc in enumerate(Rag_Results or [], 1):
                print(f"문서 {i}: {doc.page_content[:200]}... (메타: {doc.metadata})", file=f)
            print("\n답변:", file=f)
            print(AI_Result, file=f)
    except OSError as e:
        print(f"결과 파일 저장 실패: {e}")

    return AI_Result
251
+
252
+ # --- 실행 ---
253
if __name__ != '__main__':
    # Imported by a server such as Gunicorn: start loading the RAG data in
    # the background and close the DB connections at interpreter exit.
    import atexit
    loading_thread = threading.Thread(daemon=True, target=load_rag_objects)
    loading_thread.start()
    atexit.register(cleanup_connections)
else:
    # Local development: start the background load, wait briefly, announce it,
    # then run the Socket.IO server on port 7860.
    threading.Thread(daemon=True, target=load_rag_objects).start()
    time.sleep(2)
    start_notice = {'message': '데이터 로딩 시작...'}
    socketio.emit('message', start_notice)
    socketio.run(app, host='0.0.0.0', port=7860, debug=False)