scipious committed on
Commit
4dfd818
·
verified ·
1 Parent(s): 2586e45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +251 -242
app.py CHANGED
@@ -1,242 +1,251 @@
1
- # app.py
2
- from flask import Flask, render_template, jsonify, request
3
- from flask_socketio import SocketIO
4
- import threading
5
- import os
6
- import sqlite3
7
- import gc
8
- import time
9
- import re
10
-
11
- # --- 외부 모듈 임포트 ---
12
- import reg_embedding_system
13
- import leximind_prompts
14
-
15
- # --- Together AI SDK ---
16
- from together import Together
17
-
18
- # --- Flask & SocketIO 설정 ---
19
- app = Flask(__name__)
20
- socketio = SocketIO(app, cors_allowed_origins="*", async_mode='eventlet')
21
-
22
- # --- 전역 변수 ---
23
- connected_clients = 0
24
- search_document_number = 30
25
- Filtered_search = False
26
- filters = {"regulation_part": []}
27
-
28
- # --- 경로 설정 ---
29
- current_dir = os.path.dirname(os.path.abspath(__file__))
30
- ResultFile_FolderAddress = os.path.join(current_dir, 'result.txt')
31
-
32
- # --- RAG 데이터 경로 ---
33
- region_paths = {
34
- "국내": "/app/data/KMVSS_RAG",
35
- "북미": "/app/data/FMVSS_RAG",
36
- "유럽": "/app/data/EUR_RAG"
37
- }
38
-
39
- # --- 프롬프트 ---
40
- lexi_prompts = leximind_prompts.PromptLibrary()
41
-
42
- # --- RAG 객체 ---
43
- region_rag_objects = {}
44
-
45
- # --- Together AI 클라이언트 (Hugging Face Secrets에서 API 키 가져오기) ---
46
- TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
47
- if not TOGETHER_API_KEY:
48
- raise EnvironmentError("TOGETHER_API_KEY가 설정되지 않았습니다. Hugging Face Secrets에 추가하세요.")
49
-
50
- client = Together(api_key=TOGETHER_API_KEY)
51
-
52
- # --- RAG 로딩 ---
53
- def load_rag_objects():
54
- global region_rag_objects
55
- for region, path in region_paths.items():
56
- if not os.path.exists(path):
57
- msg = f"[{region}] 경로 없음: {path}"
58
- socketio.emit('message', {'message': msg})
59
- print(msg)
60
- continue
61
-
62
- try:
63
- socketio.emit('message', {'message': f"[{region}] RAG 로딩 중..."})
64
- ensemble_retriever, vectorstore, sqlite_conn = reg_embedding_system.load_embedding_from_faiss(path)
65
- sqlite_conn.close()
66
- db_path = os.path.join(path, "metadata_mapping.db")
67
- new_conn = sqlite3.connect(db_path, check_same_thread=False)
68
-
69
- region_rag_objects[region] = {
70
- "ensemble_retriever": ensemble_retriever,
71
- "vectorstore": vectorstore,
72
- "sqlite_conn": new_conn
73
- }
74
- socketio.emit('message', {'message': f"[{region}] 로딩 완료"})
75
- print(f"[{region}] RAG 로딩 완료")
76
-
77
- except Exception as e:
78
- error_msg = f"[{region}] 로딩 실패: {str(e)}"
79
- print(error_msg)
80
- socketio.emit('message', {'message': error_msg})
81
-
82
- socketio.emit('message', {'message': "Ready to Search"})
83
- print("Ready to Search")
84
-
85
- # --- 웹 ---
86
- @app.route('/')
87
- def index():
88
- return render_template('chat.html')
89
-
90
- # --- 메시지 ---
91
- @app.route('/get_message', methods=['POST'])
92
- def get_message():
93
- global Filtered_search, filters
94
- data = request.get_json()
95
- query = data.get('query', '').strip()
96
- regions = data.get('regions', [])
97
- selected_regulations = data.get('selectedRegulations', [])
98
-
99
- filters = {"regulation_part": []}
100
- Filtered_search = bool(selected_regulations)
101
- if selected_regulations:
102
- for reg in selected_regulations:
103
- title = reg.get('title', '')
104
- if title:
105
- filters["regulation_part"].append(title)
106
-
107
- Rag_Results = search_DB_from_multiple_regions(query, regions, region_rag_objects)
108
- AImessage = RegAI(query, Rag_Results, ResultFile_FolderAddress)
109
-
110
- return jsonify(message=AImessage)
111
-
112
- # --- 법규 리스트 ---
113
- @app.route('/get_reg_list', methods=['POST'])
114
- def get_reg_list():
115
- data = request.get_json()
116
- selected_regions = data.get('regions', []) or ["국내", "북미", "유럽"]
117
-
118
- all_reg_list_part = []
119
- for region in selected_regions:
120
- rag = region_rag_objects.get(region)
121
- if not rag: continue
122
- try:
123
- conn = rag["sqlite_conn"]
124
- parts = reg_embedding_system.get_unique_metadata_values(conn, "regulation_part")
125
- all_reg_list_part.extend(parts)
126
- except Exception as e:
127
- print(f"[{region}] 법규 로드 실패: {e}")
128
-
129
- unique_parts = sorted(set(all_reg_list_part), key=reg_embedding_system.natural_sort_key)
130
- return jsonify(reg_list_part="\n".join(unique_parts))
131
-
132
- # --- SocketIO ---
133
- @socketio.on('connect')
134
- def handle_connect():
135
- global connected_clients
136
- connected_clients += 1
137
- print(f"클라이언트 연결: {connected_clients}명")
138
-
139
- @socketio.on('disconnect')
140
- def handle_disconnect():
141
- global connected_clients
142
- connected_clients -= 1
143
- print(f"연결 해제: {connected_clients}명")
144
- if connected_clients <= 0:
145
- cleanup_connections()
146
- print("서버 종료")
147
- os._exit(0)
148
-
149
- def cleanup_connections():
150
- for region, rag in region_rag_objects.items():
151
- try:
152
- rag["sqlite_conn"].close()
153
- print(f"[{region}] DB 연결 종료")
154
- except:
155
- pass
156
-
157
- # --- Together AI 분석 ---
158
- def Gemma3_AI_analysis(query_txt, content_txt):
159
- content_txt = "\n".join(doc.page_content for doc in content_txt) if isinstance(content_txt, list) else str(content_txt)
160
- query_txt = str(query_txt)
161
- prompt = lexi_prompts.use_prompt(lexi_prompts.AI_system_prompt, query_txt=query_txt, content_txt=content_txt)
162
-
163
- response = client.chat.completions.create(
164
- model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
165
- messages=[{"role": "user", "content": prompt}],
166
- max_tokens=1024,
167
- temperature=0.7
168
- )
169
- return response.choices[0].message.content
170
-
171
- # --- Together AI 번역 ---
172
- def Gemma3_AI_Translate(query_txt):
173
- query_txt = str(query_txt)
174
- prompt = lexi_prompts.use_prompt(lexi_prompts.query_translator, query_txt=query_txt)
175
-
176
- response = client.chat.completions.create(
177
- model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
178
- messages=[{"role": "user", "content": prompt}],
179
- max_tokens=512,
180
- temperature=0.3
181
- )
182
- return response.choices[0].message.content
183
-
184
- # --- 검색 ---
185
- def search_DB_from_multiple_regions(query, selected_regions, region_rag_objects):
186
- selected_regions = selected_regions or list(region_rag_objects.keys())
187
- query = Gemma3_AI_Translate(query)
188
- print(f"번역된 쿼리: {query}")
189
-
190
- combined_results = []
191
- for region in selected_regions:
192
- rag = region_rag_objects.get(region)
193
- if not rag: continue
194
-
195
- retriever = rag["ensemble_retriever"]
196
- vectorstore = rag["vectorstore"]
197
- sqlite_conn = rag["sqlite_conn"]
198
-
199
- if Filtered_search:
200
- results = reg_embedding_system.search_with_metadata_filter(
201
- ensemble_retriever=retriever,
202
- vectorstore=vectorstore,
203
- query=query,
204
- k=search_document_number,
205
- metadata_filter=filters,
206
- sqlite_conn=sqlite_conn
207
- )
208
- else:
209
- results = reg_embedding_system.smart_search_vectorstore(
210
- retriever=retriever,
211
- query=query,
212
- k=search_document_number,
213
- vectorstore=vectorstore,
214
- sqlite_conn=sqlite_conn,
215
- enable_detailed_search=True
216
- )
217
- print(f"[{region}] 검색: {len(results)}건")
218
- combined_results.extend(results)
219
-
220
- return combined_results
221
-
222
- # --- 최종 AI ---
223
- def RegAI(query, Rag_Results, ResultFile_FolderAddress):
224
- gc.collect()
225
- AI_Result = "검색 결과가 없습니다." if not Rag_Results else Gemma3_AI_analysis(query, Rag_Results)
226
-
227
- with open(ResultFile_FolderAddress, 'w', encoding='utf-8') as f:
228
- print("검색된 문서:", file=f)
229
- for i, doc in enumerate(Rag_Results):
230
- print(f"문서 {i+1}: {doc.page_content[:200]}... (메타: {doc.metadata})", file=f)
231
- print("\n답변:", file=f)
232
- print(AI_Result, file=f)
233
-
234
- return AI_Result
235
-
236
- # --- 실행 ---
237
- if __name__ == '__main__':
238
- threading.Thread(target=load_rag_objects, daemon=True).start()
239
- time.sleep(2)
240
- socketio.emit('message', {'message': '데이터 로딩 시작...'})
241
-
242
- socketio.run(app, host='0.0.0.0', port=7860, debug=False)
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ from flask import Flask, render_template, jsonify, request
3
+ from flask_socketio import SocketIO
4
+ import threading
5
+ import os
6
+ import sqlite3
7
+ import gc
8
+ import time
9
+ import re
10
+
11
+ # --- 외부 모듈 임포트 ---
12
+ import reg_embedding_system
13
+ import leximind_prompts
14
+
15
+ # --- Together AI SDK ---
16
+ from together import Together
17
+
18
# --- Flask & Socket.IO setup ---
app = Flask(__name__)
socketio = SocketIO(
    app,
    cors_allowed_origins="*",
    async_mode='eventlet',
)

# --- Module-level state ---
connected_clients = 0        # live Socket.IO connections (see the connect/disconnect handlers)
search_document_number = 30  # passed as `k` to the retrieval helpers
Filtered_search = False      # rebound on every /get_message request
filters = {"regulation_part": []}  # rebound on every /get_message request

# --- Paths ---
current_dir = os.path.dirname(os.path.abspath(__file__))
ResultFile_FolderAddress = os.path.join(current_dir, 'result.txt')

# --- RAG data directories, keyed by the region label the client sends ---
region_paths = {
    "국내": "/app/data/KMVSS_RAG",
    "북미": "/app/data/FMVSS_RAG",
    "유럽": "/app/data/EUR_RAG",
}

# --- Prompt library ---
lexi_prompts = leximind_prompts.PromptLibrary()

# --- Loaded RAG objects per region; filled in by load_rag_objects() ---
region_rag_objects = {}

# --- Together AI client (the API key is read from the environment,
#     e.g. Hugging Face Secrets); refuse to start without it ---
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")
if not TOGETHER_API_KEY:
    raise EnvironmentError("TOGETHER_API_KEY가 설정되지 않았습니다. Hugging Face Secrets에 추가하세요.")

client = Together(api_key=TOGETHER_API_KEY)
51
+
52
+ # --- RAG 로딩 ---
53
def load_rag_objects():
    """Load retriever, vector store and metadata DB for each configured region.

    Iterates over ``region_paths``. For every path that exists,
    ``reg_embedding_system.load_embedding_from_faiss`` is called and the
    result is stored under ``region_rag_objects[region]`` with the keys
    ``ensemble_retriever``, ``vectorstore`` and ``sqlite_conn``.

    Progress and failures are printed and broadcast as Socket.IO
    ``'message'`` events. A region whose path is missing or whose load
    raises is skipped; the remaining regions are still processed.
    """
    for region_name, rag_dir in region_paths.items():
        if not os.path.exists(rag_dir):
            missing_msg = f"[{region_name}] 경로 없음: {rag_dir}"
            socketio.emit('message', {'message': missing_msg})
            print(missing_msg)
            continue

        try:
            socketio.emit('message', {'message': f"[{region_name}] RAG 로딩 중..."})
            loaded = reg_embedding_system.load_embedding_from_faiss(rag_dir)
            ensemble_retriever, vectorstore, loader_conn = loaded

            # Replace the loader's connection with one opened with
            # check_same_thread=False (presumably so request handlers
            # running on other threads can share it -- confirm).
            loader_conn.close()
            metadata_db = os.path.join(rag_dir, "metadata_mapping.db")
            shared_conn = sqlite3.connect(metadata_db, check_same_thread=False)

            region_rag_objects[region_name] = {
                "ensemble_retriever": ensemble_retriever,
                "vectorstore": vectorstore,
                "sqlite_conn": shared_conn,
            }
            socketio.emit('message', {'message': f"[{region_name}] 로딩 완료"})
            print(f"[{region_name}] RAG 로딩 완료")

        except Exception as exc:
            failure_msg = f"[{region_name}] 로딩 실패: {str(exc)}"
            print(failure_msg)
            socketio.emit('message', {'message': failure_msg})

    socketio.emit('message', {'message': "Ready to Search"})
    print("Ready to Search")
84
+
85
+ # --- 웹 ---
86
@app.route('/')
def index():
    """Serve the chat UI by rendering the ``chat.html`` template."""
    page = render_template('chat.html')
    return page
89
+
90
+ # --- 메시지 ---
91
@app.route('/get_message', methods=['POST'])
def get_message():
    """Answer a user question from the regulation RAG stores.

    Expects a JSON object with:
      * ``query`` -- the question text,
      * ``regions`` -- optional list of region labels to search,
      * ``selectedRegulations`` -- optional list of objects whose
        ``title`` values restrict the search to those regulation parts.

    Returns a JSON object ``{"message": <answer text>}``.

    A missing, malformed or non-object body is treated as an empty
    request instead of crashing with ``AttributeError``, and a blank
    query is answered immediately without calling the LLM or retrievers.
    """
    global Filtered_search, filters

    # silent=True: a bad or absent JSON body yields None rather than an
    # exception; anything that is not a JSON object is ignored as well.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    raw_query = payload.get('query')
    query = raw_query.strip() if isinstance(raw_query, str) else ''
    if not query:
        # Same response shape as a normal answer so the client can show it.
        return jsonify(message="질문을 입력해 주세요.")

    regions = payload.get('regions') or []
    selected_regulations = payload.get('selectedRegulations')
    if not isinstance(selected_regulations, list):
        selected_regulations = []

    # Rebind (never mutate) the request-scoped globals read by
    # search_DB_from_multiple_regions().
    titles = [
        reg['title']
        for reg in selected_regulations
        if isinstance(reg, dict) and reg.get('title')
    ]
    filters = {"regulation_part": titles}
    Filtered_search = bool(selected_regulations)

    Rag_Results = search_DB_from_multiple_regions(query, regions, region_rag_objects)
    AImessage = RegAI(query, Rag_Results, ResultFile_FolderAddress)

    return jsonify(message=AImessage)
111
+
112
+ # --- 법규 리스트 ---
113
@app.route('/get_reg_list', methods=['POST'])
def get_reg_list():
    """Return the regulation parts available in the requested regions.

    Expects a JSON object with an optional ``regions`` list. When it is
    absent or empty -- or when the body is missing, malformed or not a
    JSON object -- all three regions are used.

    Returns a JSON object ``{"reg_list_part": <str>}`` where the string
    holds the unique ``regulation_part`` values, ordered by
    ``reg_embedding_system.natural_sort_key`` and joined with newlines.
    Regions that are not loaded, or whose lookup raises, are skipped.
    """
    # silent=True: a bad or absent JSON body yields None rather than an
    # exception, so the handler falls back to the defaults.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    selected_regions = payload.get('regions') or ["국내", "북미", "유럽"]

    all_reg_list_part = []
    for region in selected_regions:
        rag = region_rag_objects.get(region)
        if not rag:
            continue
        try:
            parts = reg_embedding_system.get_unique_metadata_values(
                rag["sqlite_conn"], "regulation_part"
            )
            all_reg_list_part.extend(parts)
        except Exception as e:
            print(f"[{region}] 법규 로드 실패: {e}")

    unique_parts = sorted(set(all_reg_list_part), key=reg_embedding_system.natural_sort_key)
    return jsonify(reg_list_part="\n".join(unique_parts))
131
+
132
+ # --- SocketIO ---
133
@socketio.on('connect')
def handle_connect():
    """Count a newly connected Socket.IO client and log the new total."""
    global connected_clients
    connected_clients = connected_clients + 1
    print(f"클라이언트 연결: {connected_clients}명")
138
+
139
@socketio.on('disconnect')
def handle_disconnect():
    """Count a client disconnect; shut the process down once none remain.

    When the counter drops to zero or below, the SQLite connections are
    closed via ``cleanup_connections()`` and the process is terminated
    with ``os._exit(0)``.
    """
    global connected_clients
    connected_clients = connected_clients - 1
    print(f"연결 해제: {connected_clients}명")

    if connected_clients > 0:
        return

    # NOTE(review): os._exit() ends the process immediately and skips
    # atexit handlers. On a hosted deployment this stops the server
    # whenever the last browser tab closes -- confirm this is intended.
    cleanup_connections()
    print("서버 종료")
    os._exit(0)
148
+
149
def cleanup_connections():
    """Close the SQLite connection of every loaded region (best effort).

    A failure for one region is reported and does not stop the others
    from being closed. Only ``Exception`` subclasses are absorbed, so
    ``KeyboardInterrupt`` / ``SystemExit`` still propagate.
    """
    # Iterate over a snapshot: the background loader may still be adding
    # regions, and mutating a dict during iteration raises RuntimeError.
    for region, rag in list(region_rag_objects.items()):
        try:
            rag["sqlite_conn"].close()
        except Exception as exc:
            print(f"[{region}] DB 연결 종료 실패: {exc}")
        else:
            print(f"[{region}] DB 연결 종료")
156
+
157
+ # --- Together AI 분석 ---
158
def Gemma3_AI_analysis(query_txt, content_txt):
    """Ask the Together AI chat model to answer a query from retrieved text.

    Despite the function name, the request goes to
    ``meta-llama/Llama-3.3-70B-Instruct-Turbo``.

    Args:
        query_txt: The user question; converted with ``str()``.
        content_txt: Either a list of objects exposing ``page_content``
            (joined with newlines) or any other value, which is
            converted with ``str()``.

    Returns:
        The message content of the first completion choice.
    """
    if isinstance(content_txt, list):
        context_text = "\n".join(doc.page_content for doc in content_txt)
    else:
        context_text = str(content_txt)
    question_text = str(query_txt)

    prompt = lexi_prompts.use_prompt(
        lexi_prompts.AI_system_prompt,
        query_txt=question_text,
        content_txt=context_text,
    )

    completion = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=1024,
        temperature=0.7,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
170
+
171
+ # --- Together AI 번역 ---
172
def Gemma3_AI_Translate(query_txt):
    """Rewrite a query with the ``query_translator`` prompt.

    Despite the function name, the request goes to
    ``meta-llama/Llama-3.2-3B-Instruct-Turbo``. The target language is
    defined by the prompt template, which lives in ``leximind_prompts``.

    Args:
        query_txt: The original query; converted with ``str()``.

    Returns:
        The message content of the first completion choice.
    """
    source_text = str(query_txt)
    prompt = lexi_prompts.use_prompt(
        lexi_prompts.query_translator,
        query_txt=source_text,
    )

    completion = client.chat.completions.create(
        model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=512,
        temperature=0.3,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
183
+
184
+ # --- 검색 ---
185
def search_DB_from_multiple_regions(query, selected_regions, region_rag_objects):
    """Search the RAG stores of several regions and concatenate the hits.

    The query is first passed through ``Gemma3_AI_Translate``. Each
    selected region that is loaded is then searched, using the metadata
    filter when ``Filtered_search`` is set and the smart search otherwise.

    Args:
        query: The user's question.
        selected_regions: Region labels to search; a falsy value means
            every region present in ``region_rag_objects``.
        region_rag_objects: Mapping of region label to a dict holding
            ``ensemble_retriever``, ``vectorstore`` and ``sqlite_conn``.

    Returns:
        A list with the results of all searched regions, in region order.
    """
    # Snapshot the request-scoped globals before any blocking call.
    # get_message() rebinds them on every request, so a request handled
    # concurrently while we wait on the translation call could otherwise
    # swap in its own filters before they are read below.
    use_filter = Filtered_search
    active_filters = filters

    selected_regions = selected_regions or list(region_rag_objects.keys())
    query = Gemma3_AI_Translate(query)
    print(f"번역된 쿼리: {query}")

    combined_results = []
    for region in selected_regions:
        rag = region_rag_objects.get(region)
        if not rag:
            continue

        retriever = rag["ensemble_retriever"]
        vectorstore = rag["vectorstore"]
        sqlite_conn = rag["sqlite_conn"]

        if use_filter:
            results = reg_embedding_system.search_with_metadata_filter(
                ensemble_retriever=retriever,
                vectorstore=vectorstore,
                query=query,
                k=search_document_number,
                metadata_filter=active_filters,
                sqlite_conn=sqlite_conn,
            )
        else:
            results = reg_embedding_system.smart_search_vectorstore(
                retriever=retriever,
                query=query,
                k=search_document_number,
                vectorstore=vectorstore,
                sqlite_conn=sqlite_conn,
                enable_detailed_search=True,
            )
        print(f"[{region}] 검색: {len(results)}건")
        combined_results.extend(results)

    return combined_results
221
+
222
+ # --- 최종 AI ---
223
def RegAI(query, Rag_Results, ResultFile_FolderAddress):
    """Produce the final answer and dump a diagnostic report to a file.

    Args:
        query: The user's question.
        Rag_Results: Retrieved documents (objects with ``page_content``
            and ``metadata``); may be empty or ``None``.
        ResultFile_FolderAddress: Path of the report file, overwritten on
            every call.

    Returns:
        The model's answer, or a fixed "no results" message when nothing
        was retrieved.

    The report is best effort: if the file cannot be written, the
    failure is logged and the answer is still returned.
    """
    gc.collect()
    if Rag_Results:
        AI_Result = Gemma3_AI_analysis(query, Rag_Results)
    else:
        AI_Result = "검색 결과가 없습니다."

    try:
        with open(ResultFile_FolderAddress, 'w', encoding='utf-8') as f:
            print("검색된 문서:", file=f)
            # `or []` keeps a None result set from crashing enumerate().
            for i, doc in enumerate(Rag_Results or [], start=1):
                print(f"문서 {i}: {doc.page_content[:200]}... (메타: {doc.metadata})", file=f)
            print("\n답변:", file=f)
            print(AI_Result, file=f)
    except OSError as exc:
        # A read-only or missing directory must not discard the answer.
        print(f"결과 파일 저장 실패: {exc}")

    return AI_Result
235
+
236
+ # --- 실행 ---
237
# --- Start-up (shared by `python app.py` and WSGI servers such as Gunicorn) ---
# Local import: atexit is only needed by this start-up section.
import atexit

# Load the RAG data in the background so the web server can accept
# connections while the stores are still being read.
loading_thread = threading.Thread(target=load_rag_objects, daemon=True)
loading_thread.start()

# Close the SQLite connections on normal interpreter exit in every
# launch mode, not only when the module is imported by a WSGI server.
atexit.register(cleanup_connections)

if __name__ == '__main__':
    # Local development: run the Socket.IO development server directly.
    time.sleep(2)
    socketio.emit('message', {'message': '데이터 로딩 시작...'})
    socketio.run(app, host='0.0.0.0', port=7860, debug=False)