Koyuki-0129 committed on
Commit
832d0a1
·
1 Parent(s): 1eb619c

import LLM

Browse files
pyproject.toml CHANGED
@@ -10,7 +10,11 @@ dependencies = [
10
  "sentence-transformers>=2.2.0",
11
  "sudachipy>=0.6.0",
12
  "sudachidict-core>=20240716",
 
13
  "python-dotenv>=1.0.0",
 
 
 
14
  ]
15
 
16
  [build-system]
 
10
  "sentence-transformers>=2.2.0",
11
  "sudachipy>=0.6.0",
12
  "sudachidict-core>=20240716",
13
+ "openpyxl>=3.1.0",
14
  "python-dotenv>=1.0.0",
15
+ "pydantic-ai>=1.3.0",
16
+ "openai>=2.6.0",
17
+ "anthropic>=0.71.0",
18
  ]
19
 
20
  [build-system]
src/daily_ra/app.py CHANGED
@@ -2,32 +2,26 @@ import streamlit as st
2
  import psycopg2
3
  import json
4
  from datetime import datetime
5
- from sudachipy import dictionary, tokenizer
6
- from sentence_transformers import SentenceTransformer, util
7
  import pandas as pd
8
  import os
9
  from dotenv import load_dotenv
10
 
 
 
11
  # ============================
12
  # 🔧 1. 設定
13
  # ============================
14
 
15
- # Load environment variables from .env file (for local development)
16
  load_dotenv()
17
 
18
- # DB接続設定
19
  DB_HOST = os.environ.get("DB_HOST")
20
  DB_PORT = os.environ.get("DB_PORT", "5432")
21
  DB_USER = os.environ.get("DB_USERNAME")
22
  DB_PASSWORD = os.environ.get("DB_PASSWORD")
23
  DB_NAME = os.environ.get("DB_NAME", "postgres")
24
 
25
- print(DB_HOST, DB_PORT, DB_USER, DB_NAME)
26
-
27
- # Construct PostgreSQL connection URL for Supabase
28
  if all([DB_HOST, DB_USER, DB_PASSWORD, DB_NAME]):
29
  DB_URL = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
30
- print(f"🔗 Connecting to Supabase database as user '{DB_USER}'...")
31
  else:
32
  st.error("❌ Database connection details not found in environment variables!")
33
  st.stop()
@@ -39,145 +33,42 @@ try:
39
  conn = psycopg2.connect(DB_URL)
40
  conn.autocommit = True
41
  cur = conn.cursor()
42
- print(f"✅ Connected to Supabase database '{DB_NAME}'")
43
  except Exception as e:
44
  st.error(f"❌ Database connection failed: {str(e)}")
45
  st.stop()
46
 
47
- # SudachiPy セットアップ
48
- sudachi_tokenizer = dictionary.Dictionary().create()
49
- def sudachi_tokenizer_func(text):
50
- tokens = sudachi_tokenizer.tokenize(text, tokenizer.Tokenizer.SplitMode.C)
51
- return [t.surface() for t in tokens]
52
-
53
- # SentenceTransformerモデル
54
- model = SentenceTransformer("all-MiniLM-L12-v2")
55
-
56
- # 正規化辞書
57
- NORMALIZE = {
58
- "重機": ["ショベルカー", "ユンボ", "バックホウ", "グレーダー"],
59
- "作業員": ["作業者", "職人", "人"],
60
- "クレーン": ["クレーン車", "吊り上げ機"],
61
- "足場": ["仮設足場", "高所足場"],
62
- "吊荷": ["荷", "吊り荷", "吊下げ物"]
63
- }
64
-
65
- # 分類キーワード
66
- OBJECTS = ["作業員", "重機", "クレーン", "吊荷", "足場", "ダンプ"]
67
- RISKS = ["挟まれ", "接触", "墜落", "転倒", "感電", "落下", "衝突"]
68
-
69
- POTENTIAL_RISKS = {
70
- ("作業員", "重機"): "作業員と重機が近接している状態",
71
- ("作業員", "足場"): "作業員が高所作業中の可能性",
72
- ("クレーン", "吊荷"): "吊荷の下に人がいる可能性",
73
- ("作業員", "吊荷"): "作業員が吊荷の下にいる可能性",
74
- }
75
-
76
- # ============================
77
- # 🧩 2. 関数群
78
- # ============================
79
-
80
- def normalize_text(text):
81
- """表記ゆれ統一"""
82
- for base, words in NORMALIZE.items():
83
- for w in words:
84
- text = text.replace(w, base)
85
- return text
86
-
87
- def extract_relations(text):
88
- """
89
- 文中の対象物とリスクを組み合わせて簡易ペア抽出
90
- """
91
- pairs = []
92
- text_norm = normalize_text(text)
93
-
94
- # 文中の対象物を検出
95
- found_objects = [obj for obj in OBJECTS if obj in text_norm]
96
-
97
- # 文中のリスクワードを検出
98
- found_risks = [risk for risk in RISKS if risk in text_norm]
99
-
100
- # 複数対象物とリスクがある場合にペア化
101
- if len(found_objects) >= 2 and found_risks:
102
- for i in range(len(found_objects)):
103
- for j in range(i+1, len(found_objects)):
104
- pairs.append((found_objects[i], found_objects[j], found_risks))
105
- return pairs
106
-
107
- def generate_rules(data):
108
- """ルールベース生成"""
109
- text = normalize_text(" ".join([
110
- data["work_content"],
111
- data["hazard_points"],
112
- data["risk_identification"],
113
- data["mitigation_measures"]
114
- ]))
115
-
116
- # 構文関係抽出
117
- relations = extract_relations(text)
118
-
119
- rules = []
120
- for subj, obj, _ in relations:
121
- # 潜在リスクを確認
122
- risk_desc = POTENTIAL_RISKS.get((subj, obj)) or POTENTIAL_RISKS.get((obj, subj)) or []
123
- rules.append({
124
- "object1": subj,
125
- "object2": obj,
126
- "risk": risk_desc
127
- })
128
- return rules
129
-
130
  # ============================
131
- # 🖥 3. Streamlit UI
132
- # ============================
133
- import pandas as pd
134
-
135
- # ============================
136
- # Excelファイル読み込み(初回のみ)
137
  # ============================
138
  @st.cache_data
139
  def load_category_data():
140
  df = pd.read_excel("All process.xlsx") # ファイルパスは適宜修正
141
  df.columns = [col.strip() for col in df.columns]
142
- # 「章」→大分類、「工種」→小分類 として統一
143
  df = df.rename(columns={"章": "大分類", "工種": "小分類"})
144
  return df
145
 
146
  df_categories = load_category_data()
147
 
148
-
 
 
149
  st.title("日次RA入力")
150
  st.subheader("作業内容の選択")
151
 
152
- # --- 大分類(章) ---
153
  major_categories = sorted(df_categories["大分類"].dropna().unique())
154
- selected_major = st.selectbox(
155
- "章(大分類)を選択してください",
156
- ["--選択してください--"] + list(major_categories),
157
- key="major"
158
- )
159
 
160
- # --- 小分類(工種) ---
161
- if selected_major and selected_major != "--選択してください--":
162
  filtered_df = df_categories[df_categories["大分類"] == selected_major]
163
  sub_categories = sorted(filtered_df["小分類"].dropna().unique())
164
- selected_sub = st.selectbox(
165
- "工種(小分類)を選択してください",
166
- ["--選択してください--"] + list(sub_categories),
167
- key="sub"
168
- )
169
  else:
170
  selected_sub = "--選択してください--"
171
 
172
- # --- 作業内容を確定 ---
173
- if (
174
- selected_major != "--選択してください--"
175
- and selected_sub != "--選択してください--"
176
- ):
177
- work_content = f"{selected_major} - {selected_sub}"
178
  st.success(f"作業内容: {work_content}")
179
  else:
180
- work_content = ""
181
  st.warning("章と工種を選択してください。")
182
 
183
  # ============================
@@ -190,15 +81,14 @@ with st.form("ra_form"):
190
  risk_identification = st.text_area("危険性・有害性の特定")
191
  mitigation_measures = st.text_area("危険性・有害性の低減策")
192
  inspection_items = st.text_area("点検事項")
193
-
194
  submitted = st.form_submit_button("保存")
195
 
196
  # ============================
197
  # フォーム送信処理
198
  # ============================
199
  if submitted:
200
- if work_content == "":
201
- st.error("❌ 作業内容が未選択です。章と工種を選んでください。")
202
  else:
203
  form_data = {
204
  "work_date": str(work_date),
@@ -209,57 +99,39 @@ if submitted:
209
  "mitigation_measures": mitigation_measures,
210
  "inspection_items": inspection_items
211
  }
212
- st.success("✅ 入力内容を保存しました!")
213
-
214
 
215
- # --- ルール生成 ---
216
- rules = generate_rules(form_data)
217
-
218
- # --- PostgreSQL 保存 ---
219
- sql = """INSERT INTO daily_ra
220
- (work_date, work_content, hazard_points, general_comments, risk_identification, mitigation_measures, inspection_items, created_at)
221
- VALUES (%s,%s,%s,%s,%s,%s,%s,NOW()) RETURNING id"""
222
- cur.execute(sql, tuple(form_data.values()))
223
- daily_id = cur.fetchone()[0] # PostgreSQL uses RETURNING to get the inserted ID
224
-
225
- for r in rules:
226
- sql_rule = """INSERT INTO rule_base (daily_ra_id, object1, object2, risk, created_at)
227
- VALUES (%s,%s,%s,%s,NOW())"""
228
- cur.execute(sql_rule, (daily_id, r["object1"], r["object2"], json.dumps(r["risk"], ensure_ascii=False)))
229
-
230
- # No need to commit with autocommit=True, but keeping for clarity
231
- conn.commit()
232
- st.success("✅ 入力内容とルールベースの生成・保存が完了しました!")
233
-
234
- # --- 表形式でルール表示 ---
235
- if rules:
236
- df = pd.DataFrame(rules)
237
- st.subheader("🔍 生成されたルール(テーブル形式)")
238
- st.dataframe(df)
239
-
240
- # --- JSON作成(LLM連携用)&保存 ---
241
- json_data = {
242
- "daily_id": daily_id,
243
- "rules": rules
244
- }
245
 
246
- # JSON保存用ディレクトリ作
247
- # Use absolute path in Docker, relative path locally
248
- if os.path.exists("/app"):
249
- json_dir = "/app/json_data"
250
- else:
251
- json_dir = "json_data"
252
-
253
- try:
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  os.makedirs(json_dir, exist_ok=True)
255
-
256
- # ファイル名に daily_id とタイムスタンプを付与
257
  json_path = os.path.join(json_dir, f"daily_ra_{daily_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
258
-
259
- # JSONファイルとして保存
260
  with open(json_path, "w", encoding="utf-8") as f:
261
  json.dump(json_data, f, ensure_ascii=False, indent=2)
262
-
263
- st.success(f"✅ JSONファイルを保存しました: {json_path}")
264
- except PermissionError:
265
- st.warning("⚠️ JSONファイルの保存はスキップされました(データベースには正常に保存されています)")
 
2
  import psycopg2
3
  import json
4
  from datetime import datetime
 
 
5
  import pandas as pd
6
  import os
7
  from dotenv import load_dotenv
8
 
9
+ from daily_ra.services.llm_service import llm_service, DailyRAInput
10
+
11
  # ============================
12
  # 🔧 1. 設定
13
  # ============================
14
 
 
15
  load_dotenv()
16
 
 
17
  DB_HOST = os.environ.get("DB_HOST")
18
  DB_PORT = os.environ.get("DB_PORT", "5432")
19
  DB_USER = os.environ.get("DB_USERNAME")
20
  DB_PASSWORD = os.environ.get("DB_PASSWORD")
21
  DB_NAME = os.environ.get("DB_NAME", "postgres")
22
 
 
 
 
23
  if all([DB_HOST, DB_USER, DB_PASSWORD, DB_NAME]):
24
  DB_URL = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
 
25
  else:
26
  st.error("❌ Database connection details not found in environment variables!")
27
  st.stop()
 
33
  conn = psycopg2.connect(DB_URL)
34
  conn.autocommit = True
35
  cur = conn.cursor()
 
36
  except Exception as e:
37
  st.error(f"❌ Database connection failed: {str(e)}")
38
  st.stop()
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  # ============================
41
+ # Excelファイル読み込み
 
 
 
 
 
42
  # ============================
43
@st.cache_data
def load_category_data():
    """Load the work-category sheet and normalise its column names.

    Reads ``All process.xlsx`` (a relative path, so it is resolved against
    the current working directory), strips surrounding whitespace from every
    column header, and renames the ``章`` column to ``大分類`` and the ``工種``
    column to ``小分類``.

    Returns:
        The resulting pandas DataFrame.
    """
    column_aliases = {"章": "大分類", "工種": "小分類"}
    # Adjust this path to wherever the workbook lives in the deployment.
    sheet = pd.read_excel("All process.xlsx")
    sheet.columns = [header.strip() for header in sheet.columns]
    return sheet.rename(columns=column_aliases)
49
 
50
  df_categories = load_category_data()
51
 
52
+ # ============================
53
+ # Streamlit UI
54
+ # ============================
55
  st.title("日次RA入力")
56
  st.subheader("作業内容の選択")
57
 
 
58
  major_categories = sorted(df_categories["大分類"].dropna().unique())
59
+ selected_major = st.selectbox("章(大分類)を選択してください", ["--選択してください--"] + list(major_categories))
 
 
 
 
60
 
61
+ if selected_major != "--選択してください--":
 
62
  filtered_df = df_categories[df_categories["大分類"] == selected_major]
63
  sub_categories = sorted(filtered_df["小分類"].dropna().unique())
64
+ selected_sub = st.selectbox("工種(小分類)を選択してください", ["--選択してください--"] + list(sub_categories))
 
 
 
 
65
  else:
66
  selected_sub = "--選択してください--"
67
 
68
+ work_content = f"{selected_major} - {selected_sub}" if selected_major != "--選択してください--" and selected_sub != "--選択してください--" else ""
69
+ if work_content:
 
 
 
 
70
  st.success(f"作業内容: {work_content}")
71
  else:
 
72
  st.warning("章と工種を選択してください。")
73
 
74
  # ============================
 
81
  risk_identification = st.text_area("危険性・有害性の特定")
82
  mitigation_measures = st.text_area("危険性・有害性の低減策")
83
  inspection_items = st.text_area("点検事項")
 
84
  submitted = st.form_submit_button("保存")
85
 
86
  # ============================
87
  # フォーム送信処理
88
  # ============================
89
  if submitted:
90
+ if not work_content:
91
+ st.error("❌ 作業内容が未選択です。")
92
  else:
93
  form_data = {
94
  "work_date": str(work_date),
 
99
  "mitigation_measures": mitigation_measures,
100
  "inspection_items": inspection_items
101
  }
 
 
102
 
103
+ # --- DB保存 ---
104
+ sql = """INSERT INTO daily_ra
105
+ (work_date, work_content, hazard_points, general_comments, risk_identification, mitigation_measures, inspection_items, created_at)
106
+ VALUES (%s,%s,%s,%s,%s,%s,%s,NOW()) RETURNING id"""
107
+ cur.execute(sql, tuple(form_data.values()))
108
+ daily_id = cur.fetchone()[0]
109
+ conn.commit()
110
+ st.success("✅ 入力内容を保存しました!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
+ # --- 🔥 LLMでルール生成 ---
113
+ with st.spinner("🤖 LLMで安全ルールを生成中..."):
114
+ input_data = DailyRAInput(**form_data)
115
+ rules = llm_service.generate_rules(input_data)
116
+
117
+ # --- ルール保存 ---
118
+ if rules:
119
+ for r in rules:
120
+ sql_rule = """INSERT INTO rule_base (daily_ra_id, object1, object2, risk, created_at)
121
+ VALUES (%s,%s,%s,%s,NOW())"""
122
+ cur.execute(sql_rule, (daily_id, r.object1, r.object2, r.risk))
123
+ conn.commit()
124
+ st.success("✅ LLM生成ルールを保存しました!")
125
+ st.subheader("🔍 LLMが生成した安全ルール")
126
+ st.dataframe([r.dict() for r in rules])
127
+ else:
128
+ st.warning("⚠️ LLMによるルール生成に失敗しました。")
129
+
130
+ # --- JSON作成&保存 ---
131
+ json_data = {"daily_id": daily_id, "rules": [r.dict() for r in rules]}
132
+ json_dir = "/app/json_data" if os.path.exists("/app") else "json_data"
133
  os.makedirs(json_dir, exist_ok=True)
 
 
134
  json_path = os.path.join(json_dir, f"daily_ra_{daily_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
 
 
135
  with open(json_path, "w", encoding="utf-8") as f:
136
  json.dump(json_data, f, ensure_ascii=False, indent=2)
137
+ st.success(f"✅ JSONファイルを保存しました: {json_path}")
 
 
 
src/daily_ra/models/__init__.py ADDED
File without changes
src/daily_ra/models/schemas.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List
3
+
4
class DailyRAInput(BaseModel):
    """Values of one daily risk-assessment (RA) form entry.

    Every field is declared as a plain string.
    """

    # Date of the work.
    work_date: str
    # Description of the work being carried out.
    work_content: str
    # Hazard points of the work.
    hazard_points: str
    # General comments on the entry.
    general_comments: str
    # Identified dangers / harmful factors.
    risk_identification: str
    # Measures that reduce the identified dangers.
    mitigation_measures: str
    # Items to inspect.
    inspection_items: str
12
+
13
class GeneratedRule(BaseModel):
    """A single generated rule: two named objects and a risk description."""

    # First object involved in the rule.
    object1: str
    # Second object involved in the rule.
    object2: str
    # Risk text associated with the pair.
    risk: str
src/daily_ra/services/llm_service.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List
3
+ from openai import OpenAI
4
+ from dotenv import load_dotenv
5
+ import os, json
6
+
7
# Load variables from a .env file into the process environment.
load_dotenv()

# Module-level OpenAI client; the API key is taken from OPENAI_API_KEY.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
12
+
13
+ # ======== Pydantic スキーマ ========
14
class DailyRAInput(BaseModel):
    """Values of one daily risk-assessment (RA) form entry.

    Every field is declared as a plain string; the prompt builder in this
    module embeds each of them in the request text.
    """

    # Date of the work.
    work_date: str
    # Description of the work being carried out.
    work_content: str
    # Hazard points of the work.
    hazard_points: str
    # General comments on the entry.
    general_comments: str
    # Identified dangers / harmful factors.
    risk_identification: str
    # Measures that reduce the identified dangers.
    mitigation_measures: str
    # Items to inspect.
    inspection_items: str
22
+
23
class GeneratedRule(BaseModel):
    """A single generated rule: two named objects and a risk description."""

    # First object involved in the rule.
    object1: str
    # Second object involved in the rule.
    object2: str
    # Risk text associated with the pair.
    risk: str
27
+
28
+ # ======== LLM サービス ========
29
class LLMService:
    """Generate safety rules from a daily RA entry through the OpenAI chat API.

    All methods are static; the class only groups prompt construction,
    response clean-up and parsing under one name.
    """

    @staticmethod
    def generate_prompt(data: "DailyRAInput") -> str:
        """Build the user prompt for the model.

        Args:
            data: Daily RA form values; each field is embedded on its own
                labelled line.

        Returns:
            The instruction text (including the expected JSON output shape)
            followed by the labelled form values.
        """
        return "\n".join([
            "",
            "以下は日次RAの作業内容です。作業者、重機、クレーンなどの対象物と危険性を抽出し、",
            "安全ルールを JSON 形式で返してください。出力形式は",
            '[{"object1": "対象1", "object2": "対象2", "risk": "リスク"}] です。',
            "",
            f"作業日: {data.work_date}",
            f"作業内容: {data.work_content}",
            f"作業危険ポイント: {data.hazard_points}",
            f"元請コメント: {data.general_comments}",
            f"危険性・有害性の特定: {data.risk_identification}",
            f"危険性・有害性の低減策: {data.mitigation_measures}",
            f"点検事項: {data.inspection_items}",
            "",
        ])

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return *text* without a surrounding Markdown code fence.

        Only the fence markers and the optional language tag directly after
        the opening fence are removed. The payload itself is left untouched,
        so a value that happens to contain the word "json" survives intact.
        """
        text = text.strip()
        if not text.startswith("```"):
            return text

        inner = text[3:]
        if inner.endswith("```"):
            inner = inner[:-3]

        head, newline, tail = inner.partition("\n")
        tag = head.strip()
        if newline and (not tag or tag.isalnum()):
            # The first line holds nothing but the (optional) language tag.
            inner = tail
        else:
            # Single-line fence such as ```json[...]```: drop a leading tag.
            stripped = inner.lstrip()
            if stripped[:4].lower() == "json":
                inner = stripped[4:]
        return inner.strip()

    @staticmethod
    def generate_rules(data: "DailyRAInput") -> "List[GeneratedRule]":
        """Ask the model for safety rules and parse its JSON answer.

        Args:
            data: Daily RA form values used to build the prompt.

        Returns:
            The parsed rules. An empty list is returned when the request
            fails or the answer cannot be parsed or validated; the error is
            printed instead of being raised.
        """
        prompt = LLMService.generate_prompt(data)
        text = ""

        try:
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "あなたは建設現場の安全ルール生成AIです。"},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.2,
            )
            # The message content may be None; treat that as an empty answer.
            text = LLMService._strip_code_fence(
                response.choices[0].message.content or ""
            )

            rules_raw = json.loads(text)
            if isinstance(rules_raw, dict):
                # A lone rule object is accepted as a one-element list.
                rules_raw = [rules_raw]
            return [GeneratedRule(**r) for r in rules_raw]

        except json.JSONDecodeError as e:
            print(f"⚠️ JSONデコードエラー: {e}\n出力内容: {text}")
            return []
        except Exception as e:
            # Boundary handler: callers treat an empty list as "generation failed".
            print(f"⚠️ LLM生成エラー: {e}")
            return []


# Shared module-level instance.
llm_service = LLMService()