Swaroop05 committed on
Commit
ca0edd2
·
verified ·
1 Parent(s): e0e7a96

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +243 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, cv2, re, base64
2
+ import numpy as np
3
+ import pandas as pd
4
+ import gradio as gr
5
+ from roboflow import Roboflow
6
+ from openai import OpenAI
7
+ from openpyxl import load_workbook
8
+
9
# ================= CONFIG =================
# SECURITY: the Roboflow key was previously available only as a hard-coded
# literal. It can now be supplied through the ROBOFLOW_API_KEY environment
# variable. The literal is kept purely as a backward-compatible fallback;
# because it has been committed to source control it should be rotated and
# then removed from this file.
ROBOFLOW_API_KEY = os.getenv("ROBOFLOW_API_KEY") or "uP19IAi98TqwLvHmNB8V"
ROBOFLOW_PROJECT = "braker3"
ROBOFLOW_VERSION = 6
# Both thresholds are fractions here; the pipeline converts them to integer
# percentages when calling the detector.
CONF_THRESHOLD = 0.35
IOU_THRESHOLD = 0.4

# OpenAI client used for OCR; the key is read from OPENAI_API_KEY.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Roboflow detection model, resolved once at import time.
rf = Roboflow(api_key=ROBOFLOW_API_KEY)
model = rf.workspace().project(ROBOFLOW_PROJECT).version(ROBOFLOW_VERSION).model
19
+
20
# ================= CONSTANTS =================
# Detector class label -> Japanese specification name. The Japanese name is
# shown in the verification table and is also used to locate the matching
# Excel column.
SPEC_JP = {
    "Manufacture Name": "メーカー",
    "Circuit Name": "回路番号",
    "Load Name": "負荷名称",
    "Breaking Capacity": "遮断容量",
    "AF": "フレーム(AF)",
    "AT": "トリップ(AT)",
}

# Canonical (upper-case English) manufacturer name -> spelling that the
# verification step compares against the Excel cell.
MANUFACTURER_MAP = {
    "MITSUBISHI ELECTRIC": "三菱電機",
    "SIEMENS": "SIEMENS",
    "SCHNEIDER ELECTRIC": "SCHNEIDER ELECTRIC",
    "ABB": "ABB",
    "LS ELECTRIC": "LS ELECTRIC",
}

# Breaking-capacity readings (as digit strings) that are accepted as valid.
VALID_BREAKING_CAPACITY = {"6", "10", "15", "25", "36", "50", "65", "85"}
# Value reported when OCR yields no accepted reading (dataset truth).
DEFAULT_BREAKING_CAPACITY = "85"
40
+
41
+ # ================= IMAGE =================
42
def prepare_for_roboflow(img, max_side=1024):
    """Downscale ``img`` so its longer side is at most ``max_side`` pixels.

    Images that already fit (including empty ones) are returned unchanged,
    as the same object. The aspect ratio is preserved when resizing.

    Args:
        img: Image array whose first two dimensions are height and width.
        max_side: Maximum allowed length of the longer side, in pixels.

    Returns:
        The original array, or a resized copy produced by ``cv2.resize``.
    """
    height, width = img.shape[:2]
    longest = max(height, width)
    # Covers both "already small enough" and the empty-image case, which
    # would otherwise divide by zero below.
    if longest <= max_side:
        return img

    scale = max_side / longest
    # Clamp to 1 px: for very elongated images the short side can truncate
    # to 0, which cv2.resize rejects.
    new_width = max(1, int(width * scale))
    new_height = max(1, int(height * scale))
    return cv2.resize(img, (new_width, new_height))
46
+
47
def crop(img, x1, y1, x2, y2, pad=20):
    """Return the region ``(x1, y1)-(x2, y2)`` of ``img`` grown by ``pad`` pixels.

    The padded box is clipped to the image bounds. The result is a slice of
    ``img``, not a copy.
    """
    height, width = img.shape[:2]
    top = max(0, y1 - pad)
    bottom = min(height, y2 + pad)
    left = max(0, x1 - pad)
    right = min(width, x2 + pad)
    return img[top:bottom, left:right]
50
+
51
def enhance(img):
    """Prepare a crop for OCR: grayscale, 3x cubic upscale, CLAHE contrast.

    Returns a three-channel image (the equalized gray plane converted back
    with ``COLOR_GRAY2BGR``).
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    upscaled = cv2.resize(gray, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
    equalizer = cv2.createCLAHE(2.0, (8, 8))
    equalized = equalizer.apply(upscaled)
    return cv2.cvtColor(equalized, cv2.COLOR_GRAY2BGR)
56
+
57
def enhance_breaking_capacity(img):
    """Prepare a breaking-capacity crop for OCR by binarizing it.

    The crop is converted to grayscale, upscaled 4x with cubic
    interpolation, and thresholded with a Gaussian adaptive threshold
    (block size 31, constant 2). The result is returned as a three-channel
    image.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    upscaled = cv2.resize(gray, None, fx=4, fy=4, interpolation=cv2.INTER_CUBIC)
    binary = cv2.adaptiveThreshold(
        upscaled,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        31,
        2,
    )
    return cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
65
+
66
def img_to_base64(img):
    """Encode ``img`` as JPEG and return the bytes as a base64 text string.

    Raises:
        ValueError: If OpenCV reports that JPEG encoding failed.
    """
    ok, buf = cv2.imencode(".jpg", img)
    # imencode signals failure through its first return value; without this
    # check a failed encode would surface later as an unrelated error.
    if not ok:
        raise ValueError("Failed to encode image as JPEG")
    return base64.b64encode(buf).decode("ascii")
69
+
70
+ # ================= TEXT =================
71
def remove_spaces_only(text):
    """Return ``text`` as a string with every whitespace character removed.

    Missing values (``None`` or a float NaN) become the empty string. Other
    falsy values such as the number ``0`` are kept, so a numeric cell
    containing 0 is rendered as ``"0"`` rather than silently dropped.
    """
    if text is None:
        return ""
    # NaN is the only float that is not equal to itself.
    if isinstance(text, float) and text != text:
        return ""
    return re.sub(r"\s+", "", str(text))
73
+
74
def extract_digits(text):
    """Return the last run of digits found in ``text``, or ``""`` if none."""
    last = ""
    for match in re.finditer(r"\d+", str(text)):
        last = match.group()
    return last
77
+
78
def clean_manufacturer(text):
    """Map free-form OCR text to a known manufacturer key.

    The text is upper-cased and the first key of ``MANUFACTURER_MAP`` that
    occurs in it as a substring is returned; ``""`` if no key matches.
    """
    upper = text.upper()
    return next((name for name in MANUFACTURER_MAP if name in upper), "")
84
+
85
def normalize_breaking_capacity(text):
    """Pick the first accepted breaking-capacity value out of ``text``.

    Digit runs are scanned left to right; the first one that is a member of
    ``VALID_BREAKING_CAPACITY`` is returned. When none qualifies, the
    function falls back to ``DEFAULT_BREAKING_CAPACITY``.
    """
    accepted = (
        number
        for number in re.findall(r"\d+", str(text))
        if number in VALID_BREAKING_CAPACITY
    )
    return next(accepted, DEFAULT_BREAKING_CAPACITY)
91
+
92
+ # ================= GPT OCR =================
93
def gpt_ocr(label, img):
    """Read the text of one detected field with the OpenAI vision model.

    The crop is pre-processed (binarized for "Breaking Capacity", contrast
    enhanced otherwise), sent as a base64 JPEG together with a per-field
    instruction, and the reply is post-processed according to ``label``.

    Args:
        label: Detector class name of the field.
        img: Image crop of the field.

    Returns:
        The cleaned text for the field. Labels without a dedicated rule are
        transcribed verbatim and returned stripped.
    """
    if label == "Breaking Capacity":
        prepared = enhance_breaking_capacity(img)
    else:
        prepared = enhance(img)

    b64 = img_to_base64(prepared)

    rules = {
        "Manufacture Name": "Return ONLY manufacturer name in English.",
        "Circuit Name": "Return EXACT text.",
        "Load Name": "Return EXACT text.",
        "AF": "Return ONLY number.",
        "AT": "Return ONLY number.",
        "Breaking Capacity": "Return ONLY kA number.",
    }
    # A detector class without a rule used to raise KeyError here, which made
    # the generic fallback at the end of this function unreachable.
    instruction = rules.get(label, "Return EXACT text.")

    response = client.chat.completions.create(
        model="gpt-5.2",
        messages=[
            {"role": "system", "content": "Strict OCR engine"},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": instruction},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
                    },
                ],
            },
        ],
        temperature=0,
    )

    # The message content may be None; treat that as an empty reading
    # instead of crashing on .strip().
    raw = (response.choices[0].message.content or "").strip()

    if label == "Manufacture Name":
        return clean_manufacturer(raw)
    if label in ["Circuit Name", "Load Name"]:
        return remove_spaces_only(raw)
    if label in ["AF", "AT"]:
        return extract_digits(raw)
    if label == "Breaking Capacity":
        return normalize_breaking_capacity(raw)

    return raw
134
+
135
+ # ================= EXCEL =================
136
def normalize_header(s):
    """Return header cell ``s`` as a string with all whitespace removed.

    Besides plain spaces and newlines this also strips tabs, carriage
    returns and the full-width (ideographic) space, so that headers broken
    across lines or padded with such characters still match their keys.
    """
    return re.sub(r"\s+", "", str(s))
138
+
139
def find_column(df, keys):
    """Return the first column of ``df`` whose normalized header contains a key.

    Keys are tried in the order given, so an earlier (more specific) key
    wins over a later fallback key even when the fallback would match a
    column further to the left.

    Args:
        df: DataFrame whose column labels are header cells.
        keys: Substrings to look for, most specific first.

    Returns:
        The matching column label, or ``None`` if no key matches any header.
    """
    # Normalize each header once instead of once per key.
    headers = [(column, normalize_header(column)) for column in df.columns]
    for key in keys:
        for column, header in headers:
            if key in header:
                return column
    return None
145
+
146
def verify_excel(excel, det):
    """Compare detected breaker fields with the matching row of the "MCB" sheet.

    The header row is the first row containing "回路". The data row is the
    first one whose circuit-number cell equals the detected circuit name
    (whitespace ignored). Each entry of ``SPEC_JP`` is then compared with
    the corresponding cell of that row.

    Args:
        excel: Path or file object accepted by ``openpyxl.load_workbook``.
        det: Mapping of detector label to extracted text.

    Returns:
        A DataFrame with columns 仕様 / 検出値 / Excelに存在? / 備考. If the
        sheet, the header row or the circuit row cannot be found, a single
        explanatory "NO" row is returned instead.
    """
    result_columns = ["仕様", "検出値", "Excelに存在?", "備考"]

    def cell_text(value):
        """Render a cell as text: blank when missing, no ".0" on whole floats."""
        if value is None:
            return ""
        if isinstance(value, float):
            if value != value:  # NaN marks an empty cell in a numeric column
                return ""
            if value.is_integer():
                return str(int(value))
        return str(value).strip()

    def failure(detected, note):
        """Build the single-row result used when verification cannot proceed."""
        return pd.DataFrame(
            [["回路番号", detected, "NO", note]], columns=result_columns
        )

    wb = load_workbook(excel, data_only=True)
    try:
        if "MCB" not in wb.sheetnames:
            return failure("", "MCBシート不明")
        raw = pd.DataFrame(
            [list(row) for row in wb["MCB"].iter_rows(values_only=True)]
        )
    finally:
        # Release the file handle even when the sheet is missing.
        wb.close()
    raw.dropna(how="all", inplace=True)

    hdr = next(
        (
            i
            for i in range(len(raw))
            if "回路" in "".join(map(str, raw.iloc[i].values))
        ),
        None,
    )
    if hdr is None:
        return failure("", "ヘッダー不明")

    df = raw.iloc[hdr + 1:].copy()
    df.columns = raw.iloc[hdr]
    df.dropna(how="all", inplace=True)

    ccol = find_column(df, ["回路番号", "回路"])
    if ccol is None:
        return failure("", "ヘッダー不明")

    detected_circuit = det.get("Circuit Name", "")
    wanted = remove_spaces_only(detected_circuit)
    target = None
    # An empty detection must not match a row whose circuit cell is blank.
    if wanted:
        for _, row in df.iterrows():
            if remove_spaces_only(cell_text(row[ccol])) == wanted:
                target = row
                break

    if target is None:
        return failure(detected_circuit, "Excelに存在しない")

    rows = []
    for k, jp in SPEC_JP.items():
        detv = det.get(k, "")
        col = find_column(df, [jp.replace("(", "").replace(")", ""), jp[:2]])
        # cell_text makes a numeric cell such as 100.0 comparable with the
        # OCR string "100".
        excelv = cell_text(target[col]) if col is not None else ""

        if k in ["Circuit Name", "Load Name"]:
            ok = remove_spaces_only(detv) == remove_spaces_only(excelv)
        elif k == "Manufacture Name":
            ok = MANUFACTURER_MAP.get(detv, detv) == excelv
        else:
            ok = detv == excelv

        rows.append(
            [jp, detv, "YES" if ok else "NO", "" if ok else f"Excel値: {excelv}"]
        )

    return pd.DataFrame(rows, columns=result_columns)
192
+
193
+ # ================= PIPELINE =================
194
def run_pipeline(image, excel):
    """Detect breaker fields, OCR them, verify against Excel, export the result.

    Args:
        image: Input image as a numpy array, or ``None`` if nothing was
            uploaded.
        excel: Uploaded workbook (path, or an object exposing the path as
            ``.name``), or ``None``.

    Returns:
        A 4-tuple ``(annotated image, OCR table, verification table,
        path of the exported .xlsx)``. With no image the tuple is
        ``(None, empty DataFrame, empty DataFrame, None)``. With no workbook
        the verification table is empty.
    """
    if image is None:
        return None, pd.DataFrame(), pd.DataFrame(), None

    # Local import: only this function needs a temp directory.
    import tempfile

    img = prepare_for_roboflow(image)
    preds = model.predict(
        img,
        confidence=int(round(CONF_THRESHOLD * 100)),
        overlap=int(round(IOU_THRESHOLD * 100)),
    ).json()["predictions"]

    # Keep only the highest-confidence crop for each detected class.
    best = {}
    vis = img.copy()
    for p in preds:
        lab = p["class"]
        x, y, w, h = map(int, [p["x"], p["y"], p["width"], p["height"]])
        # Predictions are centre-based; convert to corner coordinates.
        x1, y1, x2, y2 = x - w // 2, y - h // 2, x + w // 2, y + h // 2
        cv2.rectangle(vis, (x1, y1), (x2, y2), (0, 255, 0), 2)
        if lab not in best or p["confidence"] > best[lab][0]:
            best[lab] = (p["confidence"], crop(img, x1, y1, x2, y2))

    det = {}
    rows = []
    for lab, (_, field_crop) in best.items():
        value = gpt_ocr(lab, field_crop)
        if value:
            det[lab] = value
            rows.append([lab, value])
    ocr_table = pd.DataFrame(rows, columns=["Field", "Extracted Text"])

    if excel is None:
        # Nothing to verify against; still return the OCR output.
        verification = pd.DataFrame(
            columns=["仕様", "検出値", "Excelに存在?", "備考"]
        )
    else:
        # Some Gradio versions pass a file wrapper whose path is in .name.
        verification = verify_excel(getattr(excel, "name", excel), det)

    # The download path was previously returned without the file ever being
    # written. Write it now, in a per-call directory so that concurrent
    # requests do not overwrite each other.
    out_path = os.path.join(tempfile.mkdtemp(), "verification_result.xlsx")
    with pd.ExcelWriter(out_path, engine="openpyxl") as writer:
        ocr_table.to_excel(writer, sheet_name="OCR", index=False)
        verification.to_excel(writer, sheet_name="Verification", index=False)

    return vis, ocr_table, verification, out_path
226
+
227
# ================= UI =================
# Gradio front end: an image and a workbook go in; the annotated image, the
# OCR table, the verification table and a downloadable file come out.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ⚡ Breaker Panel OCR & Verification")

    with gr.Row():
        img_in = gr.Image(type="numpy", label="Upload Image")
        img_out = gr.Image(label="Detected Image")

    excel_in = gr.File(
        label="Upload Excel (MCB)",
        file_types=[".xlsx", ".xlsm"],
    )
    btn = gr.Button("Run Verification", variant="primary")

    t1 = gr.Dataframe(label="OCR Output")
    t2 = gr.Dataframe(label="Verification Result")
    f = gr.File(label="Download Result")

    # Output order matches the tuple returned by run_pipeline.
    btn.click(
        fn=run_pipeline,
        inputs=[img_in, excel_in],
        outputs=[img_out, t1, t2, f],
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ openai
3
+ roboflow
4
+ opencv-python
5
+ numpy
6
+ matplotlib
7
+ pandas
8
+ openpyxl
9
+ Pillow