DIVYA-NSHU99 committed on
Commit
90699f6
·
verified ·
1 Parent(s): 75c3655

Update conflict_check/gradio_app.py

Browse files
Files changed (1) hide show
  1. conflict_check/gradio_app.py +221 -154
conflict_check/gradio_app.py CHANGED
@@ -1,76 +1,92 @@
1
  # conflict_check/gradio_app.py
2
  """
3
- Gradio web interface for the Trademark Conflict Checker.
4
- This replaces the CLI main.py for Hugging Face Spaces deployment.
 
5
  """
6
 
7
- import gradio as gr
8
- import json
9
- import os
10
  import sys
 
 
11
  from pathlib import Path
12
  from datetime import datetime
 
13
 
14
- # ── Make sure our packages are importable ─────────────────
15
- # When running from Docker: PYTHONPATH=/home/user/app/conflict_check
16
- # When running locally: add conflict_check/ to sys.path
17
- THIS_DIR = Path(__file__).resolve().parent # conflict_check/
 
 
 
 
18
  if str(THIS_DIR) not in sys.path:
19
  sys.path.insert(0, str(THIS_DIR))
20
 
21
- # ── Now import project modules ────────────────────────────
22
  from app.controllers.search_controller import handle_search
23
  from app.utils.extract_pairs import iterate_pairs_from_file
24
  from app.similarity.factor1 import score_factor1
25
 
 
 
26
 
27
- # ─────────────────────────────────────────────────────────
28
- # Core search + analysis function
29
- # ─────────────────────────────────────────────────────────
30
- def run_trademark_search(keyword: str, intl_class: str, filing_status: str):
31
- """
32
- Called by Gradio when user clicks Search.
33
- Returns:
34
- status_msg — plain text log shown in the Status box
35
- table_data — list of dicts shown in the Results table
36
- """
37
-
38
- log_lines = []
39
-
40
- def log(msg):
41
- log_lines.append(msg)
42
- print(msg) # also visible in HF Space logs
43
-
44
- # ── Input validation ─────────────────────────────────
45
- if not keyword.strip():
46
- return "❌ Please enter a keyword.", []
47
 
48
- keyword = keyword.strip()
49
- intl_class = intl_class.strip() or None
50
- filing_status = filing_status.strip() or None
51
 
52
- log(f"🔍 Searching: '{keyword}' class={intl_class} status={filing_status}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- # ── STEP 1: Fetch from Atom API ───────────────────────
55
  try:
56
  results = handle_search(keyword, intl_class, filing_status)
57
  except Exception as e:
58
- return f"❌ API Error: {str(e)}", []
59
-
60
- log(f"✅ Fetched {len(results)} trademark records from Atom API")
61
 
62
  if not results:
63
- return (
64
- "\n".join(log_lines) +
65
- "\n\n⚠️ No records returned from API.\n"
66
- "Possible reasons:\n"
67
- " • Daily quota exhausted — try again tomorrow\n"
68
- " • API key invalid — check HF Space Secrets\n"
69
- " • No trademarks match this keyword + filters",
70
- []
71
- )
72
 
73
- # ── STEP 2: Find newest saved JSON file ───────────────
74
  search_folder = THIS_DIR / "search_data"
75
  json_files = sorted(
76
  search_folder.glob("search_*.json"),
@@ -79,152 +95,203 @@ def run_trademark_search(keyword: str, intl_class: str, filing_status: str):
79
  )
80
 
81
  if not json_files:
82
- return "\n".join(log_lines) + "\n❌ No JSON file found after search.", []
 
83
 
84
  latest_file = json_files[0]
85
- log(f"📂 Using file: {latest_file.name}")
86
-
87
- # ── STEP 3: Run DuPont Factor-1 Similarity ────────────
88
- log(f"\n⚙️ Running DuPont Factor-1 analysis...")
89
 
90
  analysis_results = []
91
-
92
  for name, serial, idx in iterate_pairs_from_file(latest_file):
93
  if not name:
94
  continue
95
- score = score_factor1(keyword, name)
 
 
 
 
 
96
  analysis_results.append({
97
- "Conflicting Mark" : name,
98
- "Serial Number" : serial or "N/A",
99
- "Visual Score" : round(score.visual_similarity, 3),
100
- "Phonetic Score" : round(score.phonetic_similarity, 3),
101
- "Meaning Score" : round(score.meaning_similarity, 3),
102
- "Composite Score" : round(score.composite_score, 3),
103
- "Risk Level" : _risk_label(score.composite_score),
 
 
104
  })
105
 
106
- # ── STEP 4: Sort by composite score ───────────────────
107
- analysis_results.sort(key=lambda x: x["Composite Score"], reverse=True)
108
 
109
- # ── STEP 5: Save analysis to file ────────────────────
110
  output_folder = THIS_DIR / "analysis_output"
111
  output_folder.mkdir(exist_ok=True)
112
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
113
- out_file = output_folder / f"factor1_{keyword.replace(' ','_')}_{timestamp}.json"
114
- with open(out_file, "w", encoding="utf-8") as f:
115
  json.dump(analysis_results, f, indent=2, ensure_ascii=False)
116
 
117
- log(f" Analysis complete — {len(analysis_results)} marks scored")
118
- log(f"💾 Saved to: {out_file.name}")
119
 
120
- status_text = "\n".join(log_lines)
121
- return status_text, analysis_results
122
 
 
 
 
 
 
 
 
 
123
 
124
- def _risk_label(score: float) -> str:
125
- """Convert composite score to human-readable risk level."""
126
- if score >= 0.85:
127
- return "🔴 HIGH"
128
- elif score >= 0.65:
129
- return "🟠 MEDIUM"
130
- elif score >= 0.45:
131
- return "🟡 LOW"
132
- else:
133
- return "🟢 MINIMAL"
134
 
 
 
 
 
 
 
135
 
136
- # ─────────────────────────────────────────────────────────
137
- # Gradio UI Layout
138
- # ─────────────────────────────────────────────────────────
139
- with gr.Blocks(
140
- title="Trademark Conflict Checker",
141
- theme=gr.themes.Soft()
142
- ) as demo:
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  gr.Markdown("""
145
- # ⚖️ Trademark Conflict Checker
146
  **TMEP §1207.01 — DuPont Factor 1 Analysis**
147
-
148
- Search the USPTO trademark database via Atom API and score similarity
149
- using Visual (Jaro-Winkler), Phonetic (Metaphone), and Semantic (SBERT) analysis.
150
  """)
151
 
152
- # ── Input Row ────────────────────────────────────────
153
  with gr.Row():
154
- keyword_input = gr.Textbox(
155
- label="🔤 Applied Mark (your keyword)",
156
- placeholder="e.g. APPLE, NIKE SHOES, TECHFLOW",
157
- scale=3
158
- )
159
- class_input = gr.Textbox(
160
- label="📦 International Class (optional)",
161
- placeholder="e.g. 009, 025, 042",
162
- scale=1
163
- )
164
- status_input = gr.Dropdown(
165
- label="📋 Filing Status",
166
- choices=["all", "active", "pending", "dead"],
167
- value="all",
168
  scale=1
169
  )
170
 
171
- search_btn = gr.Button("🔍 Run Conflict Check", variant="primary", size="lg")
 
172
 
173
- # ── Status Output ────────────────────────────────────
174
- status_output = gr.Textbox(
175
- label="📊 Search Log",
176
- lines=8,
177
- interactive=False
178
- )
179
-
180
- # ── Results Table ────────────────────────────────────
181
- gr.Markdown("### 🏆 Conflict Results (sorted by risk)")
182
- results_table = gr.Dataframe(
183
- headers=[
184
- "Conflicting Mark",
185
- "Serial Number",
186
- "Visual Score",
187
- "Phonetic Score",
188
- "Meaning Score",
189
- "Composite Score",
190
- "Risk Level"
191
- ],
192
- datatype=["str", "str", "number", "number", "number", "number", "str"],
193
  interactive=False,
194
  wrap=True
195
  )
196
 
197
- # ── Score Guide ──────────────────────────────────────
198
- with gr.Accordion("📖 Score Guide", open=False):
199
  gr.Markdown("""
200
- | Score Range | Risk Level | Meaning |
201
- |---|---|---|
202
- | 0.85 – 1.00 | 🔴 HIGH | Very likely conflict — consult attorney |
203
- | 0.65 – 0.84 | 🟠 MEDIUM | Possible conflict — review carefully |
204
- | 0.45 – 0.64 | 🟡 LOW | Minor similarity — probably safe |
205
- | 0.00 – 0.44 | 🟢 MINIMAL | Very low similarity |
206
-
207
- **Score Components:**
208
- - **Visual Score** — Jaro-Winkler string similarity (how marks *look*)
209
- - **Phonetic Score** — Metaphone encoding (how marks *sound*)
210
- - **Meaning Score** — Sentence-BERT cosine similarity (what marks *mean*)
211
- - **Composite Score** — Weighted average: 35% visual + 35% phonetic + 30% meaning
212
  """)
213
 
214
- # ── Wire up button ────────────────────────────────────
215
- search_btn.click(
216
- fn=run_trademark_search,
217
- inputs=[keyword_input, class_input, status_input],
218
- outputs=[status_output, results_table]
219
  )
220
 
221
 
222
- # ─────────────────────────────────────────────────────────
223
- # Launch
224
- # ─────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  if __name__ == "__main__":
226
- demo.launch(
227
- server_name="0.0.0.0", # required for Docker / HF Spaces
228
- server_port=7860, # HF Spaces default port
229
- show_error=True
 
 
230
  )
 
1
  # conflict_check/gradio_app.py
2
  """
3
+ Serves TWO things on port 7860:
4
+ POST /search <- REST API called by trademark_pdf_extractor.py
5
+ GET /ui <- Gradio UI for direct browser use
6
  """
7
 
 
 
 
8
  import sys
9
+ import json
10
+ import logging
11
  from pathlib import Path
12
  from datetime import datetime
13
+ from typing import Optional
14
 
15
+ import gradio as gr
16
+ from fastapi import FastAPI
17
+ from fastapi.middleware.cors import CORSMiddleware
18
+ from fastapi.responses import JSONResponse
19
+ from pydantic import BaseModel
20
+
21
+ # ── Make project importable ───────────────────────────────────────────────────
22
+ THIS_DIR = Path(__file__).resolve().parent
23
  if str(THIS_DIR) not in sys.path:
24
  sys.path.insert(0, str(THIS_DIR))
25
 
 
26
  from app.controllers.search_controller import handle_search
27
  from app.utils.extract_pairs import iterate_pairs_from_file
28
  from app.similarity.factor1 import score_factor1
29
 
30
+ logging.basicConfig(level=logging.INFO)
31
+ logger = logging.getLogger("trademark_hf")
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ # ─────────────────────────────────────────────────────────────────────────────
35
+ # Shared core logic
36
+ # ─────────────────────────────────────────────────────────────────────────────
37
 
38
+ def _risk_label(score: float) -> str:
39
+ if score >= 0.75:
40
+ return "HIGH"
41
+ elif score >= 0.50:
42
+ return "MEDIUM"
43
+ else:
44
+ return "LOW"
45
+
46
+
47
+ def _build_explanation(score_obj, applied: str, conflicting: str) -> str:
48
+ parts = []
49
+ if score_obj.visual_similarity >= 0.80:
50
+ parts.append("visually very similar")
51
+ elif score_obj.visual_similarity >= 0.60:
52
+ parts.append("visually similar")
53
+ if score_obj.phonetic_similarity >= 0.80:
54
+ parts.append("sounds alike")
55
+ elif score_obj.phonetic_similarity >= 0.60:
56
+ parts.append("phonetically similar")
57
+ if score_obj.meaning_similarity >= 0.75:
58
+ parts.append("same conceptual meaning")
59
+ elif score_obj.meaning_similarity >= 0.55:
60
+ parts.append("related meaning")
61
+ if score_obj.dominant_word_match:
62
+ parts.append("dominant word identical")
63
+ if not parts:
64
+ return f"Low overall similarity between '{applied}' and '{conflicting}'."
65
+ return f"'{applied}' and '{conflicting}' are {', '.join(parts)}."
66
+
67
+
68
+ def run_conflict_analysis(
69
+ keyword: str,
70
+ intl_class: Optional[str] = None,
71
+ filing_status: str = "active"
72
+ ) -> list:
73
+ if not keyword or not keyword.strip():
74
+ return []
75
+
76
+ keyword = keyword.strip()
77
+ logger.info("Conflict search: keyword=%s class=%s status=%s",
78
+ keyword, intl_class, filing_status)
79
 
 
80
  try:
81
  results = handle_search(keyword, intl_class, filing_status)
82
  except Exception as e:
83
+ logger.exception("handle_search failed")
84
+ return [{"error": str(e), "status": "failed"}]
 
85
 
86
  if not results:
87
+ logger.warning("Atom API returned 0 results for '%s'", keyword)
88
+ return []
 
 
 
 
 
 
 
89
 
 
90
  search_folder = THIS_DIR / "search_data"
91
  json_files = sorted(
92
  search_folder.glob("search_*.json"),
 
95
  )
96
 
97
  if not json_files:
98
+ logger.error("No search JSON file found after API call")
99
+ return []
100
 
101
  latest_file = json_files[0]
102
+ logger.info("Using search file: %s", latest_file.name)
 
 
 
103
 
104
  analysis_results = []
 
105
  for name, serial, idx in iterate_pairs_from_file(latest_file):
106
  if not name:
107
  continue
108
+ try:
109
+ score_obj = score_factor1(keyword, name)
110
+ except Exception as e:
111
+ logger.warning("score_factor1 failed for '%s': %s", name, e)
112
+ continue
113
+
114
  analysis_results.append({
115
+ "applied_mark": keyword,
116
+ "conflicting_mark": name,
117
+ "serial": serial or "N/A",
118
+ "score": round(score_obj.composite_score, 4),
119
+ "risk": _risk_label(score_obj.composite_score),
120
+ "explanation": _build_explanation(score_obj, keyword, name),
121
+ "visual_score": round(score_obj.visual_similarity, 3),
122
+ "phonetic_score": round(score_obj.phonetic_similarity, 3),
123
+ "meaning_score": round(score_obj.meaning_similarity, 3),
124
  })
125
 
126
+ analysis_results.sort(key=lambda x: x["score"], reverse=True)
 
127
 
 
128
  output_folder = THIS_DIR / "analysis_output"
129
  output_folder.mkdir(exist_ok=True)
130
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
131
+ out_path = output_folder / f"factor1_{keyword.replace(' ','_')}_{timestamp}.json"
132
+ with open(out_path, "w", encoding="utf-8") as f:
133
  json.dump(analysis_results, f, indent=2, ensure_ascii=False)
134
 
135
+ logger.info("Saved %s (%d marks scored)", out_path.name, len(analysis_results))
136
+ return analysis_results
137
 
 
 
138
 
139
+ # ─────────────────────────────────────────────────────────────────────────────
140
+ # Step 1 — Build FastAPI app and register ALL routes FIRST
141
+ # ─────────────────────────────────────────────────────────────────────────────
142
+ # IMPORTANT: Register all FastAPI routes on `app` BEFORE calling
143
+ # gr.mount_gradio_app(). That call mounts the Gradio sub-application onto
144
+ # the `app` it is given and returns it; declaring our own routes first
145
+ # keeps /search and /health ahead of the mounted UI in the routing table.
146
+ # ─────────────────────────────────────────────────────────────────────────────
147
 
148
+ app = FastAPI(title="Trademark Conflict API")
 
 
 
 
 
 
 
 
 
149
 
150
+ app.add_middleware(
151
+ CORSMiddleware,
152
+ allow_origins=["*"],
153
+ allow_methods=["*"],
154
+ allow_headers=["*"],
155
+ )
156
 
 
 
 
 
 
 
 
157
 
158
+ class SearchRequest(BaseModel):
159
+ mark_text: str
160
+ intl_class: Optional[str] = None
161
+ filing_status: str = "active"
162
+
163
+
164
+ # ── Register /search BEFORE mounting Gradio ───────────────────────────────────
165
+ @app.post("/search")
166
+ async def search_endpoint(body: SearchRequest):
167
+ """
168
+ Called by trademark_pdf_extractor.py → forward_mark_text_to_server().
169
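+ Input: { "mark_text": "APPLE", "intl_class": "009" (optional), "filing_status": "active" (default) }
170
+ Output: list of conflict records sorted by composite score, highest first; on upstream failure a single { "error": ..., "status": "failed" } record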
171
+ """
172
+ logger.info("POST /search mark_text='%s'", body.mark_text)
173
+ results = run_conflict_analysis(
174
+ keyword=body.mark_text,
175
+ intl_class=body.intl_class,
176
+ filing_status=body.filing_status or "active"
177
+ )
178
+ return JSONResponse(content=results)
179
+
180
+
181
+ # ── Register /health BEFORE mounting Gradio ───────────────────────────────────
182
+ @app.get("/health")
183
+ def health():
184
+ return {"status": "ok", "service": "trademark-conflict-hf"}
185
+
186
+
187
+ # ─────────────────────────────────────────────────────────────────────────────
188
+ # Step 2 — Build Gradio UI
189
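+ # NOTE: theme is NOT passed to gr.Blocks() in Gradio 6.x — it goes to launch(). This module never calls launch() (the app is served by uvicorn), so no custom theme is applied here.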
190
+ # ─────────────────────────────────────────────────────────────────────────────
191
+
192
+ def gradio_search(keyword: str, intl_class: str, filing_status: str):
193
+ if not keyword.strip():
194
+ return "Please enter a keyword.", []
195
+
196
+ results = run_conflict_analysis(
197
+ keyword.strip(),
198
+ intl_class.strip() or None,
199
+ filing_status
200
+ )
201
+
202
+ if not results:
203
+ return "No results returned. Check API quota or credentials.", []
204
+
205
+ if results[0].get("error"):
206
+ return f"Error: {results[0]['error']}", []
207
+
208
+ high = sum(1 for r in results if r["risk"] == "HIGH")
209
+ medium = sum(1 for r in results if r["risk"] == "MEDIUM")
210
+ low = sum(1 for r in results if r["risk"] == "LOW")
211
+
212
+ status = (
213
+ f"Analysis complete — {len(results)} marks scored\n"
214
+ f"HIGH: {high} MEDIUM: {medium} LOW: {low}"
215
+ )
216
+
217
+ table = [
218
+ [r["conflicting_mark"], r["serial"],
219
+ r["visual_score"], r["phonetic_score"],
220
+ r["meaning_score"], r["score"],
221
+ r["risk"], r["explanation"]]
222
+ for r in results[:50]
223
+ ]
224
+ return status, table
225
+
226
+
227
+ # theme is removed from gr.Blocks() — Gradio 6.x requires it in launch()
228
+ with gr.Blocks(title="Trademark Conflict Checker") as gradio_ui:
229
  gr.Markdown("""
230
+ # Trademark Conflict Checker
231
  **TMEP §1207.01 — DuPont Factor 1 Analysis**
232
+ > This Space also serves `POST /search` as a REST API for automated use.
 
 
233
  """)
234
 
 
235
  with gr.Row():
236
+ kw_in = gr.Textbox(label="Applied Mark", placeholder="e.g. APPLE", scale=3)
237
+ cl_in = gr.Textbox(label="Int'l Class (optional)", placeholder="e.g. 009", scale=1)
238
+ st_in = gr.Dropdown(
239
+ label="Filing Status",
240
+ choices=["active", "pending", "dead", "all"],
241
+ value="active",
 
 
 
 
 
 
 
 
242
  scale=1
243
  )
244
 
245
+ btn = gr.Button("Run Conflict Check", variant="primary")
246
+ status_out = gr.Textbox(label="Status", lines=4, interactive=False)
247
 
248
+ gr.Markdown("### Results (sorted by highest risk)")
249
+ table_out = gr.Dataframe(
250
+ headers=["Conflicting Mark", "Serial", "Visual", "Phonetic",
251
+ "Meaning", "Composite", "Risk", "Explanation"],
252
+ datatype=["str", "str", "number", "number", "number", "number", "str", "str"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  interactive=False,
254
  wrap=True
255
  )
256
 
257
+ with gr.Accordion("Score Guide", open=False):
 
258
  gr.Markdown("""
259
+ | Score | Risk | Meaning |
260
+ |-----------|--------|------------------------------------|
261
+ | ≥ 0.75 | HIGH | Likely conflict — consult attorney |
262
+ | 0.50–0.74 | MEDIUM | Possible conflict — review |
263
+ | < 0.50 | LOW | Low similarity |
 
 
 
 
 
 
 
264
  """)
265
 
266
+ btn.click(
267
+ fn=gradio_search,
268
+ inputs=[kw_in, cl_in, st_in],
269
+ outputs=[status_out, table_out]
 
270
  )
271
 
272
 
273
+ # ─────────────────────────────────────────────────────────────────────────────
274
+ # Step 3 — Mount Gradio INTO the existing FastAPI app
275
+ #
276
+ # gr.mount_gradio_app mounts the Gradio UI onto the FastAPI app passed to it
277
+ # and returns that app, so the assignment below keeps `app` as the ASGI entry point.
278
+ # All routes registered above (/search, /health) remain available
279
+ # alongside the mounted UI.
280
+ # Gradio UI is available at /ui
281
+ # ─────────────────────────────────────────────────────────────────────────────
282
+
283
+ app = gr.mount_gradio_app(app, gradio_ui, path="/ui")
284
+
285
+
286
+ # ─────────────────────────────────────────────────────────────────────────────
287
+ # Entry point
288
+ # ─────────────────────────────────────────────────────────────────────────────
289
+
290
  if __name__ == "__main__":
291
+ import uvicorn
292
+ uvicorn.run(
293
+ "gradio_app:app",
294
+ host="0.0.0.0",
295
+ port=7860,
296
+ log_level="info"
297
  )