Spaces:

DIVYA-NSHU99
/

nevergiveup

Sleeping

App Files Files Community

DIVYA-NSHU99 committed on Mar 6

Commit

90699f6

verified ·

1 Parent(s): 75c3655

Update conflict_check/gradio_app.py

Browse files

Files changed (1) hide show

conflict_check/gradio_app.py +221 -154

conflict_check/gradio_app.py CHANGED Viewed

@@ -1,76 +1,92 @@
 # conflict_check/gradio_app.py
 """
-Gradio web interface for the Trademark Conflict Checker.
-This replaces the CLI main.py for Hugging Face Spaces deployment.
 """
-import gradio as gr
-import json
-import os
 import sys
 from pathlib import Path
 from datetime import datetime
-# ── Make sure our packages are importable ─────────────────
-# When running from Docker: PYTHONPATH=/home/user/app/conflict_check
-# When running locally:     add conflict_check/ to sys.path
-THIS_DIR = Path(__file__).resolve().parent          # conflict_check/
 if str(THIS_DIR) not in sys.path:
     sys.path.insert(0, str(THIS_DIR))
-# ── Now import project modules ────────────────────────────
 from app.controllers.search_controller import handle_search
 from app.utils.extract_pairs import iterate_pairs_from_file
 from app.similarity.factor1 import score_factor1
-# ─────────────────────────────────────────────────────────
-# Core search + analysis function
-# ─────────────────────────────────────────────────────────
-def run_trademark_search(keyword: str, intl_class: str, filing_status: str):
-    """
-    Called by Gradio when user clicks Search.
-    Returns:
-        status_msg  — plain text log shown in the Status box
-        table_data  — list of dicts shown in the Results table
-    """
-    log_lines = []
-    def log(msg):
-        log_lines.append(msg)
-        print(msg)   # also visible in HF Space logs
-    # ── Input validation ─────────────────────────────────
-    if not keyword.strip():
-        return "❌ Please enter a keyword.", []
-    keyword      = keyword.strip()
-    intl_class   = intl_class.strip() or None
-    filing_status = filing_status.strip() or None
-    log(f"🔍 Searching: '{keyword}'  class={intl_class}  status={filing_status}")
-    # ── STEP 1: Fetch from Atom API ───────────────────────
     try:
         results = handle_search(keyword, intl_class, filing_status)
     except Exception as e:
-        return f"❌ API Error: {str(e)}", []
-    log(f"✅ Fetched {len(results)} trademark records from Atom API")
     if not results:
-        return (
-            "\n".join(log_lines) +
-            "\n\n⚠️  No records returned from API.\n"
-            "Possible reasons:\n"
-            "  • Daily quota exhausted — try again tomorrow\n"
-            "  • API key invalid — check HF Space Secrets\n"
-            "  • No trademarks match this keyword + filters",
-            []
-        )
-    # ── STEP 2: Find newest saved JSON file ───────────────
     search_folder = THIS_DIR / "search_data"
     json_files = sorted(
         search_folder.glob("search_*.json"),
@@ -79,152 +95,203 @@ def run_trademark_search(keyword: str, intl_class: str, filing_status: str):
     )
     if not json_files:
-        return "\n".join(log_lines) + "\n❌ No JSON file found after search.", []
     latest_file = json_files[0]
-    log(f"📂 Using file: {latest_file.name}")
-    # ── STEP 3: Run DuPont Factor-1 Similarity ────────────
-    log(f"\n⚙️  Running DuPont Factor-1 analysis...")
     analysis_results = []
     for name, serial, idx in iterate_pairs_from_file(latest_file):
         if not name:
             continue
-        score = score_factor1(keyword, name)
         analysis_results.append({
-            "Conflicting Mark" : name,
-            "Serial Number"    : serial or "N/A",
-            "Visual Score"     : round(score.visual_similarity,   3),
-            "Phonetic Score"   : round(score.phonetic_similarity, 3),
-            "Meaning Score"    : round(score.meaning_similarity,  3),
-            "Composite Score"  : round(score.composite_score,     3),
-            "Risk Level"       : _risk_label(score.composite_score),
         })
-    # ── STEP 4: Sort by composite score ───────────────────
-    analysis_results.sort(key=lambda x: x["Composite Score"], reverse=True)
-    # ── STEP 5: Save analysis to file ────────────────────
     output_folder = THIS_DIR / "analysis_output"
     output_folder.mkdir(exist_ok=True)
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    out_file = output_folder / f"factor1_{keyword.replace(' ','_')}_{timestamp}.json"
-    with open(out_file, "w", encoding="utf-8") as f:
         json.dump(analysis_results, f, indent=2, ensure_ascii=False)
-    log(f"✅ Analysis complete — {len(analysis_results)} marks scored")
-    log(f"💾 Saved to: {out_file.name}")
-    status_text = "\n".join(log_lines)
-    return status_text, analysis_results
-def _risk_label(score: float) -> str:
-    """Convert composite score to human-readable risk level."""
-    if score >= 0.85:
-        return "🔴 HIGH"
-    elif score >= 0.65:
-        return "🟠 MEDIUM"
-    elif score >= 0.45:
-        return "🟡 LOW"
-    else:
-        return "🟢 MINIMAL"
-# ─────────────────────────────────────────────────────────
-# Gradio UI Layout
-# ─────────────────────────────────────────────────────────
-with gr.Blocks(
-    title="Trademark Conflict Checker",
-    theme=gr.themes.Soft()
-) as demo:
     gr.Markdown("""
-    # ⚖️ Trademark Conflict Checker
     **TMEP §1207.01 — DuPont Factor 1 Analysis**
-    Search the USPTO trademark database via Atom API and score similarity
-    using Visual (Jaro-Winkler), Phonetic (Metaphone), and Semantic (SBERT) analysis.
     """)
-    # ── Input Row ────────────────────────────────────────
     with gr.Row():
-        keyword_input = gr.Textbox(
-            label="🔤 Applied Mark (your keyword)",
-            placeholder="e.g. APPLE, NIKE SHOES, TECHFLOW",
-            scale=3
-        )
-        class_input = gr.Textbox(
-            label="📦 International Class (optional)",
-            placeholder="e.g. 009, 025, 042",
-            scale=1
-        )
-        status_input = gr.Dropdown(
-            label="📋 Filing Status",
-            choices=["all", "active", "pending", "dead"],
-            value="all",
             scale=1
         )
-    search_btn = gr.Button("🔍 Run Conflict Check", variant="primary", size="lg")
-    # ── Status Output ────────────────────────────────────
-    status_output = gr.Textbox(
-        label="📊 Search Log",
-        lines=8,
-        interactive=False
-    )
-    # ── Results Table ────────────────────────────────────
-    gr.Markdown("### 🏆 Conflict Results (sorted by risk)")
-    results_table = gr.Dataframe(
-        headers=[
-            "Conflicting Mark",
-            "Serial Number",
-            "Visual Score",
-            "Phonetic Score",
-            "Meaning Score",
-            "Composite Score",
-            "Risk Level"
-        ],
-        datatype=["str", "str", "number", "number", "number", "number", "str"],
         interactive=False,
         wrap=True
     )
-    # ── Score Guide ──────────────────────────────────────
-    with gr.Accordion("📖 Score Guide", open=False):
         gr.Markdown("""
-        | Score Range | Risk Level | Meaning |
-        |---|---|---|
-        | 0.85 – 1.00 | 🔴 HIGH | Very likely conflict — consult attorney |
-        | 0.65 – 0.84 | 🟠 MEDIUM | Possible conflict — review carefully |
-        | 0.45 – 0.64 | 🟡 LOW | Minor similarity — probably safe |
-        | 0.00 – 0.44 | 🟢 MINIMAL | Very low similarity |
-        **Score Components:**
-        - **Visual Score** — Jaro-Winkler string similarity (how marks *look*)
-        - **Phonetic Score** — Metaphone encoding (how marks *sound*)
-        - **Meaning Score** — Sentence-BERT cosine similarity (what marks *mean*)
-        - **Composite Score** — Weighted average: 35% visual + 35% phonetic + 30% meaning
         """)
-    # ── Wire up button ────────────────────────────────────
-    search_btn.click(
-        fn=run_trademark_search,
-        inputs=[keyword_input, class_input, status_input],
-        outputs=[status_output, results_table]
     )
-# ─────────────────────────────────────────────────────────
-# Launch
-# ─────────────────────────────────────────────────────────
 if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",   # required for Docker / HF Spaces
-        server_port=7860,         # HF Spaces default port
-        show_error=True
     )

 # conflict_check/gradio_app.py
 """
+Serves TWO things on port 7860:
+  POST /search  <- REST API called by trademark_pdf_extractor.py
+  GET  /ui      <- Gradio UI for direct browser use
 """
 import sys
+import json
+import logging
 from pathlib import Path
 from datetime import datetime
+from typing import Optional
+import gradio as gr
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+# ── Make project importable ───────────────────────────────────────────────────
+THIS_DIR = Path(__file__).resolve().parent
 if str(THIS_DIR) not in sys.path:
     sys.path.insert(0, str(THIS_DIR))
 from app.controllers.search_controller import handle_search
 from app.utils.extract_pairs import iterate_pairs_from_file
 from app.similarity.factor1 import score_factor1
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("trademark_hf")
+# ─────────────────────────────────────────────────────────────────────────────
+# Shared core logic
+# ─────────────────────────────────────────────────────────────────────────────
+def _risk_label(score: float) -> str:
+    if score >= 0.75:
+        return "HIGH"
+    elif score >= 0.50:
+        return "MEDIUM"
+    else:
+        return "LOW"
+def _build_explanation(score_obj, applied: str, conflicting: str) -> str:
+    parts = []
+    if score_obj.visual_similarity >= 0.80:
+        parts.append("visually very similar")
+    elif score_obj.visual_similarity >= 0.60:
+        parts.append("visually similar")
+    if score_obj.phonetic_similarity >= 0.80:
+        parts.append("sounds alike")
+    elif score_obj.phonetic_similarity >= 0.60:
+        parts.append("phonetically similar")
+    if score_obj.meaning_similarity >= 0.75:
+        parts.append("same conceptual meaning")
+    elif score_obj.meaning_similarity >= 0.55:
+        parts.append("related meaning")
+    if score_obj.dominant_word_match:
+        parts.append("dominant word identical")
+    if not parts:
+        return f"Low overall similarity between '{applied}' and '{conflicting}'."
+    return f"'{applied}' and '{conflicting}' are {', '.join(parts)}."
+def run_conflict_analysis(
+    keyword: str,
+    intl_class: Optional[str] = None,
+    filing_status: str = "active"
+) -> list:
+    if not keyword or not keyword.strip():
+        return []
+    keyword = keyword.strip()
+    logger.info("Conflict search: keyword=%s class=%s status=%s",
+                keyword, intl_class, filing_status)
     try:
         results = handle_search(keyword, intl_class, filing_status)
     except Exception as e:
+        logger.exception("handle_search failed")
+        return [{"error": str(e), "status": "failed"}]
     if not results:
+        logger.warning("Atom API returned 0 results for '%s'", keyword)
+        return []
     search_folder = THIS_DIR / "search_data"
     json_files = sorted(
         search_folder.glob("search_*.json"),
     )
     if not json_files:
+        logger.error("No search JSON file found after API call")
+        return []
     latest_file = json_files[0]
+    logger.info("Using search file: %s", latest_file.name)
     analysis_results = []
     for name, serial, idx in iterate_pairs_from_file(latest_file):
         if not name:
             continue
+        try:
+            score_obj = score_factor1(keyword, name)
+        except Exception as e:
+            logger.warning("score_factor1 failed for '%s': %s", name, e)
+            continue
         analysis_results.append({
+            "applied_mark":     keyword,
+            "conflicting_mark": name,
+            "serial":           serial or "N/A",
+            "score":            round(score_obj.composite_score, 4),
+            "risk":             _risk_label(score_obj.composite_score),
+            "explanation":      _build_explanation(score_obj, keyword, name),
+            "visual_score":     round(score_obj.visual_similarity,   3),
+            "phonetic_score":   round(score_obj.phonetic_similarity, 3),
+            "meaning_score":    round(score_obj.meaning_similarity,  3),
         })
+    analysis_results.sort(key=lambda x: x["score"], reverse=True)
     output_folder = THIS_DIR / "analysis_output"
     output_folder.mkdir(exist_ok=True)
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    out_path = output_folder / f"factor1_{keyword.replace(' ','_')}_{timestamp}.json"
+    with open(out_path, "w", encoding="utf-8") as f:
         json.dump(analysis_results, f, indent=2, ensure_ascii=False)
+    logger.info("Saved %s (%d marks scored)", out_path.name, len(analysis_results))
+    return analysis_results
+# ─────────────────────────────────────────────────────────────────────────────
+# Step 1 — Build FastAPI app and register ALL routes FIRST
+# ─────────────────────────────────────────────────────────────────────────────
+# IMPORTANT: All FastAPI routes must be registered on `app` BEFORE calling
+# gr.mount_gradio_app(), because that call returns a new app object.
+# Any route registered after mounting would be on a different object
+# and would result in 404 errors.
+# ─────────────────────────────────────────────────────────────────────────────
+app = FastAPI(title="Trademark Conflict API")
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+class SearchRequest(BaseModel):
+    mark_text:     str
+    intl_class:    Optional[str] = None
+    filing_status: str = "active"
+# ── Register /search BEFORE mounting Gradio ───────────────────────────────────
+@app.post("/search")
+async def search_endpoint(body: SearchRequest):
+    """
+    Called by trademark_pdf_extractor.py → forward_mark_text_to_server().
+    Input:  { "mark_text": "APPLE", "filing_status": "active" }
+    Output: list of conflict records sorted by composite score
+    """
+    logger.info("POST /search  mark_text='%s'", body.mark_text)
+    results = run_conflict_analysis(
+        keyword=body.mark_text,
+        intl_class=body.intl_class,
+        filing_status=body.filing_status or "active"
+    )
+    return JSONResponse(content=results)
+# ── Register /health BEFORE mounting Gradio ───────────────────────────────────
+@app.get("/health")
+def health():
+    return {"status": "ok", "service": "trademark-conflict-hf"}
+# ─────────────────────────────────────────────────────────────────────────────
+# Step 2 — Build Gradio UI
+# NOTE: theme is NOT passed to gr.Blocks() in Gradio 6.x — it goes to launch()
+# ─────────────────────────────────────────────────────────────────────────────
+def gradio_search(keyword: str, intl_class: str, filing_status: str):
+    if not keyword.strip():
+        return "Please enter a keyword.", []
+    results = run_conflict_analysis(
+        keyword.strip(),
+        intl_class.strip() or None,
+        filing_status
+    )
+    if not results:
+        return "No results returned. Check API quota or credentials.", []
+    if results[0].get("error"):
+        return f"Error: {results[0]['error']}", []
+    high   = sum(1 for r in results if r["risk"] == "HIGH")
+    medium = sum(1 for r in results if r["risk"] == "MEDIUM")
+    low    = sum(1 for r in results if r["risk"] == "LOW")
+    status = (
+        f"Analysis complete — {len(results)} marks scored\n"
+        f"HIGH: {high}   MEDIUM: {medium}   LOW: {low}"
+    )
+    table = [
+        [r["conflicting_mark"], r["serial"],
+         r["visual_score"], r["phonetic_score"],
+         r["meaning_score"], r["score"],
+         r["risk"], r["explanation"]]
+        for r in results[:50]
+    ]
+    return status, table
+# theme is removed from gr.Blocks() — Gradio 6.x requires it in launch()
+with gr.Blocks(title="Trademark Conflict Checker") as gradio_ui:
     gr.Markdown("""
+    # Trademark Conflict Checker
     **TMEP §1207.01 — DuPont Factor 1 Analysis**
+    > This Space also serves `POST /search` as a REST API for automated use.
     """)
     with gr.Row():
+        kw_in = gr.Textbox(label="Applied Mark", placeholder="e.g. APPLE", scale=3)
+        cl_in = gr.Textbox(label="Int'l Class (optional)", placeholder="e.g. 009", scale=1)
+        st_in = gr.Dropdown(
+            label="Filing Status",
+            choices=["active", "pending", "dead", "all"],
+            value="active",
             scale=1
         )
+    btn = gr.Button("Run Conflict Check", variant="primary")
+    status_out = gr.Textbox(label="Status", lines=4, interactive=False)
+    gr.Markdown("### Results (sorted by highest risk)")
+    table_out = gr.Dataframe(
+        headers=["Conflicting Mark", "Serial", "Visual", "Phonetic",
+                 "Meaning", "Composite", "Risk", "Explanation"],
+        datatype=["str", "str", "number", "number", "number", "number", "str", "str"],
         interactive=False,
         wrap=True
     )
+    with gr.Accordion("Score Guide", open=False):
         gr.Markdown("""
+        | Score     | Risk   | Meaning                            |
+        |-----------|--------|------------------------------------|
+        | ≥ 0.75    | HIGH   | Likely conflict — consult attorney |
+        | 0.50–0.74 | MEDIUM | Possible conflict — review         |
+        | < 0.50    | LOW    | Low similarity                     |
         """)
+    btn.click(
+        fn=gradio_search,
+        inputs=[kw_in, cl_in, st_in],
+        outputs=[status_out, table_out]
     )
+# ─────────────────────────────────────────────────────────────────────────────
+# Step 3 — Mount Gradio INTO the existing FastAPI app
+#
+# gr.mount_gradio_app returns a new combined app object.
+# We reassign `app` to that new object.
+# All routes registered above (/search, /health) are preserved
+# because they were added to `app` BEFORE this call.
+# Gradio UI is available at /ui
+# ─────────────────────────────────────────────────────────────────────────────
+app = gr.mount_gradio_app(app, gradio_ui, path="/ui")
+# ─────────────────────────────────────────────────────────────────────────────
+# Entry point
+# ─────────────────────────────────────────────────────────────────────────────
 if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(
+        "gradio_app:app",
+        host="0.0.0.0",
+        port=7860,
+        log_level="info"
     )