Emre Sarigöl committed
Commit d3a246e · 1 Parent(s): a02dc64

Deploy GURMA.ai Dashboard - 2026-02-18 14:15

Files changed (12)
  1. Dockerfile +1 -1
  2. app.py +267 -256
  3. cli.py +340 -0
  4. config.py +99 -0
  5. extract.py +537 -0
  6. intel.py +508 -0
  7. llm.py +154 -0
  8. research.py +61 -1913
  9. search.py +305 -0
  10. sota_agent.py +850 -0
  11. tr_agents.py +480 -0
  12. tr_tab.py +218 -0
Dockerfile CHANGED
@@ -23,7 +23,7 @@ RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
 
 # Create data directories
-RUN mkdir -p data/intel
+RUN mkdir -p data/intel data/tr-mali data/tr-fonlar docs
 
 # Expose Streamlit port (HF Spaces expects app_port from README.md)
 EXPOSE 8501
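The broadened `mkdir -p` pre-creates every directory the new modules write into: `data/intel` for Deep Intel reports, `data/tr-mali` and `data/tr-fonlar` for the Turkish research agents in tr_agents.py, and `docs`. A minimal sketch of resolving those paths at runtime — the directory names come from the mkdir call above; the constant names and usage are assumptions, not code from this commit:

from pathlib import Path

DATA_ROOT = Path("/app/data")            # /app is the project root on HF Spaces (see config.py)
TR_MALI_DIR = DATA_ROOT / "tr-mali"      # assumed home of MaliMusavirAgent reports
TR_FONLAR_DIR = DATA_ROOT / "tr-fonlar"  # assumed home of FonArastirmaAgent reports

for d in (TR_MALI_DIR, TR_FONLAR_DIR):
    d.mkdir(parents=True, exist_ok=True)  # idempotent, mirrors RUN mkdir -p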
app.py CHANGED
@@ -25,7 +25,7 @@ import pandas as pd
 IS_HF_SPACE = os.getenv("HF_SPACE") or Path("/app/research.py").exists()
 
 if IS_HF_SPACE:
-    # HF Space: import from same directory
+    # HF Space: research.py shim re-exports everything
    from research import (
        SearchService,
        CompetitorExtractor,
@@ -41,9 +41,9 @@ if IS_HF_SPACE:
        LLM_ENABLED,
    )
 else:
-    # Local: add src to path and import from utils
+    # Local: import via package __init__
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
-    from src.utils.research import (
+    from src.utils import (
        SearchService,
        CompetitorExtractor,
        CompetitorIntelAgent,
@@ -832,225 +832,18 @@ def export_html(data: dict, research: list, date_range: str = "All time") -> str
 
 
 # ============================================================
-# Main Application
+# Competitive Intel Page
 # ============================================================
 
-def main():
-    # Check access
-    if not check_access():
-        show_login_page()
-        return
-
-    # On HF Space, optionally hydrate runtime data from a private dataset repo.
-    sync_status = sync_private_data_if_configured()
-    if sync_status.get("status") == "error":
-        st.error(f"Private data sync failed: {sync_status.get('reason', 'unknown error')}")
-        return
-
-    # --- Page Navigation ---
-    page = st.sidebar.radio(
-        "Navigation",
-        ["Competitive Intel", "Model Evaluation"],
-        index=0,
-        key="nav_page",
-    )
-
-    if page == "Model Evaluation":
-        if IS_HF_SPACE:
-            from eval_tab import render_eval_tab
-        else:
-            from src.dashboard.eval_tab import render_eval_tab
-        render_eval_tab()
-        return
-
-    data = load_data()
-    research = load_research_files()
-
-    # --- Sidebar ---
-    with st.sidebar:
-        # === RESEARCH ===
-        st.header("Research")
-
-        queries_text = st.text_area(
-            "Queries",
-            value=DEFAULT_QUERIES,
-            height=150,
-            help="Enter search queries, one per line."
-        )
-        queries = [q.strip() for q in queries_text.strip().split("\n") if q.strip()]
-
-        # AI analysis option (only if LLM enabled)
-        analyze_with_ai = False
-        if LLM_ENABLED:
-            analyze_with_ai = st.checkbox("Analyze with AI", value=True, help="Use LLM to extract strategic insights from results")
-
-        if st.button(f"Run {len(queries)} searches", width="stretch", type="primary"):
-            progress = st.progress(0, text="Starting...")
-            success, total, failed, insights = run_expand_research(
-                queries,
-                progress_callback=lambda p, t: progress.progress(p, text=t),
-                analyze_with_ai=analyze_with_ai
-            )
-            progress.empty()
-
-            if success > 0:
-                msg = f"{success}/{total} searches done"
-                if insights:
-                    msg += f" + {len(insights)} AI insights"
-                    # Store insights in session state for display
-                    st.session_state.last_insights = insights
-                st.success(msg)
-                st.cache_data.clear()
-                st.rerun()
-
-        # Show last AI insights if any
-        if st.session_state.get("last_insights"):
-            with st.expander("AI Insights", expanded=True):
-                for insight in st.session_state.last_insights:
-                    st.caption(f"• {insight}")
-                if st.button("Clear", key="clear_insights"):
-                    del st.session_state.last_insights
-                    st.rerun()
-
-        st.divider()
-
-        # === DEEP INTEL ===
-        st.header("Deep Intel")
-        intel_company = st.selectbox("Competitor", COMPETITORS, index=0)
-        intel_categories = st.multiselect(
-            "Categories",
-            options=list(DEEP_INTEL_CATEGORIES.keys()),
-            default=list(DEEP_INTEL_CATEGORIES.keys()),
-            format_func=lambda k: DEEP_INTEL_CATEGORIES[k]["label"],
-        )
-        btn_col1, btn_col2 = st.columns([3, 1])
-        run_clicked = btn_col1.button("Run Deep Intel", width="stretch")
-        stop_clicked = btn_col2.button("Stop", key="stop_intel", width="stretch")
-
-        if stop_clicked:
-            st.session_state["intel_stop"] = True
-
-        if run_clicked:
-            st.session_state["intel_stop"] = False
-            agent = CompetitorIntelAgent(intel_company)
-            total_queries = sum(
-                len(DEEP_INTEL_CATEGORIES[c]["queries"])
-                for c in intel_categories if c in DEEP_INTEL_CATEGORIES
-            )
-            progress = st.progress(0, text=f"Starting {intel_company}...")
-            completed = [0]
-
-            original_search = agent.search.search
-            def _tracked_search(query, max_results=10, save=True):
-                if st.session_state.get("intel_stop"):
-                    return []
-                completed[0] += 1
-                progress.progress(
-                    min(completed[0] / max(total_queries, 1), 0.95),
-                    text=f"[{completed[0]}/{total_queries}] {query[:40]}...",
-                )
-                return original_search(query, max_results=max_results, save=save)
-            agent.search.search = _tracked_search
-
-            report_path = agent.run(
-                categories=intel_categories or None,
-                delay=1.0,
-            )
-            progress.progress(1.0, text="Done!")
-            progress.empty()
-
-            stopped = st.session_state.get("intel_stop", False)
-            findings = sum(len(s.findings) for s in agent.sections.values())
-            gaps = sum(len(s.gaps) for s in agent.sections.values())
-            if stopped:
-                st.warning(f"Stopped early — {intel_company}: {findings} findings, {gaps} gaps (partial)")
-            else:
-                st.success(f"{intel_company}: {findings} findings, {gaps} gaps")
-            st.session_state["intel_stop"] = False
-            st.cache_data.clear()
-            st.rerun()
-
-        st.divider()
-
-        # === DATA ===
-        st.header("Data")
-
-        date_range = st.selectbox(
-            "Time range",
-            ["All time", "Last 7 days", "Last 30 days", "Last 90 days"],
-            index=0,
-            label_visibility="collapsed"
-        )
-
-        col1, col2 = st.columns(2)
-        if col1.button("Refresh", width="stretch", help="Re-extract from research files"):
-            with st.spinner("..."):
-                run_extract()
-            st.cache_data.clear()
-            st.rerun()
-
-        if data:
-            report = export_html(data, research, date_range)
-            col2.download_button(
-                "Export",
-                report,
-                file_name=f"report-{datetime.now().strftime('%Y%m%d')}.html",
-                mime="text/html",
-                width="stretch"
-            )
-
-        st.divider()
-
-        # === STATUS ===
-        st.caption(f"{len(research)} files · Updated {data.get('_generated', 'N/A')[:10] if data else 'never'}")
-
-        if ACCESS_KEY and st.session_state.get("authenticated"):
-            if st.button("Logout", width="stretch"):
-                st.session_state.authenticated = False
-                st.session_state.admin_authenticated = False
-                st.query_params.pop("auth", None)
-                st.query_params.pop("adm", None)
-                st.rerun()
-
-        # === ADMIN: Access Log ===
-        if ADMIN_KEY:
-            # Auto-authenticate from URL token
-            if not st.session_state.get("admin_authenticated"):
-                if st.query_params.get("adm") == _auth_token(ADMIN_KEY, salt="gurma_adm"):
-                    st.session_state.admin_authenticated = True
-
-            st.divider()
-            if st.session_state.get("admin_authenticated"):
-                access_log = load_access_log()
-                st.caption(f"Access log ({len(access_log)} entries)")
-                if access_log:
-                    for entry in reversed(access_log[-20:]):
-                        st.caption(f"{entry.get('timestamp', '?')} · {entry.get('ip', '?')}")
-                else:
-                    st.caption("No accesses recorded yet")
-            else:
-                with st.popover("Admin"):
-                    admin_input = st.text_input("Admin key", type="password", key="admin_key_input")
-                    if st.button("Unlock", key="admin_unlock"):
-                        if admin_input == ADMIN_KEY:
-                            st.session_state.admin_authenticated = True
-                            st.query_params["adm"] = _auth_token(ADMIN_KEY, salt="gurma_adm")
-                            st.rerun()
-                        else:
-                            st.error("Invalid")
-
-    # --- Log access ---
-    log_access()
-
-    # --- Main Content ---
-    st.title("Rehabilitation Robotics — Competitive Landscape")
-
+
+def _render_intel_page(data, research, date_range):
+    """Competitive intel main content — rendered inside its tab."""
    if not data:
        st.warning("No competitor data found.")
        st.markdown("**First time?** Run the research pipeline to get started:")
-
+
        col_init1, col_init2 = st.columns(2)
-
+
        if col_init1.button("Quick Start (10 searches)", type="primary", width="stretch"):
            with st.spinner("Running core competitor searches..."):
                core_queries = [
@@ -1071,15 +864,15 @@ def main():
                    progress.progress((i + 1) / (len(core_queries) + 1), f"Searching: {q[:30]}...")
                    if run_search(q):
                        success += 1
-
+
                progress.progress(1.0, "Extracting data...")
                run_extract()
                progress.empty()
-
+
                st.success(f"Done! {success}/{len(core_queries)} searches completed.")
                st.cache_data.clear()
                st.rerun()
-
+
        if col_init2.button("Full Research (47 searches)", width="stretch"):
            with st.spinner("Running full competitor research..."):
                queries = []
@@ -1087,45 +880,45 @@ def main():
                    for template in BATCH_QUERY_TEMPLATES:
                        queries.append(template.format(company=company))
                queries.extend(MARKET_QUERIES)
-
+
                progress = st.progress(0, "Starting...")
                success = 0
                for i, q in enumerate(queries):
                    progress.progress((i + 1) / (len(queries) + 1), f"[{i+1}/{len(queries)}] {q[:30]}...")
                    if run_search(q):
                        success += 1
-
+
                progress.progress(1.0, "Extracting data...")
                run_extract()
                progress.empty()
-
+
                st.success(f"Done! {success}/{len(queries)} searches completed.")
                st.cache_data.clear()
                st.rerun()
-
+
        return
-
+
    competitors = data.get("competitors", [])
    market = data.get("market", {})
-
+
    # ===== MARKET & OPPORTUNITY =====
    col_market, col_opp = st.columns([1, 1])
-
+
    with col_market:
        st.markdown("### Market")
-
+
        size_2024 = market.get('size_2024', 2e9)
        size_2029 = market.get('size_2029_ai', 9.1e9)
        cagr = market.get('cagr', 0.278)
-
+
        m1, m2, m3 = st.columns(3)
        m1.metric("2024 Market", f"${size_2024/1e9:.1f}B")
        m2.metric("2029 AI Segment", f"${size_2029/1e9:.1f}B")
        m3.metric("CAGR", f"{cagr*100:.1f}%")
-
+
        growth_pct = min((size_2029 / size_2024 - 1) * 100, 400)
        st.progress(growth_pct / 400, text=f"{growth_pct:.0f}% projected growth (2024→2029)")
-
+
    with col_opp:
        opportunity = data.get("opportunity", {})
        headline = opportunity.get("headline", "Market opportunity detected")
@@ -1133,18 +926,16 @@ def main():
        confirmed = opportunity.get("confirmed", False)
        update_available = opportunity.get("update_available", False)
        detected_at = opportunity.get("detected_at", "")
-
+
        if confirmed:
            badge = f"<span style='color: #2ecc71;'>● Confirmed {opportunity.get('confirmed_at', detected_at)}</span>"
        elif update_available:
            badge = "<span style='color: #e67e22;'>● Update available</span>"
        else:
            badge = f"<span style='color: #3498db;'>● Auto-detected {detected_at}</span>"
-
-        # Source indicators
+
        sources = opportunity.get("sources", [])
        if not sources:
-            # Backward compat with old intel_sourced/llm_synthesized booleans
            if opportunity.get("intel_sourced"):
                sources.append("intel")
            if opportunity.get("llm_synthesized"):
@@ -1155,9 +946,9 @@ def main():
            for s in sources if s in badge_labels
        ]
        source_html = " ".join(source_tags)
-
+
        points_html = "".join(f"<li>{p}</li>" for p in points[:4])
-
+
        st.markdown(f"""
        <div style="
            background: linear-gradient(135deg, #1a472a 0%, #2d5a3c 100%);
@@ -1174,7 +965,7 @@ def main():
        </ul>
        </div>
        """, unsafe_allow_html=True)
-
+
        opp_col1, opp_col2 = st.columns(2)
        if not confirmed:
            if opp_col1.button("Confirm", key="confirm_opp", width="stretch"):
@@ -1186,19 +977,19 @@ def main():
                apply_opportunity_update()
                st.cache_data.clear()
                st.rerun()
-
+
    st.divider()
-
+
    # ===== COMPETITOR CARDS =====
    st.header("Competitors")
-
+
    sorted_competitors = sorted(competitors, key=lambda x: x.get("mentions", 0), reverse=True)
-
+
    legend_items = [f"<span style='color: {v['color']};'>●</span> {v['label']}" for k, v in STATUS_CONFIG.items() if k != "unknown"]
    st.markdown(" &nbsp;|&nbsp; ".join(legend_items), unsafe_allow_html=True)
-
+
    col1, col2 = st.columns(2)
-
+
    for i, comp in enumerate(sorted_competitors):
        with col1 if i % 2 == 0 else col2:
            status = comp.get("status", "unknown")
@@ -1206,7 +997,7 @@ def main():
            color = status_color(status)
            label = status_label(status)
            mentions = comp.get("mentions", 0)
-
+
            st.markdown(f"""
            <div style="
                border: 1px solid {color}40;
@@ -1232,37 +1023,37 @@ def main():
            </div>
            </div>
            """, unsafe_allow_html=True)
-
+
            with st.expander("Details", expanded=False):
                m1, m2 = st.columns(2)
                if comp.get("stock"):
                    m1.metric("Stock", f"${comp['stock']:.2f}")
                if comp.get("funding"):
                    m2.metric("Funding", f"${comp['funding']/1e6:.0f}M")
-
+
                if comp.get("notes"):
                    st.caption(comp["notes"][:200] + "..." if len(comp.get("notes", "")) > 200 else comp.get("notes", ""))
-
+
                events = comp.get("events", [])[:3]
                if events:
                    st.markdown("**Recent:**")
                    for e in events:
                        st.caption(f"• {e.get('date', 'N/A')}: {e.get('event', '')[:80]}...")
-
+
                urls = comp.get("sample_urls", [])[:2]
                if urls:
                    for url in urls:
                        st.markdown(f"[Source →]({url})")
-
+
    # --- Timeline ---
    st.header("Timeline")
-
+
    timeline_fig = build_timeline_figure(sorted_competitors, date_range)
    if timeline_fig:
        st.plotly_chart(timeline_fig, width="stretch")
    else:
        st.info("No events in selected time range")
-
+
    # --- Deep Intel ---
    intel_reports = load_intel_reports()
    if intel_reports:
@@ -1320,7 +1111,7 @@ def main():
    with st.expander("Recent News", expanded=False):
        news_by_company = {c["name"]: [] for c in competitors}
        news_by_company["Other"] = []
-
+
        for r in research:
            timestamp = r.get("timestamp", "")
            for result in r.get("results", []):
@@ -1330,7 +1121,7 @@ def main():
                    "url": result.get("url", ""),
                    "date": timestamp[:10] if timestamp else ""
                }
-
+
                text = (item["title"] + " " + item["snippet"]).lower()
                found = False
                for comp in competitors:
@@ -1341,13 +1132,13 @@ def main():
                        break
                if not found:
                    news_by_company["Other"].append(item)
-
+
        company_options = ["All"] + [c["name"] for c in competitors if news_by_company.get(c["name"])]
        company_filter = st.selectbox("Filter by company", company_options, index=0)
-
+
        displayed = 0
        max_display = 12
-
+
        if company_filter == "All":
            active_companies = [c for c in competitors if news_by_company.get(c["name"])]
            per_company = max(2, max_display // len(active_companies)) if active_companies else 0
@@ -1363,10 +1154,230 @@ def main():
            for item in items[:max_display]:
                _render_news_item(item, company_filter)
                displayed += 1
-
+
        if displayed == 0:
            st.info("No news found. Run some searches!")
 
 
+# ============================================================
+# Main Application
+# ============================================================
+
+def main():
+    # Check access
+    if not check_access():
+        show_login_page()
+        return
+
+    # On HF Space, optionally hydrate runtime data from a private dataset repo.
+    sync_status = sync_private_data_if_configured()
+    if sync_status.get("status") == "error":
+        st.error(f"Private data sync failed: {sync_status.get('reason', 'unknown error')}")
+        return
+
+    data = load_data()
+    research = load_research_files()
+
+    # --- Sidebar (Competitive Intel controls) ---
+    with st.sidebar:
+        # === RESEARCH ===
+        st.header("Research")
+
+        queries_text = st.text_area(
+            "Queries",
+            value=DEFAULT_QUERIES,
+            height=150,
+            help="Enter search queries, one per line."
+        )
+        queries = [q.strip() for q in queries_text.strip().split("\n") if q.strip()]
+
+        # AI analysis option (only if LLM enabled)
+        analyze_with_ai = False
+        if LLM_ENABLED:
+            analyze_with_ai = st.checkbox("Analyze with AI", value=True, help="Use LLM to extract strategic insights from results")
+
+        if st.button(f"Run {len(queries)} searches", width="stretch", type="primary"):
+            progress = st.progress(0, text="Starting...")
+            success, total, failed, insights = run_expand_research(
+                queries,
+                progress_callback=lambda p, t: progress.progress(p, text=t),
+                analyze_with_ai=analyze_with_ai
+            )
+            progress.empty()
+
+            if success > 0:
+                msg = f"{success}/{total} searches done"
+                if insights:
+                    msg += f" + {len(insights)} AI insights"
+                    # Store insights in session state for display
+                    st.session_state.last_insights = insights
+                st.success(msg)
+                st.cache_data.clear()
+                st.rerun()
+
+        # Show last AI insights if any
+        if st.session_state.get("last_insights"):
+            with st.expander("AI Insights", expanded=True):
+                for insight in st.session_state.last_insights:
+                    st.caption(f"• {insight}")
+                if st.button("Clear", key="clear_insights"):
+                    del st.session_state.last_insights
+                    st.rerun()
+
+        st.divider()
+
+        # === DEEP INTEL ===
+        st.header("Deep Intel")
+        intel_company = st.selectbox("Competitor", COMPETITORS, index=0)
+        intel_categories = st.multiselect(
+            "Categories",
+            options=list(DEEP_INTEL_CATEGORIES.keys()),
+            default=list(DEEP_INTEL_CATEGORIES.keys()),
+            format_func=lambda k: DEEP_INTEL_CATEGORIES[k]["label"],
+        )
+        btn_col1, btn_col2 = st.columns([3, 1])
+        run_clicked = btn_col1.button("Run Deep Intel", width="stretch")
+        stop_clicked = btn_col2.button("Stop", key="stop_intel", width="stretch")
+
+        if stop_clicked:
+            st.session_state["intel_stop"] = True
+
+        if run_clicked:
+            st.session_state["intel_stop"] = False
+            agent = CompetitorIntelAgent(intel_company)
+            total_queries = sum(
+                len(DEEP_INTEL_CATEGORIES[c]["queries"])
+                for c in intel_categories if c in DEEP_INTEL_CATEGORIES
+            )
+            progress = st.progress(0, text=f"Starting {intel_company}...")
+            completed = [0]
+
+            original_search = agent.search.search
+            def _tracked_search(query, max_results=10, save=True):
+                if st.session_state.get("intel_stop"):
+                    return []
+                completed[0] += 1
+                progress.progress(
+                    min(completed[0] / max(total_queries, 1), 0.95),
+                    text=f"[{completed[0]}/{total_queries}] {query[:40]}...",
+                )
+                return original_search(query, max_results=max_results, save=save)
+            agent.search.search = _tracked_search
+
+            report_path = agent.run(
+                categories=intel_categories or None,
+                delay=1.0,
+            )
+            progress.progress(1.0, text="Done!")
+            progress.empty()
+
+            stopped = st.session_state.get("intel_stop", False)
+            findings = sum(len(s.findings) for s in agent.sections.values())
+            gaps = sum(len(s.gaps) for s in agent.sections.values())
+            if stopped:
+                st.warning(f"Stopped early — {intel_company}: {findings} findings, {gaps} gaps (partial)")
+            else:
+                st.success(f"{intel_company}: {findings} findings, {gaps} gaps")
+            st.session_state["intel_stop"] = False
+            st.cache_data.clear()
+            st.rerun()
+
+        st.divider()
+
+        # === DATA ===
+        st.header("Data")
+
+        date_range = st.selectbox(
+            "Time range",
+            ["All time", "Last 7 days", "Last 30 days", "Last 90 days"],
+            index=0,
+            label_visibility="collapsed"
+        )
+
+        col1, col2 = st.columns(2)
+        if col1.button("Refresh", width="stretch", help="Re-extract from research files"):
+            with st.spinner("..."):
+                run_extract()
+            st.cache_data.clear()
+            st.rerun()
+
+        if data:
+            report = export_html(data, research, date_range)
+            col2.download_button(
+                "Export",
+                report,
+                file_name=f"report-{datetime.now().strftime('%Y%m%d')}.html",
+                mime="text/html",
+                width="stretch"
+            )
+
+        st.divider()
+
+        # === STATUS ===
+        st.caption(f"{len(research)} files · Updated {data.get('_generated', 'N/A')[:10] if data else 'never'}")
+
+        if ACCESS_KEY and st.session_state.get("authenticated"):
+            if st.button("Logout", width="stretch"):
+                st.session_state.authenticated = False
+                st.session_state.admin_authenticated = False
+                st.query_params.pop("auth", None)
+                st.query_params.pop("adm", None)
+                st.rerun()
+
+        # === ADMIN: Access Log ===
+        if ADMIN_KEY:
+            # Auto-authenticate from URL token
+            if not st.session_state.get("admin_authenticated"):
+                if st.query_params.get("adm") == _auth_token(ADMIN_KEY, salt="gurma_adm"):
+                    st.session_state.admin_authenticated = True
+
+            st.divider()
+            if st.session_state.get("admin_authenticated"):
+                access_log = load_access_log()
+                st.caption(f"Access log ({len(access_log)} entries)")
+                if access_log:
+                    for entry in reversed(access_log[-20:]):
+                        st.caption(f"{entry.get('timestamp', '?')} · {entry.get('ip', '?')}")
+                else:
+                    st.caption("No accesses recorded yet")
+            else:
+                with st.popover("Admin"):
+                    admin_input = st.text_input("Admin key", type="password", key="admin_key_input")
+                    if st.button("Unlock", key="admin_unlock"):
+                        if admin_input == ADMIN_KEY:
+                            st.session_state.admin_authenticated = True
+                            st.query_params["adm"] = _auth_token(ADMIN_KEY, salt="gurma_adm")
+                            st.rerun()
+                        else:
+                            st.error("Invalid")
+
+    # --- Log access ---
+    log_access()
+
+    # --- Main Content (Tabs) ---
+    tab_intel, tab_eval, tab_tr = st.tabs([
+        "Competitive Intel",
+        "Model Evaluation",
+        "Turkey Expansion",
+    ])
+
+    with tab_intel:
+        _render_intel_page(data, research, date_range)
+
+    with tab_eval:
+        if IS_HF_SPACE:
+            from eval_tab import render_eval_tab
+        else:
+            from src.dashboard.eval_tab import render_eval_tab
+        render_eval_tab()
+
+    with tab_tr:
+        if IS_HF_SPACE:
+            from tr_tab import render_tr_tab
+        else:
+            from src.dashboard.tr_tab import render_tr_tab
+        render_tr_tab()
+
+
 if __name__ == "__main__":
     main()
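The core of this refactor: the old `main()` mixed navigation (a sidebar radio) with the intel page body, while the new version factors the body into `_render_intel_page()` and switches to `st.tabs`, importing each tab's renderer inside its `with` block so the HF-Space vs. local module path is resolved at run time. A stripped-down sketch of the same pattern (toy stand-ins, not the dashboard code itself):

import streamlit as st

IS_HF_SPACE = False  # stand-in for the detection at the top of app.py

def _render_intel_page() -> None:
    st.header("Competitive Intel")  # stand-in for the real page body

tab_intel, tab_eval = st.tabs(["Competitive Intel", "Model Evaluation"])

with tab_intel:
    _render_intel_page()

with tab_eval:
    if IS_HF_SPACE:
        from eval_tab import render_eval_tab                # flat layout on HF Spaces
    else:
        from src.dashboard.eval_tab import render_eval_tab  # package layout locally
    render_eval_tab()

Note that Streamlit executes every tab body on each rerun; tabs control visibility, not execution, so the per-tab imports run regardless of which tab is selected.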
cli.py ADDED
@@ -0,0 +1,340 @@
+#!/usr/bin/env python3
+"""
+GURMA.ai Research Tool — CLI entry point.
+
+Usage:
+    python research.py search "rehabilitation robotics market"
+    python research.py batch
+    python research.py competitor "Ekso Bionics"
+    python research.py competitor --list-categories
+    python research.py extract
+    python research.py list
+    python research.py sota
+    python research.py sota --analyze notes/research/podcast.md
+    python research.py mali
+    python research.py fonlar -c tubitak
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+
+try:
+    from .config import RESEARCH_DIR, COMPETITORS, BATCH_QUERY_TEMPLATES, MARKET_QUERIES, LLM_ENABLED
+    from .search import SearchService, ResultStorage
+    from .extract import CompetitorExtractor
+    from .intel import CompetitorIntelAgent, DEEP_INTEL_CATEGORIES
+except ImportError:
+    from config import RESEARCH_DIR, COMPETITORS, BATCH_QUERY_TEMPLATES, MARKET_QUERIES, LLM_ENABLED
+    from search import SearchService, ResultStorage
+    from extract import CompetitorExtractor
+    from intel import CompetitorIntelAgent, DEEP_INTEL_CATEGORIES
+
+
+# ============================================================
+# Commands
+# ============================================================
+
+def cmd_search(args):
+    service = SearchService(backend=args.backend)
+    print(f"Searching: {args.query}")
+    print(f"Backend: {args.backend} | Max: {args.max_results}")
+    print("-" * 50)
+
+    results = service.search(args.query, args.max_results, save=args.save)
+
+    for i, r in enumerate(results, 1):
+        print(f"\n{i}. {r.title}")
+        print(f"   {r.url}")
+        print(f"   {r.snippet[:150]}...")
+
+    print(f"\n[{len(results)} results]")
+    if args.save:
+        print(f"Saved to: {RESEARCH_DIR}")
+
+
+def cmd_batch(args):
+    service = SearchService(backend=args.backend)
+    storage = ResultStorage()
+
+    queries = []
+    for company in COMPETITORS:
+        for template in BATCH_QUERY_TEMPLATES:
+            queries.append(template.format(company=company))
+    queries.extend(MARKET_QUERIES)
+
+    total_queries = len(queries)
+
+    skipped = 0
+    if not args.force:
+        recent = storage.get_recent_queries(days=args.days)
+        original_count = len(queries)
+        queries = [q for q in queries if q.lower().strip() not in recent]
+        skipped = original_count - len(queries)
+
+    print(f"Batch Research")
+    print(f"{'='*60}")
+    print(f"Competitors: {len(COMPETITORS)}")
+    print(f"Total queries: {total_queries}")
+    if skipped > 0:
+        print(f"Skipped (run in last {args.days} days): {skipped}")
+    print(f"New queries to run: {len(queries)}")
+    print(f"Output: {RESEARCH_DIR}")
+    print(f"{'='*60}")
+
+    if not queries:
+        print("\nNo new queries to run. Use --force to re-run all.")
+        return
+
+    def progress(i, total, query):
+        print(f"\n[{i}/{total}] {query}")
+
+    stats = service.search_batch(queries, args.max_results, args.delay, callback=progress)
+
+    success = sum(1 for v in stats.values() if v >= 0)
+    print(f"\n{'='*60}")
+    print(f"Complete: {success}/{len(queries)} successful")
+    if skipped > 0:
+        print(f"Skipped: {skipped} (already run recently)")
+    print(f"{'='*60}")
+
+
+def cmd_competitor(args):
+    company = args.company
+    use_external_llm = args.external_llm
+
+    if use_external_llm and not LLM_ENABLED:
+        print("Warning: --external-llm requested but OPENROUTER_API_KEY not found. Skipping external LLM.")
+        use_external_llm = False
+
+    categories = None
+    if args.categories:
+        categories = [c.strip() for c in args.categories.split(",")]
+        valid = set(DEEP_INTEL_CATEGORIES.keys())
+        invalid = [c for c in categories if c not in valid]
+        if invalid:
+            print(f"Invalid categories: {invalid}")
+            print(f"Valid: {sorted(valid)}")
+            return
+
+    if args.list_categories:
+        print("Available categories:")
+        for key, cat in DEEP_INTEL_CATEGORIES.items():
+            q_count = len(cat["queries"])
+            print(f"  {key:30s} {cat['label']:30s} ({q_count} queries)")
+        return
+
+    agent = CompetitorIntelAgent(company)
+    report_path = agent.run(
+        categories=categories,
+        use_external_llm=use_external_llm,
+        delay=args.delay,
+        max_results=args.max_results,
+    )
+
+    print(f"\nReport: {report_path}")
+
+
+def cmd_extract(args):
+    extractor = CompetitorExtractor()
+
+    print(f"Loading research from: {extractor.research_dir}")
+    data = extractor.process()
+
+    if not data["competitors"]:
+        print("No research files found. Run 'batch' first.")
+        return
+
+    output = extractor.save(data)
+
+    print(f"Saved to: {output}")
+    print(f"\nCompany mentions:")
+    for comp in data["competitors"]:
+        status_marker = {"collapsed": "⚠", "weak": "↓", "growing": "↑", "strong": "★"}.get(comp["status"], "•")
+        print(f"  {status_marker} {comp['name']}: {comp['mentions']} mentions ({comp['status']})")
+
+
+def cmd_sota(args):
+    try:
+        from .sota_agent import SOTAScoutAgent
+    except ImportError:
+        from sota_agent import SOTAScoutAgent
+
+    agent = SOTAScoutAgent()
+
+    if args.analyze:
+        report = agent.analyze(args.analyze)
+        print(f"\nAnalysis report: {report}")
+        return
+
+    agent.show(section=args.show)
+
+
+def cmd_mali(args):
+    try:
+        from .tr_agents import MaliMusavirAgent
+    except ImportError:
+        from tr_agents import MaliMusavirAgent
+
+    agent = MaliMusavirAgent()
+
+    if args.list_categories:
+        agent.list_categories()
+        return
+
+    categories = None
+    if args.categories:
+        categories = [c.strip() for c in args.categories.split(",")]
+        valid = set(agent.CATEGORIES.keys())
+        invalid = [c for c in categories if c not in valid]
+        if invalid:
+            print(f"Geçersiz kategoriler: {invalid}")
+            print(f"Geçerli: {sorted(valid)}")
+            return
+
+    report_path = agent.run(
+        categories=categories,
+        delay=args.delay,
+        max_results=args.max_results,
+    )
+    print(f"\nRapor: {report_path}")
+
+
+def cmd_fonlar(args):
+    try:
+        from .tr_agents import FonArastirmaAgent
+    except ImportError:
+        from tr_agents import FonArastirmaAgent
+
+    agent = FonArastirmaAgent()
+
+    if args.list_categories:
+        agent.list_categories()
+        return
+
+    categories = None
+    if args.categories:
+        categories = [c.strip() for c in args.categories.split(",")]
+        valid = set(agent.CATEGORIES.keys())
+        invalid = [c for c in categories if c not in valid]
+        if invalid:
+            print(f"Geçersiz kategoriler: {invalid}")
+            print(f"Geçerli: {sorted(valid)}")
+            return
+
+    report_path = agent.run(
+        categories=categories,
+        delay=args.delay,
+        max_results=args.max_results,
+    )
+    print(f"\nRapor: {report_path}")
+
+
+def cmd_list(args):
+    storage = ResultStorage()
+    searches = storage.list_searches(args.limit)
+
+    if not searches:
+        print(f"No searches in {RESEARCH_DIR}")
+        return
+
+    print(f"Recent searches ({RESEARCH_DIR}):\n")
+    for s in searches:
+        print(f"  {s['timestamp'][:10]}  {s['results']:2d} results  {s['query'][:50]}")
+
+
+# ============================================================
+# Argparse
+# ============================================================
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="GURMA.ai Research Tool",
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    subparsers = parser.add_subparsers(dest="command", help="Commands")
+
+    # search
+    p_search = subparsers.add_parser("search", help="Single web search")
+    p_search.add_argument("query", help="Search query")
+    p_search.add_argument("-b", "--backend", default="duckduckgo",
+                          choices=["duckduckgo", "ddg", "serpapi", "brave"])
+    p_search.add_argument("-n", "--max-results", type=int, default=10)
+    p_search.add_argument("--no-save", dest="save", action="store_false")
+    p_search.set_defaults(func=cmd_search)
+
+    # batch
+    p_batch = subparsers.add_parser("batch", help="Batch research all competitors")
+    p_batch.add_argument("-b", "--backend", default="duckduckgo")
+    p_batch.add_argument("-n", "--max-results", type=int, default=10)
+    p_batch.add_argument("-d", "--delay", type=float, default=0.5)
+    p_batch.add_argument("--days", type=int, default=7,
+                         help="Skip queries run within N days (default: 7)")
+    p_batch.add_argument("-f", "--force", action="store_true",
+                         help="Force re-run all queries (ignore deduplication)")
+    p_batch.set_defaults(func=cmd_batch)
+
+    # competitor (deep intel)
+    p_comp = subparsers.add_parser("competitor", help="Deep competitive intelligence on a company")
+    p_comp.add_argument("company", nargs="?", default="", help="Company name (e.g. 'Ekso Bionics')")
+    p_comp.add_argument("--external-llm", action="store_true",
+                        help="Also use external LLM (OpenRouter) for enhanced analysis")
+    p_comp.add_argument("-c", "--categories", type=str, default=None,
+                        help="Comma-separated categories (default: all)")
+    p_comp.add_argument("--list-categories", action="store_true",
+                        help="List available categories")
+    p_comp.add_argument("-n", "--max-results", type=int, default=10)
+    p_comp.add_argument("-d", "--delay", type=float, default=1.0,
+                        help="Delay between searches in seconds (default: 1.0)")
+    p_comp.set_defaults(func=cmd_competitor)
+
+    # sota
+    p_sota = subparsers.add_parser("sota", help="SOTA technology knowledge base for GURMA.ai")
+    p_sota.add_argument("--analyze", "-a", type=str, default=None,
+                        help="Analyze a document and update knowledge base")
+    p_sota.add_argument("--show", "-s", type=str, default=None, nargs="?",
+                        const=None,
+                        choices=["models", "techniques", "stack", "principles", "actions", "sources"],
+                        help="Show specific KB section (default: summary)")
+    p_sota.set_defaults(func=cmd_sota)
+
+    # mali (Turkish company formation)
+    p_mali = subparsers.add_parser("mali", help="Türkiye şirket kuruluşu araştırması")
+    p_mali.add_argument("-c", "--categories", type=str, default=None,
+                        help="Virgülle ayrılmış kategoriler (varsayılan: tümü)")
+    p_mali.add_argument("--list-categories", action="store_true",
+                        help="Mevcut kategorileri listele")
+    p_mali.add_argument("-n", "--max-results", type=int, default=10)
+    p_mali.add_argument("-d", "--delay", type=float, default=1.0)
+    p_mali.set_defaults(func=cmd_mali)
+
+    # fonlar (Turkish government funding research)
+    p_fonlar = subparsers.add_parser("fonlar", help="TÜBİTAK ve devlet fonları araştırması")
+    p_fonlar.add_argument("-c", "--categories", type=str, default=None,
+                          help="Virgülle ayrılmış kategoriler (varsayılan: tümü)")
+    p_fonlar.add_argument("--list-categories", action="store_true",
+                          help="Mevcut kategorileri listele")
+    p_fonlar.add_argument("-n", "--max-results", type=int, default=10)
+    p_fonlar.add_argument("-d", "--delay", type=float, default=1.0)
+    p_fonlar.set_defaults(func=cmd_fonlar)
+
+    # extract
+    p_extract = subparsers.add_parser("extract", help="Extract competitor data to JSON")
+    p_extract.set_defaults(func=cmd_extract)
+
+    # list
+    p_list = subparsers.add_parser("list", help="List saved searches")
+    p_list.add_argument("-l", "--limit", type=int, default=20)
+    p_list.set_defaults(func=cmd_list)
+
+    args = parser.parse_args()
+
+    if hasattr(args, "func"):
+        args.func(args)
+    else:
+        parser.print_help()
+
+
+if __name__ == "__main__":
+    main()
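The dispatch pattern used throughout `main()` above, in miniature: each subparser binds its handler with `set_defaults(func=...)`, and the final `hasattr(args, "func")` check falls back to help output when no subcommand is given. A self-contained toy (command and argument names are illustrative, not from cli.py):

import argparse

def cmd_hello(args: argparse.Namespace) -> None:
    print(f"hello {args.name}")

parser = argparse.ArgumentParser()
sub = parser.add_subparsers(dest="command")
p_hello = sub.add_parser("hello")
p_hello.add_argument("name")
p_hello.set_defaults(func=cmd_hello)  # bind handler to the subcommand

args = parser.parse_args(["hello", "world"])
if hasattr(args, "func"):
    args.func(args)   # dispatches to cmd_hello -> prints "hello world"
else:
    parser.print_help()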
config.py ADDED
@@ -0,0 +1,99 @@
+"""
+GURMA.ai shared configuration.
+
+Environment detection, directory paths, API keys, and research constants
+used across all agents and the dashboard.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+
+# ============================================================
+# Environment Detection
+# ============================================================
+
+def _detect_project_root() -> Path:
+    """Detect project root based on environment."""
+    if os.getenv("HF_SPACE") or Path("/app/research.py").exists():
+        return Path("/app")
+    return Path(__file__).parent.parent.parent
+
+PROJECT_ROOT = _detect_project_root()
+IS_HF_SPACE = PROJECT_ROOT == Path("/app")
+
+if not IS_HF_SPACE:
+    try:
+        from dotenv import load_dotenv
+        load_dotenv(PROJECT_ROOT / ".env")
+    except ImportError:
+        pass
+
+
+# ============================================================
+# Directories
+# ============================================================
+
+if IS_HF_SPACE:
+    RESEARCH_DIR = PROJECT_ROOT / "data"
+    DATA_DIR = PROJECT_ROOT / "data"
+else:
+    RESEARCH_DIR = PROJECT_ROOT / "data"
+    DATA_DIR = PROJECT_ROOT / "src" / "dashboard"
+
+RESEARCH_DIR.mkdir(parents=True, exist_ok=True)
+DATA_DIR.mkdir(parents=True, exist_ok=True)
+
+
+# ============================================================
+# API Keys & LLM Config
+# ============================================================
+
+SERPAPI_KEY = os.getenv("SERPAPI_KEY")
+BRAVE_API_KEY = os.getenv("BRAVE_API_KEY")
+OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
+
+LLM_MODEL = "deepseek/deepseek-chat"
+LLM_ENABLED = bool(OPENROUTER_API_KEY)
+
+
+# ============================================================
+# Research Constants
+# ============================================================
+
+COMPETITORS = [
+    "Hocoma", "Ekso Bionics", "Lifeward ReWalk", "Fourier Intelligence",
+    "Cyberdyne HAL", "Wandercraft", "Myomo", "Bionik",
+]
+
+BATCH_QUERY_TEMPLATES = [
+    "{company} latest news 2025 2026",
+    "{company} funding investors valuation",
+    "{company} FDA approval regulatory",
+    "{company} partnerships collaborations",
+    "{company} AI machine learning technology",
+    "site:accessdata.fda.gov {company}",
+    "site:clinicaltrials.gov {company} rehabilitation",
+    "site:crunchbase.com {company}",
+    "site:sec.gov {company} 10-K OR 8-K",
+    "site:patents.google.com {company} exoskeleton OR rehabilitation",
+]
+
+MARKET_QUERIES = [
+    "rehabilitation robotics market size 2026 forecast",
+    "exoskeleton market growth AI integration",
+    "rehabilitation robotics insurance reimbursement",
+    "medical exoskeleton FDA approval 2025",
+    "stroke rehabilitation AI technology",
+    "spinal cord injury exoskeleton treatment",
+    "rehabilitation robotics competitive landscape",
+    "site:exoskeletonreport.com 2025 2026",
+    "site:medgadget.com exoskeleton rehabilitation",
+    "site:fda.gov rehabilitation robotics guidance",
+    "MDR medical device regulation exoskeleton CE mark 2025",
+    "site:pubmed.ncbi.nlm.nih.gov rehabilitation robotics AI 2024 2025",
+    "exoskeleton insurance coverage CMS reimbursement code",
+    "rehabilitation robotics HCPCS code billing",
+]
@@ -0,0 +1,537 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Competitor data extraction and opportunity detection.
3
+
4
+ Builds competitors.json from raw research files + deep intel findings,
5
+ detects market opportunities, and optionally synthesizes via LLM.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import re
12
+ from collections import defaultdict
13
+ from datetime import datetime
14
+ from pathlib import Path
15
+ from typing import Optional
16
+
17
+ try:
18
+ from .config import RESEARCH_DIR, DATA_DIR, LLM_ENABLED
19
+ from .llm import LLMClient
20
+ except ImportError:
21
+ from config import RESEARCH_DIR, DATA_DIR, LLM_ENABLED
22
+ from llm import LLMClient
23
+
24
+
25
+ # ============================================================
26
+ # Company Definitions & Extraction Patterns
27
+ # ============================================================
28
+
29
+ COMPANY_DEFINITIONS = {
30
+ "Hocoma": {"aliases": ["hocoma", "dih", "lokomat"], "country": "Switzerland", "product": "Lokomat", "status": "collapsed", "verified": True},
31
+ "Ekso Bionics": {"aliases": ["ekso", "eksobionics", "eksonr"], "country": "USA", "product": "EksoNR", "status": "weak", "verified": True},
32
+ "Cyberdyne": {"aliases": ["cyberdyne", "hal exoskeleton"], "country": "Japan", "product": "HAL", "status": "strong", "verified": True},
33
+ "Lifeward": {"aliases": ["lifeward", "rewalk", "alterg"], "country": "Israel/USA", "product": "ReWalk 7", "status": "consolidating", "verified": True},
34
+ "Fourier": {"aliases": ["fourier", "fourier intelligence"], "country": "China", "product": "X1, M2", "status": "growing", "verified": True},
35
+ "Myomo": {"aliases": ["myomo", "myopro"], "country": "USA", "product": "MyoPro", "status": "stable", "verified": False},
36
+ "Bionik": {"aliases": ["bionik", "inmotion"], "country": "Canada", "product": "InMotion", "status": "stable", "verified": False},
37
+ "Wandercraft": {"aliases": ["wandercraft", "atalante"], "country": "France", "product": "Atalante X", "status": "growing", "verified": False},
38
+ }
39
+
40
+ STATUS_KEYWORDS = [
41
+ ("collapsed", ["bankrupt", "delisted", "suspended", "collapse", "shut down", "ceased", "nasdaq delisted"]),
42
+ ("weak", ["52-week low", "struggling", "losses", "declining", "layoffs"]),
43
+ ("growing", ["series e", "series d", "series c", "funding round", "$109 million"]),
44
+ ("consolidating", ["acquired", "merger", "acquisition"]),
45
+ ("strong", ["leader", "dominant", "profitable"]),
46
+ ]
47
+
48
+ DATE_PATTERN = re.compile(
49
+ r'((?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4})'
50
+ r'|(\d{4}-\d{2}-\d{2})'
51
+ r'|(\d{4}-\d{2})'
52
+ r'|((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2},?\s+\d{4})'
53
+ )
54
+ MONEY_PATTERN = re.compile(r'\$[\d,]+(?:\.\d+)?(?:\s*(?:million|billion|M|B))?|\d+(?:\.\d+)?\s*(?:million|billion)', re.IGNORECASE)
55
+
56
+
57
+ # ============================================================
58
+ # Competitor Extractor
59
+ # ============================================================
60
+
61
+ class CompetitorExtractor:
62
+ """Extract structured competitor data from research results."""
63
+
64
+ def __init__(self, research_dir: Path = RESEARCH_DIR,
65
+ output_file: Path = None):
66
+ self.research_dir = research_dir
67
+ self.output_file = output_file or (DATA_DIR / "competitors.json")
68
+
69
+ def load_research_files(self) -> list[dict]:
70
+ results = []
71
+ if not self.research_dir.exists():
72
+ return results
73
+
74
+ for json_file in self.research_dir.glob("*.json"):
75
+ if json_file.name.startswith("."):
76
+ continue
77
+ try:
78
+ with open(json_file) as f:
79
+ data = json.load(f)
80
+ data["_source_file"] = json_file.name
81
+ results.append(data)
82
+ except Exception as e:
83
+ print(f"Error loading {json_file}: {e}")
84
+
85
+ return results
86
+
87
+ def find_mentions(self, text: str) -> list[str]:
88
+ text_lower = text.lower()
89
+ mentioned = []
90
+ for company, info in COMPANY_DEFINITIONS.items():
91
+ if any(alias in text_lower for alias in info["aliases"]):
92
+ mentioned.append(company)
93
+ return mentioned
94
+
95
+ def normalize_date(self, date_str: str) -> str | None:
96
+ formats = ["%B %d, %Y", "%B %d %Y", "%b %d, %Y", "%b %d %Y", "%Y-%m-%d", "%Y-%m"]
97
+ for fmt in formats:
98
+ try:
99
+ dt = datetime.strptime(date_str.strip(), fmt)
100
+ if dt.year < 2010:
101
+ return None
102
+ return dt.strftime("%Y-%m-%d")
103
+ except:
104
+ pass
105
+ return date_str
106
+
107
+ def extract_events(self, text: str, company: str) -> list[dict]:
108
+ events = []
109
+ aliases = COMPANY_DEFINITIONS[company]["aliases"]
110
+
111
+ for match in DATE_PATTERN.finditer(text):
112
+ date_str = match.group(0)
113
+ if not date_str:
114
+ continue
115
+
116
+ start = max(0, match.start() - 50)
117
+ end = min(len(text), match.end() + 150)
118
+ context = text[start:end]
119
+
120
+ normalized = self.normalize_date(date_str)
121
+ if normalized and any(alias in context.lower() for alias in aliases):
122
+ events.append({
123
+ "date": normalized,
124
+ "context": context.strip()
125
+ })
126
+
127
+ return events
128
+
129
+ def detect_status(self, snippets: list[str], default: str) -> str:
130
+ text = " ".join(snippets).lower()
131
+ for status, keywords in STATUS_KEYWORDS:
132
+ if any(kw.lower() in text for kw in keywords):
133
+ return status
134
+ return default
135
+
136
+ def extract_stock(self, snippets: list[str]) -> Optional[float]:
137
+ for snippet in snippets:
138
+ match = re.search(r'\$(\d+\.?\d*)', snippet)
139
+ if match and float(match.group(1)) < 1000:
140
+ return float(match.group(1))
141
+ return None
142
+
143
+ def extract_funding(self, money_mentions: list[str]) -> Optional[int]:
144
+ for m in money_mentions:
145
+ match = re.search(r'(\d+)\s*(?:million|M)', m, re.IGNORECASE)
146
+ if match:
147
+ return int(match.group(1)) * 1_000_000
148
+ match = re.search(r'(\d+\.?\d*)\s*(?:billion|B)', m, re.IGNORECASE)
149
+ if match:
150
+ return int(float(match.group(1)) * 1_000_000_000)
151
+ return None
152
+
153
+ def _load_intel_findings(self) -> dict[str, list[dict]]:
154
+ """Load confirmed findings from Deep Intel reports, grouped by company."""
155
+ intel_dir = self.research_dir / "intel"
156
+ if not intel_dir.exists():
157
+ return {}
158
+
159
+ findings_by_company: dict[str, list[dict]] = {}
160
+ seen_companies: set[str] = set()
161
+
162
+ for json_file in sorted(intel_dir.glob("*_intel.json"), reverse=True):
163
+ try:
164
+ with open(json_file) as f:
165
+ data = json.load(f)
166
+ company = data.get("company", "")
167
+ if not company or company in seen_companies:
168
+ continue
169
+ seen_companies.add(company)
170
+
171
+ all_findings = []
172
+ for section in data.get("sections", {}).values():
173
+ for finding in section.get("findings", []):
174
+ if isinstance(finding, dict) and finding.get("text"):
175
+ all_findings.append(finding)
176
+ elif isinstance(finding, str) and finding:
177
+ all_findings.append({"text": finding, "confirmed": False, "source": ""})
178
+
179
+ if all_findings:
180
+ findings_by_company[company] = all_findings
181
+ except Exception:
182
+ pass
183
+
184
+ return findings_by_company
185
+
186
+ def _extract_intel_opportunities(self, intel_findings: dict[str, list[dict]]) -> list[dict]:
187
+ """Extract opportunity signals from Deep Intel confirmed findings."""
188
+ opportunities = []
189
+
190
+ vuln_patterns = [
191
+ (r'(?:layoff|restructur|downsiz|headcount.?reduc)', "workforce_cut", 2),
192
+ (r'(?:delist|stock.?(?:drop|fall|declin)|52.week.low|penny.stock)', "financial_distress", 1),
193
+ (r'(?:FDA.?(?:reject|warning|recall)|regulatory.?(?:issue|fail|delay))', "regulatory_issue", 2),
194
+ (r'(?:bankrupt|insolvenc|cease.?operat|wind.?down|liquidat)', "collapse", 1),
195
+ (r'(?:customer.?complain|negative.?review|churn|losing.?customer)', "customer_risk", 2),
196
+ (r'(?:legacy|technical.?debt|outdated|proprietary.?lock)', "tech_weakness", 3),
197
+ (r'(?:no.?AI|lack.?(?:of.?)?(?:data|machine.learn|personali))', "ai_gap", 2),
198
+ ]
199
+
200
+ for company, findings in intel_findings.items():
201
+ confirmed = [f for f in findings if f.get("confirmed")]
202
+ all_text = " ".join(f["text"] for f in confirmed).lower() if confirmed else ""
203
+ all_text_full = " ".join(f["text"] for f in findings).lower()
204
+
205
+ for pattern, opp_type, priority in vuln_patterns:
206
+ if re.search(pattern, all_text, re.IGNORECASE):
207
+ match_finding = next(
208
+ (f for f in confirmed if re.search(pattern, f["text"], re.IGNORECASE)),
209
+ None
210
+ )
211
+ if match_finding:
212
+ opportunities.append({
213
+ "type": opp_type,
214
+ "text": f"{company}: {match_finding['text'][:120]}",
215
+ "priority": priority,
216
+ "confirmed": True,
217
+ "source": match_finding.get("source", ""),
218
+ "company": company,
219
+ })
220
+ elif re.search(pattern, all_text_full, re.IGNORECASE):
221
+ match_finding = next(
222
+ (f for f in findings if re.search(pattern, f["text"], re.IGNORECASE)),
223
+ None
224
+ )
225
+ if match_finding:
226
+ opportunities.append({
227
+ "type": opp_type,
228
+ "text": f"{company}: {match_finding['text'][:120]}",
229
+ "priority": priority + 1,
230
+ "confirmed": False,
231
+ "source": match_finding.get("source", ""),
232
+ "company": company,
233
+ })
234
+
235
+ return opportunities
236
+
237
+ def _load_sota_tech_signals(self) -> list[dict]:
238
+ """Load tech advantage signals from SOTA knowledge base."""
239
+ kb_path = self.research_dir / "sota" / "knowledge_base.json"
240
+ if not kb_path.exists():
241
+ return []
242
+
243
+ try:
244
+ with open(kb_path) as f:
245
+ kb = json.load(f)
246
+ except Exception:
247
+ return []
248
+
249
+ signals = []
250
+
251
+ for t in kb.get("techniques", []):
252
+ if t.get("priority") == "high" and t.get("gurma_fit"):
253
+ signals.append({
254
+ "type": "tech_advantage",
255
+ "text": f"{t['name']}: {t['gurma_fit'][:120]}",
256
+ "priority": 2,
257
+ "confirmed": True,
258
+ "company": "GURMA",
259
+ })
260
+
261
+ for p in kb.get("key_principles", [])[:2]:
262
+ if p.get("principle"):
263
+ signals.append({
264
+ "type": "tech_principle",
265
+ "text": f"{p['principle']}: {p.get('detail', '')[:100]}",
266
+ "priority": 3,
267
+ "confirmed": True,
268
+ "company": "GURMA",
269
+ })
270
+
271
+ return signals
272
+
273
+ def _opportunity_changed(self, new_opps: list[dict], existing: dict) -> bool:
274
+ existing_points = set(existing.get("points", []))
275
+ new_points = set(o["text"] for o in new_opps[:4])
276
+
277
+ if not existing_points:
278
+ return True
279
+
280
+ new_p1_types = {o["type"] for o in new_opps if o["priority"] == 1}
281
+ old_raw = existing.get("raw_opportunities", [])
282
+ old_p1_types = {o["type"] for o in old_raw if o.get("priority") == 1}
283
+ if new_p1_types != old_p1_types:
284
+ return True
285
+
286
+ overlap = existing_points & new_points
287
+ if len(overlap) < len(existing_points) / 2:
288
+ return True
289
+
290
+ return False
291
+
292
+ def _synthesize_opportunity_llm(self, opportunities: list[dict],
293
+ competitors: list[dict]) -> Optional[dict]:
294
+ if not LLM_ENABLED:
295
+ return None
296
+
297
+ llm = LLMClient()
298
+
299
+ opp_text = "\n".join(
300
+ f"- [{o['type']}] {'[CONFIRMED]' if o.get('confirmed') else '[SPECULATIVE]'} {o['text']}"
301
+ for o in opportunities[:12]
302
+ )
303
+
304
+ comp_summary = "\n".join(
305
+ f"- {c['name']}: status={c['status']}, "
306
+ f"{'stock=$'+format(c['stock'], '.2f') if c.get('stock') else 'no stock data'}, "
307
+ f"{'funding=$'+format(c['funding']/1e6, '.0f')+'M' if c.get('funding') else 'no funding data'}"
308
+ for c in competitors[:8]
309
+ )
310
+
311
+ system = (
312
+ "You are a strategic advisor for GURMA.ai, a Swiss AI company "
313
+ "entering rehabilitation robotics with 15 years of patient outcome "
314
+ "data (not just motion data) from BAMA Teknoloji. "
315
+ "You produce concise, actionable strategic assessments."
316
+ )
317
+
318
+ prompt = f"""Based on the following competitive + technology signals and competitor data,
319
+ produce a strategic opportunity assessment for GURMA.ai.
320
+
321
+ Signals (competitive, tech advantages, and threats):
322
+ {opp_text}
323
+
324
+ Competitor landscape:
325
+ {comp_summary}
326
+
327
+ Return JSON:
328
+ {{
329
+ "headline": "One punchy sentence (max 10 words) summarizing the #1 strategic opportunity",
330
+ "points": [
331
+ "Actionable insight 1 (max 20 words, include numbers where available)",
332
+ "Actionable insight 2",
333
+ "Actionable insight 3",
334
+ "Actionable insight 4"
335
+ ]
336
+ }}
337
+
338
+ Rules:
339
+ - Headline should be about the OPPORTUNITY, not just a competitor's problem
340
+ - Points should mix competitive windows, tech advantages, AND threats
341
+ - Be specific: include dollar amounts, dates, competitor names, model/technique names
342
+ - Maximum 4 points, ranked by strategic importance
343
+ - confirmed signals should be weighted more heavily than speculative ones"""
344
+
345
+ response = llm.call(prompt, system, max_tokens=500)
346
+ if response:
347
+ match = re.search(r'\{.*\}', response, re.DOTALL)
348
+ if match:
349
+ try:
350
+ result = json.loads(match.group())
351
+ if result.get("headline") and result.get("points"):
352
+ return result
353
+ except Exception:
354
+ pass
355
+ return None
356
+
357
+ def detect_opportunities(self, competitors: list[dict], all_snippets: list[str]) -> dict:
358
+ """Detect market opportunities from competitor data + Deep Intel findings."""
359
+ opportunities = []
360
+
361
+ collapsed = [c for c in competitors if c["status"] == "collapsed"]
362
+ weak = [c for c in competitors if c["status"] == "weak"]
363
+
364
+ if collapsed:
365
+ names = ", ".join(c["name"] for c in collapsed)
366
+ opportunities.append({
367
+ "type": "market_gap",
368
+ "text": f"{names} collapsed — customers seeking alternatives",
369
+ "priority": 1, "confirmed": True, "company": names,
370
+ })
371
+
372
+ if weak:
373
+ for c in weak:
374
+ opp_text = f"{c['name']} financially weak"
375
+ if c.get("stock"):
376
+ opp_text += f" (${c['stock']:.2f})"
377
+ opp_text += " — vulnerable to disruption"
378
+ opportunities.append({
379
+ "type": "weakness",
380
+ "text": opp_text,
381
+ "priority": 2, "confirmed": True, "company": c["name"],
382
+ })
383
+
384
+ growing = [c for c in competitors if c["status"] == "growing" and c.get("funding")]
385
+ for c in growing:
386
+ funding_m = c["funding"] / 1_000_000
387
+ opportunities.append({
388
+ "type": "threat",
389
+ "text": f"{c['name']} well-funded (${funding_m:.0f}M) — monitor closely",
390
+ "priority": 3, "confirmed": True, "company": c["name"],
391
+ })
392
+
393
+ if competitors:
394
+ opportunities.append({
395
+ "type": "advantage",
396
+ "text": "BAMA has 15 years outcome data vs. competitors' motion data",
397
+ "priority": 1, "confirmed": True, "company": "BAMA",
398
+ })
399
+
400
+ intel_findings = self._load_intel_findings()
401
+ if intel_findings:
402
+ intel_opps = self._extract_intel_opportunities(intel_findings)
403
+ existing_keys = {(o.get("company", ""), o["type"]) for o in opportunities}
404
+ for io in intel_opps:
405
+ key = (io.get("company", ""), io["type"])
406
+ if key not in existing_keys:
407
+ opportunities.append(io)
408
+ existing_keys.add(key)
409
+
410
+ sota_signals = self._load_sota_tech_signals()
411
+ if sota_signals:
412
+ existing_keys = {(o.get("company", ""), o["type"]) for o in opportunities}
413
+ for ts in sota_signals:
414
+ key = (ts.get("company", ""), ts["type"])
415
+ if key not in existing_keys:
416
+ opportunities.append(ts)
417
+ existing_keys.add(key)
418
+
419
+ opportunities.sort(key=lambda x: x["priority"])
420
+
421
+ llm_result = self._synthesize_opportunity_llm(opportunities, competitors)
422
+
423
+ if llm_result:
424
+ headline = llm_result["headline"]
425
+ points = llm_result["points"][:4]
426
+ else:
427
+ if collapsed:
428
+ headline = f"{collapsed[0]['name']} collapse creates market window"
429
+ elif weak:
430
+ headline = "Competitor weakness creates opportunity"
431
+ else:
432
+ headline = "Data advantage positions GURMA.ai for growth"
433
+ points = [o["text"] for o in opportunities[:4]]
434
+
435
+ sources = ["competitor"]
436
+ if intel_findings:
437
+ sources.append("intel")
438
+ if sota_signals:
439
+ sources.append("tech")
440
+ if llm_result:
441
+ sources.append("llm")
442
+
443
+ return {
444
+ "headline": headline,
445
+ "points": points,
446
+ "detected_at": datetime.now().strftime("%Y-%m-%d"),
447
+ "raw_opportunities": opportunities,
448
+ "sources": sources,
449
+ }
450
+
451
+ def load_existing_data(self) -> Optional[dict]:
452
+ if self.output_file.exists():
453
+ try:
454
+ with open(self.output_file) as f:
455
+ return json.load(f)
456
+ except Exception:
457
+ pass
458
+ return None
459
+
460
+ def process(self) -> dict:
461
+ research_data = self.load_research_files()
462
+ if not research_data:
463
+ return {"competitors": [], "market": {}}
464
+
465
+ company_data = defaultdict(lambda: {
466
+ "mentions": 0, "snippets": [], "events": [], "money": [], "urls": []
467
+ })
468
+
469
+ for research in research_data:
470
+ for result in research.get("results", []):
471
+ text = f"{result.get('title', '')} {result.get('snippet', '')}"
472
+ url = result.get("url", "")
473
+
474
+ for company in self.find_mentions(text):
475
+ cd = company_data[company]
476
+ cd["mentions"] += 1
477
+ cd["snippets"].append(result.get("snippet", "")[:200])
478
+ cd["urls"].append(url)
479
+ cd["events"].extend(self.extract_events(text, company))
480
+ cd["money"].extend(MONEY_PATTERN.findall(text))
481
+
482
+ competitors = []
483
+ for company, info in COMPANY_DEFINITIONS.items():
484
+ data = company_data[company]
485
+
486
+ status = info["status"] if info.get("verified") else self.detect_status(data["snippets"], info["status"])
487
+
488
+ competitors.append({
489
+ "name": company,
490
+ "country": info["country"],
491
+ "product": info["product"],
492
+ "status": status,
493
+ "stock": self.extract_stock(data["snippets"]),
494
+ "funding": self.extract_funding(data["money"]),
495
+ "notes": data["snippets"][0] if data["snippets"] else "",
496
+ "mentions": data["mentions"],
497
+ "events": [{"date": e["date"], "event": e["context"][:100]} for e in data["events"][:10]],
498
+ "sample_urls": list(set(data["urls"]))[:5],
499
+ })
500
+
501
+ competitors.sort(key=lambda x: x["mentions"], reverse=True)
502
+
503
+ all_snippets = []
504
+ for company, data in company_data.items():
505
+ all_snippets.extend(data["snippets"])
506
+ new_opportunity = self.detect_opportunities(competitors, all_snippets)
507
+
508
+ existing = self.load_existing_data()
509
+ existing_opp = existing.get("opportunity", {}) if existing else {}
510
+
511
+ if existing_opp.get("confirmed"):
512
+ if self._opportunity_changed(new_opportunity.get("raw_opportunities", []), existing_opp):
513
+ opportunity = existing_opp
514
+ opportunity["update_available"] = True
515
+ opportunity["suggested_update"] = new_opportunity
516
+ else:
517
+ opportunity = existing_opp
518
+ opportunity["update_available"] = False
519
+ else:
520
+ opportunity = new_opportunity
521
+ opportunity["confirmed"] = False
522
+ opportunity["update_available"] = False
523
+
524
+ return {
525
+ "competitors": competitors,
526
+ "market": {"size_2024": 2_000_000_000, "size_2029_ai": 9_100_000_000, "cagr": 0.278},
527
+ "opportunity": opportunity,
528
+ "_generated": datetime.now().isoformat(),
529
+ "_source_files": [f.name for f in self.research_dir.glob("*.json") if not f.name.startswith(".")]
530
+ }
531
+
532
+ def save(self, data: dict = None) -> Path:
533
+ data = data or self.process()
534
+ self.output_file.parent.mkdir(parents=True, exist_ok=True)
535
+ with open(self.output_file, "w") as f:
536
+ json.dump(data, f, indent=2)
537
+ return self.output_file
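
Taken together, `CompetitorExtractor` is a small pipeline: load saved search JSON, tally company mentions, and write `competitors.json` for the dashboard. A minimal usage sketch, assuming the flat HF Space layout where the module imports as `extract` (paths are illustrative):

```python
from pathlib import Path

from extract import CompetitorExtractor

extractor = CompetitorExtractor(
    research_dir=Path("data"),                  # folder of saved *.json search results
    output_file=Path("data/competitors.json"),  # consumed by the dashboard's load_data()
)
data = extractor.process()   # aggregate mentions, events, funding, opportunities
path = extractor.save(data)  # write competitors.json (creates parent dirs)
print(f"{len(data['competitors'])} competitors -> {path}")
```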
intel.py ADDED
@@ -0,0 +1,508 @@
1
+ """
2
+ Deep competitive intelligence agent.
3
+
4
+ Runs structured research across categories for a single competitor,
5
+ producing markdown + JSON reports with [CONFIRMED]/[SPECULATIVE] tagging.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import re
12
+ import time
13
+ from dataclasses import dataclass, field
14
+ from datetime import datetime
15
+ from pathlib import Path
16
+
17
+ try:
18
+ from .config import RESEARCH_DIR, LLM_ENABLED
19
+ from .search import SearchService, WebSearchResult
20
+ from .llm import LLMClient
21
+ except ImportError:
22
+ from config import RESEARCH_DIR, LLM_ENABLED
23
+ from search import SearchService, WebSearchResult
24
+ from llm import LLMClient
25
+
26
+
27
+ # ============================================================
28
+ # Intel Constants
29
+ # ============================================================
30
+
31
+ DEEP_INTEL_CATEGORIES = {
32
+ "company_overview": {
33
+ "label": "Company Overview",
34
+ "queries": [
35
+ "{company} founding history milestones",
36
+ "{company} CEO leadership team background",
37
+ "{company} funding rounds investors valuation",
38
+ "{company} employee count headcount growth",
39
+ ],
40
+ },
41
+ "product_technology": {
42
+ "label": "Product & Technology",
43
+ "queries": [
44
+ "{company} exoskeleton rehabilitation robot product specifications",
45
+ "{company} AI machine learning technology capabilities",
46
+ "{company} new product launch release 2025 2026",
47
+ "{company} patent filings exoskeleton rehabilitation innovation",
48
+ "site:patents.google.com {company} exoskeleton OR rehabilitation",
49
+ ],
50
+ },
51
+ "regulatory_clinical": {
52
+ "label": "Regulatory & Clinical",
53
+ "queries": [
54
+ "site:accessdata.fda.gov {company}",
55
+ "{company} FDA 510k clearance CE mark MDR approval",
56
+ "site:clinicaltrials.gov {company} rehabilitation",
57
+ "{company} clinical outcomes study peer-reviewed results",
58
+ ],
59
+ },
60
+ "market_channels": {
61
+ "label": "Market & Channels",
62
+ "queries": [
63
+ "{company} hospital clinic installations customer base",
64
+ "{company} insurance reimbursement coverage CMS",
65
+ "{company} partnerships distributors resellers",
66
+ "{company} conference MEDICA ACRM CES 2025 2026",
67
+ ],
68
+ },
69
+ "vulnerabilities_threats": {
70
+ "label": "Vulnerabilities & Threats",
71
+ "queries": [
72
+ "{company} weaknesses problems criticism recall",
73
+ "{company} layoffs restructuring financial difficulty",
74
+ "{company} Glassdoor employee reviews satisfaction",
75
+ "{company} rehabilitation robotics AI expansion strategy 2025 2026",
76
+ "{company} acquisitions mergers market share growth",
77
+ "site:sec.gov {company} 10-K OR 8-K",
78
+ ],
79
+ },
80
+ }
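+
+ # Query templates expand per company before searching, e.g. (illustrative):
+ #   "{company} FDA 510k clearance CE mark MDR approval".format(company="Ekso Bionics")
+ #   -> "Ekso Bionics FDA 510k clearance CE mark MDR approval"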
81
+
82
+ PRIMARY_SOURCE_DOMAINS = {
83
+ "sec.gov", "fda.gov", "clinicaltrials.gov", "patents.google.com",
84
+ "accessdata.fda.gov",
85
+ "crunchbase.com", "tracxn.com", "pitchbook.com", "cbinsights.com",
86
+ "bloomberg.com", "reuters.com", "wsj.com", "finance.yahoo.com",
87
+ "wellfound.com",
88
+ "linkedin.com", "glassdoor.com",
89
+ "g2.com", "capterra.com", "trustpilot.com",
90
+ "therobotreport.com", "exoskeletonreport.com", "medgadget.com",
91
+ }
92
+
93
+ CATEGORY_EXPECTED = {
94
+ "company_overview": {
95
+ "founding_year": [r'(?:founded|established|incorporated|started)\s+(?:in\s+)?(\d{4})'],
96
+ "leadership": [r'(?:CEO|Chief Executive|CTO|CFO|President|Founder|Chairman|COO)'],
97
+ "funding": [r'\$[\d,.]+\s*(?:million|billion|M|B)', r'(?:series\s+[A-F]|seed|IPO|funding\s+round)'],
98
+ "employees": [r'(\d[\d,]*)\s*(?:employees|staff|headcount|team\s+members|workers)'],
99
+ },
100
+ "product_technology": {
101
+ "products": [r'(?:product|device|robot|exoskeleton|system)\s'],
102
+ "technology": [r'(?:AI|machine\s+learning|deep\s+learning|sensor|actuator|algorithm|neural)'],
103
+ "patents": [r'(?:patent|IP|intellectual\s+property|invention)'],
104
+ "recent_launches": [r'(?:launch|release|announc|unveil|introduc)\w*\s+.{0,30}(?:2025|2026)'],
105
+ },
106
+ "regulatory_clinical": {
107
+ "fda_clearance": [r'(?:510\(?k\)?|FDA.?clear|FDA.?approv|de\s*novo)'],
108
+ "ce_mark": [r'(?:CE.?mark|MDR|EU.?approv|notified.?body)'],
109
+ "clinical_trials": [r'(?:clinical.?trial|NCT\d|randomized|controlled.?study|peer.?review)'],
110
+ "clinical_outcomes": [r'(?:outcome|efficacy|recovery.?rate|improvement|functional.?score)'],
111
+ },
112
+ "market_channels": {
113
+ "installations": [r'(?:hospital|clinic|center|install|deploy|site)\s'],
114
+ "reimbursement": [r'(?:reimburse|insurance|CMS|Medicare|Medicaid|HCPCS|coverage|payer)'],
115
+ "partnerships": [r'(?:partner|alliance|collaborat|distribut|reseller|dealer)'],
116
+ "events": [r'(?:conference|MEDICA|ACRM|CES|expo|trade\s+show|summit)'],
117
+ },
118
+ "vulnerabilities_threats": {
119
+ "weaknesses": [r'(?:weakness|problem|challenge|struggle|fail|recall|warning)'],
120
+ "financial_stress": [r'(?:layoff|restructur|loss|declining|debt|delist|penny.stock)'],
121
+ "employee_sentiment": [r'(?:glassdoor|employee.?review|work.?culture|turnover)'],
122
+ "expansion": [r'(?:expansion|new.?market|acqui|merger|market.?share|growth.?strategy)'],
123
+ },
124
+ }
125
+
126
+ CATEGORY_SYNTHESIS_QUESTIONS = {
127
+ "company_overview": [
128
+ "Founding story and key milestones",
129
+ "Leadership team (backgrounds, medical device experience)",
130
+ "Funding history (rounds, investors, valuations)",
131
+ "Employee count and growth trajectory",
132
+ ],
133
+ "product_technology": [
134
+ "Product catalog (devices, indications, patient populations)",
135
+ "AI / machine learning capabilities (data they train on, algorithms used)",
136
+ "Recent product launches and roadmap clues (last 12 months)",
137
+ "Patent portfolio and innovation direction",
138
+ "How does their technology compare to GURMA.ai's outcome-data approach?",
139
+ ],
140
+ "regulatory_clinical": [
141
+ "FDA clearances (510(k) numbers, De Novo, dates)",
142
+ "CE mark / MDR status in Europe",
143
+ "Active clinical trials (ClinicalTrials.gov entries, endpoints)",
144
+ "Published clinical outcomes (peer-reviewed studies, recovery rates)",
145
+ "Reimbursement status (CMS, Medicare, private payer coverage)",
146
+ ],
147
+ "market_channels": [
148
+ "Hospital and clinic installations (how many sites, which countries)",
149
+ "Insurance and reimbursement strategy (pricing, payer relationships)",
150
+ "Distribution partnerships and reseller network",
151
+ "Conference and KOL presence (MEDICA, ACRM, physician endorsements)",
152
+ ],
153
+ "vulnerabilities_threats": [
154
+ "What are they bad at? (clinical limitations, missing indications)",
155
+ "Financial health (SEC filings, cash burn, stock trajectory)",
156
+ "Employee sentiment (Glassdoor, hiring patterns, layoffs)",
157
+ "Growth strategy (acquisitions, new markets, AI investments)",
158
+ "What could they do that would hurt GURMA.ai most?",
159
+ "Early warning signals to monitor",
160
+ ],
161
+ }
162
+
163
+
164
+ # ============================================================
165
+ # Intel Agent
166
+ # ============================================================
167
+
168
+ @dataclass
169
+ class IntelSection:
170
+ category: str
171
+ label: str
172
+ queries_executed: list = field(default_factory=list)
173
+ results: list = field(default_factory=list)
174
+ findings: list = field(default_factory=list)
175
+ gaps: list = field(default_factory=list)
176
+ sources: list = field(default_factory=list)
177
+
178
+
179
+ class CompetitorIntelAgent:
180
+ """Deep competitive intelligence agent for a single competitor.
181
+
182
+ Usage:
183
+ agent = CompetitorIntelAgent("Ekso Bionics")
184
+ report = agent.run()
185
+ report = agent.run(use_external_llm=True)
186
+ """
187
+
188
+ def __init__(self, company: str, search: SearchService = None, llm: LLMClient = None):
189
+ self.company = company
190
+ self.search = search or SearchService()
191
+ self.llm = llm or LLMClient()
192
+ self.sections: dict[str, IntelSection] = {}
193
+ self.output_dir = RESEARCH_DIR / "intel"
194
+ self.output_dir.mkdir(parents=True, exist_ok=True)
195
+
196
+ def run(self, categories: list[str] = None, use_external_llm: bool = False,
197
+ delay: float = 1.0, max_results: int = 10) -> Path:
198
+ cats = categories or list(DEEP_INTEL_CATEGORIES.keys())
199
+
200
+ total_queries = sum(
201
+ len(DEEP_INTEL_CATEGORIES[c]["queries"])
202
+ for c in cats if c in DEEP_INTEL_CATEGORIES
203
+ )
204
+
205
+ print(f"\n{'='*60}")
206
+ print(f"Deep Competitive Intelligence: {self.company}")
207
+ print(f"Categories: {len(cats)} | Queries: ~{total_queries}")
208
+ print(f"Analysis: built-in{' + external LLM' if use_external_llm and self.llm.enabled else ''}")
209
+ print(f"{'='*60}\n")
210
+
211
+ for cat_key in cats:
212
+ cat = DEEP_INTEL_CATEGORIES.get(cat_key)
213
+ if not cat:
214
+ print(f"[SKIP] Unknown category: {cat_key}")
215
+ continue
216
+
217
+ section = IntelSection(category=cat_key, label=cat["label"])
218
+ self._research_category(section, cat, use_external_llm, delay, max_results)
219
+ self.sections[cat_key] = section
220
+
221
+ report_path = self._generate_report(use_external_llm)
222
+ self._save_data()
223
+
224
+ print(f"\n{'='*60}")
225
+ print(f"Report: {report_path}")
226
+ total_findings = sum(len(s.findings) for s in self.sections.values())
227
+ total_gaps = sum(len(s.gaps) for s in self.sections.values())
228
+ print(f"Findings: {total_findings} | Gaps: {total_gaps}")
229
+ print(f"{'='*60}\n")
230
+
231
+ return report_path
232
+
233
+ def _research_category(self, section: IntelSection, cat: dict,
234
+ use_external_llm: bool, delay: float, max_results: int):
235
+ print(f"\n--- {section.label} ---")
236
+
237
+ queries = [q.format(company=self.company) for q in cat["queries"]]
238
+
239
+ if use_external_llm and self.llm.enabled:
240
+ extra = self.llm.generate_category_queries(self.company, section.label)
241
+ if extra:
242
+ queries.extend(extra)
243
+ print(f" [EXTERNAL LLM] +{len(extra)} additional queries")
244
+
245
+ for query in queries:
246
+ print(f" [SEARCH] {query}")
247
+ try:
248
+ results = self.search.search(query, max_results=max_results, save=True)
249
+ section.queries_executed.append(query)
250
+ section.results.extend(results)
251
+ for r in results:
252
+ if r.url and r.url not in section.sources:
253
+ section.sources.append(r.url)
254
+ print(f" -> {len(results)} results")
255
+ except Exception as e:
256
+ print(f" -> Error: {e}")
257
+
258
+ if delay > 0:
259
+ time.sleep(delay)
260
+
261
+ section.findings = self._analyze_section(section)
262
+ section.gaps = self._detect_gaps(section)
263
+
264
+ confirmed = sum(1 for f in section.findings if f.get("confirmed"))
265
+ speculative = len(section.findings) - confirmed
266
+ print(f" [ANALYSIS] {len(section.findings)} findings ({confirmed} confirmed, {speculative} speculative)")
267
+ if section.gaps:
268
+ print(f" [GAPS] {len(section.gaps)}: {', '.join(g['text'] for g in section.gaps[:3])}")
269
+
270
+ if self.llm.enabled and section.results:
271
+ print(f" [SYNTHESIS] Synthesizing {section.label}...")
272
+ synthesis = self.llm.synthesize_intel(
273
+ self.company, section.category, section.label, section.results
274
+ )
275
+ synth_findings = synthesis.get("findings", [])
276
+ synth_gaps = synthesis.get("gaps", [])
277
+
278
+ if synth_findings:
279
+ synth_sources = {f.get("source", "") for f in synth_findings if f.get("source")}
280
+ for bf in section.findings:
281
+ if bf.get("source") and bf["source"] not in synth_sources:
282
+ synth_findings.append(bf)
283
+ section.findings = synth_findings
284
+
285
+ for f in synth_findings:
286
+ if isinstance(f, dict):
287
+ tag = "[CONFIRMED]" if f.get("confirmed") else "[SPECULATIVE]"
288
+ print(f" {tag} {f.get('text', '')[:80]}")
289
+
290
+ existing_gaps = {g["text"].lower() for g in section.gaps}
291
+ for sg in synth_gaps:
292
+ gap_text = sg.get("text", "") if isinstance(sg, dict) else sg
293
+ if gap_text and gap_text.lower() not in existing_gaps:
294
+ section.gaps.append({"text": gap_text})
295
+
296
+ def _analyze_section(self, section: IntelSection) -> list[dict]:
297
+ findings = []
298
+ seen_keys = set()
299
+ aliases = self._get_aliases()
300
+
301
+ for r in section.results:
302
+ text_lower = f"{r.title} {r.snippet}".lower()
303
+
304
+ if not any(alias in text_lower for alias in aliases):
305
+ continue
306
+
307
+ dedup_key = re.sub(r'[^a-z0-9]', '', r.title.lower()[:50])
308
+ if dedup_key in seen_keys:
309
+ continue
310
+ seen_keys.add(dedup_key)
311
+
312
+ confirmed = self._is_primary_source(r.url)
313
+
314
+ title = r.title.strip()
315
+ snippet = r.snippet.strip()[:250]
316
+ finding_text = f"{title}: {snippet}" if snippet else title
317
+
318
+ findings.append({
319
+ "text": finding_text,
320
+ "source": r.url,
321
+ "confirmed": confirmed,
322
+ })
323
+
324
+ findings.sort(key=lambda f: (not f["confirmed"], -len(f["text"])))
325
+ return findings[:15]
326
+
327
+ def _is_primary_source(self, url: str) -> bool:
328
+ if not url:
329
+ return False
330
+ url_lower = url.lower()
331
+
332
+ for domain in PRIMARY_SOURCE_DOMAINS:
333
+ if domain in url_lower:
334
+ return True
335
+
336
+ # Treat the competitor's own site as primary: look for an alias slug in the URL host.
+ parts = url_lower.split("/")
+ host = parts[2] if len(parts) > 2 else ""
+ for alias in self._get_aliases():
+ slug = alias.replace(" ", "")
+ if len(slug) >= 4 and slug in host:
+ return True
340
+
341
+ return False
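+
+ # Illustrative results (hypothetical URLs):
+ #   _is_primary_source("https://www.sec.gov/cgi-bin/browse-edgar") -> True  (primary domain)
+ #   _is_primary_source("https://eksobionics.com/news") -> True, assuming an alias slug
+ #     like "ekso" is defined for the company in COMPANY_DEFINITIONS
+ #   _is_primary_source("https://randomblog.example/post") -> False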
342
+
343
+ def _detect_gaps(self, section: IntelSection) -> list[dict]:
344
+ expected = CATEGORY_EXPECTED.get(section.category, {})
345
+ if not expected:
346
+ return []
347
+
348
+ aliases = self._get_aliases()
349
+ relevant_text = " ".join(
350
+ f"{r.title} {r.snippet}"
351
+ for r in section.results
352
+ if any(a in f"{r.title} {r.snippet}".lower() for a in aliases)
353
+ )
354
+
355
+ if not relevant_text:
356
+ return [{"text": f"No relevant results found for {section.label}"}]
357
+
358
+ relevant_lower = relevant_text.lower()
359
+ gaps = []
360
+ for field_name, patterns in expected.items():
361
+ found = any(
362
+ re.search(p, relevant_lower, re.IGNORECASE)
363
+ for p in patterns
364
+ )
365
+ if not found:
366
+ label = field_name.replace("_", " ").replace("/", " / ")
367
+ gaps.append({"text": f"No data found for: {label}"})
368
+
369
+ return gaps
370
+
371
+ def _get_aliases(self) -> list[str]:
372
+ try:
373
+ from .extract import COMPANY_DEFINITIONS
374
+ except ImportError:
375
+ from extract import COMPANY_DEFINITIONS
376
+ info = COMPANY_DEFINITIONS.get(self.company, {})
377
+ aliases = info.get("aliases", [])
378
+ if not aliases:
379
+ aliases = [self.company.lower()]
380
+ return aliases
381
+
382
+ def _generate_report(self, use_external_llm: bool) -> Path:
383
+ timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
384
+ slug = self.company.lower().replace(" ", "-").replace("/", "-")
385
+ slug = "".join(c for c in slug if c.isalnum() or c == "-")
386
+
387
+ report_path = self.output_dir / f"{timestamp}_{slug}_intel.md"
388
+
389
+ method = "Built-in analysis"
390
+ if LLM_ENABLED:
391
+ method += " + LLM synthesis (OpenRouter)"
392
+ if use_external_llm:
393
+ method += " + extra query generation"
394
+
395
+ lines = [
396
+ f"# Competitive Intelligence: {self.company}",
397
+ "",
398
+ f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M')} ",
399
+ f"**Method:** {method} ",
400
+ f"**Searches:** {sum(len(s.queries_executed) for s in self.sections.values())} ",
401
+ f"**Sources:** {sum(len(s.sources) for s in self.sections.values())} unique URLs",
402
+ "",
403
+ "> **Legend:** [CONFIRMED] = from primary/verified source | [SPECULATIVE] = inferred or unverified",
404
+ "",
405
+ "---",
406
+ ]
407
+
408
+ for section in self.sections.values():
409
+ lines.append("")
410
+ lines.append(f"## {section.label}")
411
+ lines.append("")
412
+
413
+ if not section.findings:
414
+ lines.append("*No findings. Try broader queries or `--external-llm` for additional analysis.*")
415
+ lines.append("")
416
+ continue
417
+
418
+ for f in section.findings:
419
+ if isinstance(f, dict):
420
+ tag = "[CONFIRMED]" if f.get("confirmed") else "[SPECULATIVE]"
421
+ text = f.get("text", "")
422
+ source = f.get("source", "")
423
+ lines.append(f"- **{tag}** {text}")
424
+ if source:
425
+ lines.append(f" - Source: {source}")
426
+ else:
427
+ lines.append(f"- {f}")
428
+
429
+ if section.gaps:
430
+ lines.append("")
431
+ lines.append("**Knowledge Gaps:**")
432
+ for gap in section.gaps:
433
+ gap_text = gap.get("text", gap) if isinstance(gap, dict) else gap
434
+ lines.append(f"- [ ] {gap_text}")
435
+
436
+ lines.append("")
437
+
438
+ if section.sources:
439
+ lines.append(f"<details><summary>Sources ({len(section.sources)} URLs)</summary>")
440
+ lines.append("")
441
+ for url in section.sources[:10]:
442
+ lines.append(f"- {url}")
443
+ if len(section.sources) > 10:
444
+ lines.append(f"- ... and {len(section.sources) - 10} more")
445
+ lines.append("")
446
+ lines.append("</details>")
447
+ lines.append("")
448
+
449
+ lines.extend(["---", "", "## Summary", ""])
450
+
451
+ total_findings = sum(len(s.findings) for s in self.sections.values())
452
+ confirmed = sum(
453
+ sum(1 for f in s.findings if isinstance(f, dict) and f.get("confirmed"))
454
+ for s in self.sections.values()
455
+ )
456
+ speculative = total_findings - confirmed
457
+
458
+ lines.append(f"| Metric | Count |")
459
+ lines.append(f"|--------|-------|")
460
+ lines.append(f"| Total findings | {total_findings} |")
461
+ lines.append(f"| Confirmed | {confirmed} |")
462
+ lines.append(f"| Speculative | {speculative} |")
463
+ lines.append(f"| Categories | {len(self.sections)} |")
464
+ lines.append("")
465
+
466
+ all_gaps = []
467
+ for s in self.sections.values():
468
+ for g in s.gaps:
469
+ gap_text = g.get("text", g) if isinstance(g, dict) else g
470
+ all_gaps.append(f"{s.label}: {gap_text}")
471
+
472
+ if all_gaps:
473
+ lines.append("### Outstanding Gaps")
474
+ lines.append("")
475
+ for gap in all_gaps:
476
+ lines.append(f"- [ ] {gap}")
477
+ lines.append("")
478
+
479
+ with open(report_path, "w") as f:
480
+ f.write("\n".join(lines))
481
+
482
+ return report_path
483
+
484
+ def _save_data(self):
485
+ timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
486
+ slug = self.company.lower().replace(" ", "-").replace("/", "-")
487
+ slug = "".join(c for c in slug if c.isalnum() or c == "-")
488
+
489
+ data = {
490
+ "company": self.company,
491
+ "generated": datetime.now().isoformat(),
492
+ "sections": {},
493
+ }
494
+
495
+ for cat_key, section in self.sections.items():
496
+ data["sections"][cat_key] = {
497
+ "label": section.label,
498
+ "queries_executed": section.queries_executed,
499
+ "finding_count": len(section.findings),
500
+ "findings": section.findings,
501
+ "gaps": section.gaps,
502
+ "source_count": len(section.sources),
503
+ "sources": section.sources[:20],
504
+ }
505
+
506
+ json_path = self.output_dir / f"{timestamp}_{slug}_intel.json"
507
+ with open(json_path, "w") as f:
508
+ json.dump(data, f, indent=2)
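
End to end, the agent is driven as the class docstring suggests; a slightly fuller sketch (company, categories, and flag values are illustrative):

```python
from intel import CompetitorIntelAgent

agent = CompetitorIntelAgent("Ekso Bionics")

# Limit to two categories; LLM extras only apply when OPENROUTER_API_KEY is set.
report_path = agent.run(
    categories=["regulatory_clinical", "vulnerabilities_threats"],
    use_external_llm=True,
    delay=1.5,        # seconds between searches (rate limiting)
    max_results=8,
)
print("Markdown report:", report_path)  # a matching *_intel.json is saved alongside
```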
llm.py ADDED
@@ -0,0 +1,154 @@
1
+ """
2
+ OpenRouter LLM client for research analysis and synthesis.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ import re
9
+ import sys
10
+ from typing import Optional
11
+
12
+ try:
13
+ from .config import OPENROUTER_API_KEY, LLM_MODEL
14
+ except ImportError:
15
+ from config import OPENROUTER_API_KEY, LLM_MODEL
16
+
17
+
18
+ class LLMClient:
19
+ """OpenRouter LLM client for research analysis."""
20
+
21
+ def __init__(self, api_key: str = None, model: str = LLM_MODEL):
22
+ self.api_key = api_key or OPENROUTER_API_KEY
23
+ self.model = model
24
+ self.enabled = bool(self.api_key)
25
+
26
+ def call(self, prompt: str, system: str = None, max_tokens: int = 1000) -> Optional[str]:
27
+ if not self.enabled:
28
+ return None
29
+
30
+ import requests
31
+
32
+ messages = []
33
+ if system:
34
+ messages.append({"role": "system", "content": system})
35
+ messages.append({"role": "user", "content": prompt})
36
+
37
+ try:
38
+ response = requests.post(
39
+ "https://openrouter.ai/api/v1/chat/completions",
40
+ headers={
41
+ "Authorization": f"Bearer {self.api_key}",
42
+ "Content-Type": "application/json",
43
+ },
44
+ json={
45
+ "model": self.model,
46
+ "messages": messages,
47
+ "max_tokens": max_tokens,
48
+ "temperature": 0.3,
49
+ },
50
+ timeout=60
51
+ )
52
+ response.raise_for_status()
53
+ return response.json()["choices"][0]["message"]["content"]
54
+ except Exception as e:
55
+ print(f"[LLM ERROR] {e}", file=sys.stderr)
56
+ return None
57
+
58
+ def generate_category_queries(self, company: str, category_label: str) -> list[str]:
59
+ """Generate additional search queries for a specific intel category."""
60
+ system = (
61
+ "You are a competitive intelligence analyst specializing in "
62
+ "rehabilitation robotics and medical devices. "
63
+ "Generate specific, targeted web search queries. "
64
+ "Return ONLY a JSON array of query strings. "
65
+ "Focus on recent sources (last 18 months). Prioritize primary sources."
66
+ )
67
+
68
+ prompt = f"""Company: {company}
69
+ Category: {category_label}
70
+
71
+ Generate 3-4 additional specific search queries for deep competitive intelligence on this company in this category.
72
+ Focus on primary sources: company blog, official announcements, SEC filings, patent databases, verified review sites, job postings.
73
+ Return as JSON array: ["query1", "query2", ...]"""
74
+
75
+ response = self.call(prompt, system)
76
+ if response:
77
+ match = re.search(r'\[.*\]', response, re.DOTALL)
78
+ if match:
79
+ try:
80
+ return json.loads(match.group())[:4]
81
+ except Exception:
82
+ pass
83
+ return []
84
+
85
+ def synthesize_intel(self, company: str, category_key: str,
86
+ category_label: str, results: list,
87
+ synthesis_questions: dict = None) -> dict:
88
+ """Synthesize search results into structured intelligence.
89
+
90
+ Uses per-category questions to produce distilled, actionable findings.
91
+ Returns dict with 'findings' and 'gaps'.
92
+ """
93
+ try:
94
+ from .intel import CATEGORY_SYNTHESIS_QUESTIONS
95
+ except ImportError:
96
+ from intel import CATEGORY_SYNTHESIS_QUESTIONS
97
+
98
+ questions = (synthesis_questions or CATEGORY_SYNTHESIS_QUESTIONS).get(category_key, [])
99
+ if not questions:
100
+ return {"findings": [], "gaps": []}
101
+
102
+ questions_text = "\n".join(f"- {q}" for q in questions)
103
+
104
+ results_text = "\n".join([
105
+ f"- [{r.source}] {r.title}\n {r.snippet[:300]}\n URL: {r.url}"
106
+ for r in results[:15]
107
+ ])
108
+
109
+ system = (
110
+ "You are a competitive intelligence analyst for GURMA.ai, "
111
+ "a Swiss AI company entering rehabilitation robotics with "
112
+ "15 years of patient outcome data from BAMA Teknoloji. "
113
+ "Synthesize search results into actionable intelligence. "
114
+ "Recent sources only (last 18 months). "
115
+ "Flag speculation vs confirmed facts. Include URLs."
116
+ )
117
+
118
+ prompt = f"""Conduct deep competitive intelligence on {company}.
119
+ Category: {category_label}
120
+
121
+ Answer these specific questions based on the search results:
122
+ {questions_text}
123
+
124
+ Search results:
125
+ {results_text}
126
+
127
+ Return JSON:
128
+ {{
129
+ "findings": [
130
+ {{"text": "synthesized answer to one of the questions", "confirmed": true, "source": "url"}},
131
+ {{"text": "inferred insight", "confirmed": false, "source": "url or empty"}}
132
+ ],
133
+ "gaps": [
134
+ {{"text": "question that could NOT be answered from search results"}}
135
+ ]
136
+ }}
137
+
138
+ Rules:
139
+ - confirmed=true ONLY for facts from primary sources (company website, SEC filings, press releases)
140
+ - confirmed=false for inferred or secondary-source information
141
+ - Each finding should directly answer one of the questions above
142
+ - Be specific and quantitative where possible
143
+ - If a question cannot be answered, add it to gaps
144
+ - Maximum 12 findings"""
145
+
146
+ response = self.call(prompt, system, max_tokens=2000)
147
+ if response:
148
+ match = re.search(r'\{.*\}', response, re.DOTALL)
149
+ if match:
150
+ try:
151
+ return json.loads(match.group())
152
+ except Exception:
153
+ pass
154
+ return {"findings": [], "gaps": []}
research.py CHANGED
@@ -1,1922 +1,70 @@
1
  #!/usr/bin/env python3
2
  """
3
- GURMA.ai Research Tool
4
-
5
- Unified research tool combining:
6
- - Multi-backend web search (DuckDuckGo, SerpAPI, Brave)
7
- - Result storage and retrieval
8
- - Batch research runs
9
- - Deep competitive intelligence with LLM synthesis (via OpenRouter)
10
-
11
- Usage:
12
- # Single search
13
- python research.py search "rehabilitation robotics market"
14
-
15
- # Batch research on all competitors
16
- python research.py batch
17
-
18
- # Deep competitive intelligence on a single company
19
- python research.py competitor "Ekso Bionics"
20
- python research.py competitor "Fourier Intelligence" --external-llm
21
- python research.py competitor "Cyberdyne" -c company_overview,product_deep_dive
22
- python research.py competitor --list-categories
23
-
24
- # Extract to competitors.json (for dashboard)
25
- python research.py extract
26
-
27
- # List saved results
28
- python research.py list
29
  """
30
 
31
- from __future__ import annotations
32
-
33
- import argparse
34
- import json
35
- import os
36
- import re
37
- import sys
38
- import time
39
- from abc import ABC, abstractmethod
40
- from dataclasses import dataclass, field, asdict
41
- from datetime import datetime, timedelta
42
- from pathlib import Path
43
- from typing import Optional, Protocol
44
-
45
- # ============================================================
46
- # Configuration
47
- # ============================================================
48
-
49
- # Detect environment: HF Space (Docker at /app) vs local development
50
- def _detect_project_root() -> Path:
51
- """Detect project root based on environment."""
52
- # HF Spaces: running from /app with research.py in root
53
- if os.getenv("HF_SPACE") or Path("/app/research.py").exists():
54
- return Path("/app")
55
- # Local: research.py is in src/utils/
56
- return Path(__file__).parent.parent.parent
57
-
58
- PROJECT_ROOT = _detect_project_root()
59
- IS_HF_SPACE = PROJECT_ROOT == Path("/app")
60
-
61
- # Load .env if present (local development)
62
- if not IS_HF_SPACE:
63
- try:
64
- from dotenv import load_dotenv
65
- load_dotenv(PROJECT_ROOT / ".env")
66
- except ImportError:
67
- pass
68
-
69
- # Directories - different structure for HF Space vs local
70
- if IS_HF_SPACE:
71
- RESEARCH_DIR = PROJECT_ROOT / "data"
72
- DATA_DIR = PROJECT_ROOT / "data"
73
- else:
74
- RESEARCH_DIR = PROJECT_ROOT / "data"
75
- DATA_DIR = PROJECT_ROOT / "src" / "dashboard"
76
-
77
- # Ensure directories exist
78
- RESEARCH_DIR.mkdir(parents=True, exist_ok=True)
79
- DATA_DIR.mkdir(parents=True, exist_ok=True)
80
-
81
- # API Keys
82
- SERPAPI_KEY = os.getenv("SERPAPI_KEY")
83
- BRAVE_API_KEY = os.getenv("BRAVE_API_KEY")
84
- OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
85
-
86
- # LLM Config
87
- LLM_MODEL = "deepseek/deepseek-chat"
88
- LLM_ENABLED = bool(OPENROUTER_API_KEY)
89
-
90
- # Known competitors for batch research
91
- COMPETITORS = [
92
- "Hocoma", "Ekso Bionics", "Lifeward ReWalk", "Fourier Intelligence",
93
- "Cyberdyne HAL", "Wandercraft", "Myomo", "Bionik",
94
- ]
95
-
96
- # Query templates for batch research
97
- BATCH_QUERY_TEMPLATES = [
98
- "{company} latest news 2025 2026",
99
- "{company} funding investors valuation",
100
- "{company} FDA approval regulatory",
101
- "{company} partnerships collaborations",
102
- "{company} AI machine learning technology",
103
- # Targeted regulatory sources
104
- "site:accessdata.fda.gov {company}", # FDA 510(k) clearances
105
- "site:clinicaltrials.gov {company} rehabilitation", # Clinical trials
106
- # Funding & corporate
107
- "site:crunchbase.com {company}", # Funding history
108
- "site:sec.gov {company} 10-K OR 8-K", # SEC filings (public companies)
109
- # Patents & innovation
110
- "site:patents.google.com {company} exoskeleton OR rehabilitation",
111
- ]
112
-
113
- MARKET_QUERIES = [
114
- "rehabilitation robotics market size 2026 forecast",
115
- "exoskeleton market growth AI integration",
116
- "rehabilitation robotics insurance reimbursement",
117
- "medical exoskeleton FDA approval 2025",
118
- "stroke rehabilitation AI technology",
119
- "spinal cord injury exoskeleton treatment",
120
- "rehabilitation robotics competitive landscape",
121
- # Industry publications
122
- "site:exoskeletonreport.com 2025 2026", # Industry news
123
- "site:medgadget.com exoskeleton rehabilitation", # Med-tech news
124
- # Regulatory landscape
125
- "site:fda.gov rehabilitation robotics guidance",
126
- "MDR medical device regulation exoskeleton CE mark 2025",
127
- # Academic/clinical
128
- "site:pubmed.ncbi.nlm.nih.gov rehabilitation robotics AI 2024 2025",
129
- # Insurance/reimbursement (key for Holland market)
130
- "exoskeleton insurance coverage CMS reimbursement code",
131
- "rehabilitation robotics HCPCS code billing",
132
- ]
133
-
134
- # Deep competitive intelligence query templates by category
135
- # Tailored for rehabilitation robotics / medical device companies (~25 queries)
136
- DEEP_INTEL_CATEGORIES = {
137
- "company_overview": {
138
- "label": "Company Overview",
139
- "queries": [
140
- "{company} founding history milestones",
141
- "{company} CEO leadership team background",
142
- "{company} funding rounds investors valuation",
143
- "{company} employee count headcount growth",
144
- ],
145
- },
146
- "product_technology": {
147
- "label": "Product & Technology",
148
- "queries": [
149
- "{company} exoskeleton rehabilitation robot product specifications",
150
- "{company} AI machine learning technology capabilities",
151
- "{company} new product launch release 2025 2026",
152
- "{company} patent filings exoskeleton rehabilitation innovation",
153
- "site:patents.google.com {company} exoskeleton OR rehabilitation",
154
- ],
155
- },
156
- "regulatory_clinical": {
157
- "label": "Regulatory & Clinical",
158
- "queries": [
159
- "site:accessdata.fda.gov {company}",
160
- "{company} FDA 510k clearance CE mark MDR approval",
161
- "site:clinicaltrials.gov {company} rehabilitation",
162
- "{company} clinical outcomes study peer-reviewed results",
163
- ],
164
- },
165
- "market_channels": {
166
- "label": "Market & Channels",
167
- "queries": [
168
- "{company} hospital clinic installations customer base",
169
- "{company} insurance reimbursement coverage CMS",
170
- "{company} partnerships distributors resellers",
171
- "{company} conference MEDICA ACRM CES 2025 2026",
172
- ],
173
- },
174
- "vulnerabilities_threats": {
175
- "label": "Vulnerabilities & Threats",
176
- "queries": [
177
- "{company} weaknesses problems criticism recall",
178
- "{company} layoffs restructuring financial difficulty",
179
- "{company} Glassdoor employee reviews satisfaction",
180
- "{company} rehabilitation robotics AI expansion strategy 2025 2026",
181
- "{company} acquisitions mergers market share growth",
182
- "site:sec.gov {company} 10-K OR 8-K",
183
- ],
184
- },
185
- }
186
-
187
- # Primary/authoritative source domains for confirmed vs speculative scoring
188
- PRIMARY_SOURCE_DOMAINS = {
189
- # Regulatory / Official
190
- "sec.gov", "fda.gov", "clinicaltrials.gov", "patents.google.com",
191
- "accessdata.fda.gov",
192
- # Financial / Business data
193
- "crunchbase.com", "tracxn.com", "pitchbook.com", "cbinsights.com",
194
- "bloomberg.com", "reuters.com", "wsj.com", "finance.yahoo.com",
195
- "wellfound.com",
196
- # Professional
197
- "linkedin.com", "glassdoor.com",
198
- # Review platforms
199
- "g2.com", "capterra.com", "trustpilot.com",
200
- # Industry-specific
201
- "therobotreport.com", "exoskeletonreport.com", "medgadget.com",
202
- }
203
-
204
- # Expected data points per category — used for automatic gap detection.
205
- # Each field maps to regex patterns that indicate coverage in result text.
206
- CATEGORY_EXPECTED = {
207
- "company_overview": {
208
- "founding_year": [r'(?:founded|established|incorporated|started)\s+(?:in\s+)?(\d{4})'],
209
- "leadership": [r'(?:CEO|Chief Executive|CTO|CFO|President|Founder|Chairman|COO)'],
210
- "funding": [r'\$[\d,.]+\s*(?:million|billion|M|B)', r'(?:series\s+[A-F]|seed|IPO|funding\s+round)'],
211
- "employees": [r'(\d[\d,]*)\s*(?:employees|staff|headcount|team\s+members|workers)'],
212
- },
213
- "product_technology": {
214
- "products": [r'(?:product|device|robot|exoskeleton|system)\s'],
215
- "technology": [r'(?:AI|machine\s+learning|deep\s+learning|sensor|actuator|algorithm|neural)'],
216
- "patents": [r'(?:patent|IP|intellectual\s+property|invention)'],
217
- "recent_launches": [r'(?:launch|release|announc|unveil|introduc)\w*\s+.{0,30}(?:2025|2026)'],
218
- },
219
- "regulatory_clinical": {
220
- "fda_clearance": [r'(?:510\(?k\)?|FDA.?clear|FDA.?approv|de\s*novo)'],
221
- "ce_mark": [r'(?:CE.?mark|MDR|EU.?approv|notified.?body)'],
222
- "clinical_trials": [r'(?:clinical.?trial|NCT\d|randomized|controlled.?study|peer.?review)'],
223
- "clinical_outcomes": [r'(?:outcome|efficacy|recovery.?rate|improvement|functional.?score)'],
224
- },
225
- "market_channels": {
226
- "installations": [r'(?:hospital|clinic|center|install|deploy|site)\s'],
227
- "reimbursement": [r'(?:reimburse|insurance|CMS|Medicare|Medicaid|HCPCS|coverage|payer)'],
228
- "partnerships": [r'(?:partner|alliance|collaborat|distribut|reseller|dealer)'],
229
- "events": [r'(?:conference|MEDICA|ACRM|CES|expo|trade\s+show|summit)'],
230
- },
231
- "vulnerabilities_threats": {
232
- "weaknesses": [r'(?:weakness|problem|challenge|struggle|fail|recall|warning)'],
233
- "financial_stress": [r'(?:layoff|restructur|loss|declining|debt|delist|penny.stock)'],
234
- "employee_sentiment": [r'(?:glassdoor|employee.?review|work.?culture|turnover)'],
235
- "expansion": [r'(?:expansion|new.?market|acqui|merger|market.?share|growth.?strategy)'],
236
- },
237
- }
238
-
239
- # Per-category synthesis questions — the LLM answers these from search results.
240
- # Tailored for rehabilitation robotics / medical device competitors.
241
- CATEGORY_SYNTHESIS_QUESTIONS = {
242
- "company_overview": [
243
- "Founding story and key milestones",
244
- "Leadership team (backgrounds, medical device experience)",
245
- "Funding history (rounds, investors, valuations)",
246
- "Employee count and growth trajectory",
247
- ],
248
- "product_technology": [
249
- "Product catalog (devices, indications, patient populations)",
250
- "AI / machine learning capabilities (data they train on, algorithms used)",
251
- "Recent product launches and roadmap clues (last 12 months)",
252
- "Patent portfolio and innovation direction",
253
- "How does their technology compare to GURMA.ai's outcome-data approach?",
254
- ],
255
- "regulatory_clinical": [
256
- "FDA clearances (510(k) numbers, De Novo, dates)",
257
- "CE mark / MDR status in Europe",
258
- "Active clinical trials (ClinicalTrials.gov entries, endpoints)",
259
- "Published clinical outcomes (peer-reviewed studies, recovery rates)",
260
- "Reimbursement status (CMS, Medicare, private payer coverage)",
261
- ],
262
- "market_channels": [
263
- "Hospital and clinic installations (how many sites, which countries)",
264
- "Insurance and reimbursement strategy (pricing, payer relationships)",
265
- "Distribution partnerships and reseller network",
266
- "Conference and KOL presence (MEDICA, ACRM, physician endorsements)",
267
- ],
268
- "vulnerabilities_threats": [
269
- "What are they bad at? (clinical limitations, missing indications)",
270
- "Financial health (SEC filings, cash burn, stock trajectory)",
271
- "Employee sentiment (Glassdoor, hiring patterns, layoffs)",
272
- "Growth strategy (acquisitions, new markets, AI investments)",
273
- "What could they do that would hurt GURMA.ai most?",
274
- "Early warning signals to monitor",
275
- ],
276
- }
277
-
278
-
279
- # ============================================================
280
- # Search Backends (Open/Closed Principle)
281
- # ============================================================
282
-
283
- class SearchResult(Protocol):
284
- """Protocol for search result."""
285
- title: str
286
- url: str
287
- snippet: str
288
- source: str
289
-
290
-
291
- @dataclass
292
- class WebSearchResult:
293
- """Standard search result."""
294
- title: str
295
- url: str
296
- snippet: str
297
- source: str
298
-
299
-
300
- class SearchBackend(ABC):
301
- """Abstract base for search backends (Liskov Substitution)."""
302
-
303
- @property
304
- @abstractmethod
305
- def name(self) -> str:
306
- """Backend identifier."""
307
- pass
308
-
309
- @abstractmethod
310
- def search(self, query: str, max_results: int = 10) -> list[WebSearchResult]:
311
- """Execute search and return results."""
312
- pass
313
-
314
- @abstractmethod
315
- def is_available(self) -> bool:
316
- """Check if backend is available (dependencies, API keys)."""
317
- pass
318
-
319
-
320
- class DuckDuckGoBackend(SearchBackend):
321
- """DuckDuckGo search (no API key required)."""
322
-
323
- @property
324
- def name(self) -> str:
325
- return "duckduckgo"
326
-
327
- def is_available(self) -> bool:
328
- try:
329
- from ddgs import DDGS
330
- return True
331
- except ImportError:
332
- try:
333
- from duckduckgo_search import DDGS
334
- return True
335
- except ImportError:
336
- return False
337
-
338
- def search(self, query: str, max_results: int = 10) -> list[WebSearchResult]:
339
- try:
340
- from ddgs import DDGS
341
- except ImportError:
342
- from duckduckgo_search import DDGS
343
-
344
- results = []
345
- ddgs = DDGS()
346
- for r in ddgs.text(query, max_results=max_results):
347
- results.append(WebSearchResult(
348
- title=r.get("title", ""),
349
- url=r.get("href", r.get("link", "")),
350
- snippet=r.get("body", r.get("snippet", "")),
351
- source=self.name
352
- ))
353
- return results
354
-
355
-
356
- class SerpAPIBackend(SearchBackend):
357
- """SerpAPI search (requires API key)."""
358
-
359
- @property
360
- def name(self) -> str:
361
- return "serpapi"
362
-
363
- def is_available(self) -> bool:
364
- try:
365
- import requests
366
- return bool(SERPAPI_KEY)
367
- except ImportError:
368
- return False
369
-
370
- def search(self, query: str, max_results: int = 10) -> list[WebSearchResult]:
371
- import requests
372
-
373
- response = requests.get(
374
- "https://serpapi.com/search",
375
- params={"q": query, "api_key": SERPAPI_KEY, "engine": "google", "num": max_results},
376
- timeout=30
377
- )
378
- response.raise_for_status()
379
- data = response.json()
380
-
381
- results = []
382
- for r in data.get("organic_results", [])[:max_results]:
383
- results.append(WebSearchResult(
384
- title=r.get("title", ""),
385
- url=r.get("link", ""),
386
- snippet=r.get("snippet", ""),
387
- source=self.name
388
- ))
389
- return results
390
-
391
-
392
- class BraveBackend(SearchBackend):
393
- """Brave search (requires API key)."""
394
-
395
- @property
396
- def name(self) -> str:
397
- return "brave"
398
-
399
- def is_available(self) -> bool:
400
- try:
401
- import requests
402
- return bool(BRAVE_API_KEY)
403
- except ImportError:
404
- return False
405
-
406
- def search(self, query: str, max_results: int = 10) -> list[WebSearchResult]:
407
- import requests
408
-
409
- response = requests.get(
410
- "https://api.search.brave.com/res/v1/web/search",
411
- headers={"Accept": "application/json", "X-Subscription-Token": BRAVE_API_KEY},
412
- params={"q": query, "count": min(max_results, 20)},
413
- timeout=30
414
- )
415
- response.raise_for_status()
416
- data = response.json()
417
-
418
- results = []
419
- for r in data.get("web", {}).get("results", [])[:max_results]:
420
- results.append(WebSearchResult(
421
- title=r.get("title", ""),
422
- url=r.get("url", ""),
423
- snippet=r.get("description", ""),
424
- source=self.name
425
- ))
426
- return results
427
-
428
-
429
- # Backend registry
430
- BACKENDS: dict[str, SearchBackend] = {
431
- "duckduckgo": DuckDuckGoBackend(),
432
- "ddg": DuckDuckGoBackend(),
433
- "serpapi": SerpAPIBackend(),
434
- "brave": BraveBackend(),
435
- }
436
-
437
-
438
- def get_backend(name: str = "duckduckgo") -> SearchBackend:
439
- """Get search backend by name."""
440
- backend = BACKENDS.get(name)
441
- if not backend:
442
- raise ValueError(f"Unknown backend: {name}. Available: {list(BACKENDS.keys())}")
443
- if not backend.is_available():
444
- raise RuntimeError(f"Backend '{name}' not available. Check dependencies/API keys.")
445
- return backend
446
-
447
-
448
- # ============================================================
449
- # Result Storage (Single Responsibility)
450
- # ============================================================
451
-
452
- class ResultStorage:
453
- """Handles saving and loading search results."""
454
-
455
- def __init__(self, directory: Path = RESEARCH_DIR):
456
- self.directory = directory
457
- self.directory.mkdir(parents=True, exist_ok=True)
458
-
459
- def save(self, query: str, results: list[WebSearchResult], backend: str) -> tuple[Path, Path]:
460
- """Save results in JSON and Markdown formats. Returns (json_path, md_path)."""
461
- timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
462
- slug = self._slugify(query)
463
- base_name = f"{timestamp}_{slug}"
464
-
465
- # Build data
466
- data = {
467
- "query": query,
468
- "timestamp": datetime.now().isoformat(),
469
- "backend": backend,
470
- "result_count": len(results),
471
- "results": [asdict(r) for r in results]
472
- }
473
-
474
- # Save JSON
475
- json_path = self.directory / f"{base_name}.json"
476
- with open(json_path, "w") as f:
477
- json.dump(data, f, indent=2)
478
-
479
- # Save Markdown
480
- md_path = self.directory / f"{base_name}.md"
481
- with open(md_path, "w") as f:
482
- f.write(f"# Search: {query}\n\n")
483
- f.write(f"**Date:** {data['timestamp']} \n")
484
- f.write(f"**Backend:** {backend} \n")
485
- f.write(f"**Results:** {len(results)}\n\n---\n")
486
- for i, r in enumerate(results, 1):
487
- f.write(f"\n## {i}. {r.title}\n\n**URL:** {r.url}\n\n{r.snippet}\n")
488
-
489
- return json_path, md_path
490
-
491
- def list_searches(self, limit: int = 20) -> list[dict]:
492
- """List recent saved searches."""
493
- searches = []
494
- for json_file in sorted(self.directory.glob("*.json"), reverse=True):
495
- if json_file.name.startswith("."):
496
- continue
497
- try:
498
- with open(json_file) as f:
499
- data = json.load(f)
500
- searches.append({
501
- "file": json_file.name,
502
- "query": data.get("query", ""),
503
- "timestamp": data.get("timestamp", ""),
504
- "results": data.get("result_count", 0)
505
- })
506
- except Exception:
507
- pass
508
- if len(searches) >= limit:
509
- break
510
- return searches
511
-
512
- def get_recent_queries(self, days: int = 7) -> set[str]:
513
- """Get queries executed within the last N days (normalized for deduplication)."""
514
- cutoff = datetime.now() - timedelta(days=days)
515
- recent = set()
516
-
517
- for json_file in self.directory.glob("*.json"):
518
- if json_file.name.startswith("."):
519
- continue
520
- try:
521
- with open(json_file) as f:
522
- data = json.load(f)
523
- ts = data.get("timestamp", "")
524
- if ts:
525
- file_date = datetime.fromisoformat(ts.replace("Z", "+00:00").split("+")[0])
526
- if file_date >= cutoff:
527
- query = data.get("query", "").lower().strip()
528
- recent.add(query)
529
- except Exception:
530
- pass
531
- return recent
532
-
533
- def _slugify(self, text: str, max_len: int = 50) -> str:
534
- """Convert text to filesystem-safe slug."""
535
- slug = text.lower()[:max_len].replace(" ", "-").replace("/", "-")
536
- return "".join(c for c in slug if c.isalnum() or c == "-")
537
-
538
-
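A quick sketch of the storage round trip; the directory and query are illustrative:

from pathlib import Path
from research import ResultStorage, WebSearchResult

storage = ResultStorage(Path("data/research"))   # directory is created if missing
hits = [WebSearchResult(title="Example", url="https://example.com",
                        snippet="...", source="duckduckgo")]
json_path, md_path = storage.save("example query", hits, backend="duckduckgo")
print(json_path.name)                  # e.g. 20260218-141500_example-query.json
print(storage.list_searches(limit=5))  # newest first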
539
- # ============================================================
540
- # Search Service (Facade Pattern)
541
- # ============================================================
542
-
543
- class SearchService:
544
- """High-level search interface combining backend and storage."""
545
-
546
- def __init__(self, backend: str = "duckduckgo", storage: ResultStorage = None):
547
- self.backend = get_backend(backend)
548
- self.storage = storage or ResultStorage()
549
-
550
- def search(self, query: str, max_results: int = 10, save: bool = True) -> list[WebSearchResult]:
551
- """Execute search, optionally save results."""
552
- results = self.backend.search(query, max_results)
553
- if save and results:
554
- self.storage.save(query, results, self.backend.name)
555
- return results
556
-
557
- def search_batch(self, queries: list[str], max_results: int = 10,
558
- delay: float = 0.5, callback=None) -> dict[str, int]:
559
- """Execute multiple searches with rate limiting.
560
-
561
- Returns dict of {query: result_count}.
562
- """
563
- stats = {}
564
- for i, query in enumerate(queries, 1):
565
- if callback:
566
- callback(i, len(queries), query)
567
- try:
568
- results = self.search(query, max_results, save=True)
569
- stats[query] = len(results)
570
- except Exception as e:
571
- stats[query] = -1 # Error indicator
572
- print(f"Error on '{query}': {e}", file=sys.stderr)
573
-
574
- if delay > 0 and i < len(queries):
575
- time.sleep(delay)
576
-
577
- return stats
578
-
579
-
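A hedged sketch of a small batch run through the facade (the queries are examples):

from research import SearchService

service = SearchService(backend="duckduckgo")
stats = service.search_batch(
    ["Ekso Bionics layoffs 2025", "Cyberdyne HAL pricing"],
    max_results=5,
    delay=1.0,                                    # rate limiting between queries
    callback=lambda i, n, q: print(f"[{i}/{n}] {q}"),
)
print(stats)   # {query: result_count, ...}; -1 marks a failed query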
580
- # ============================================================
581
- # LLM Integration (Dependency Inversion)
582
- # ============================================================
583
-
584
- class LLMClient:
585
- """OpenRouter LLM client for research analysis."""
586
-
587
- def __init__(self, api_key: str = None, model: str = LLM_MODEL):
588
- self.api_key = api_key or OPENROUTER_API_KEY
589
- self.model = model
590
- self.enabled = bool(self.api_key)
591
-
592
- def call(self, prompt: str, system: str = None, max_tokens: int = 1000) -> Optional[str]:
593
- """Make LLM API call. Returns response text or None."""
594
- if not self.enabled:
595
- return None
596
-
597
- import requests
598
-
599
- messages = []
600
- if system:
601
- messages.append({"role": "system", "content": system})
602
- messages.append({"role": "user", "content": prompt})
603
-
604
- try:
605
- response = requests.post(
606
- "https://openrouter.ai/api/v1/chat/completions",
607
- headers={
608
- "Authorization": f"Bearer {self.api_key}",
609
- "Content-Type": "application/json",
610
- },
611
- json={
612
- "model": self.model,
613
- "messages": messages,
614
- "max_tokens": max_tokens,
615
- "temperature": 0.3,
616
- },
617
- timeout=60
618
- )
619
- response.raise_for_status()
620
- return response.json()["choices"][0]["message"]["content"]
621
- except Exception as e:
622
- print(f"[LLM ERROR] {e}", file=sys.stderr)
623
- return None
624
-
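A minimal call sketch; without OPENROUTER_API_KEY the client is disabled and call() returns None:

from research import LLMClient

llm = LLMClient()   # reads OPENROUTER_API_KEY and LLM_MODEL via config
answer = llm.call(
    "Summarize the rehabilitation robotics market in one sentence.",
    system="You are a terse market analyst.",
    max_tokens=100,
)
print(answer or "LLM disabled: no API key")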
625
- def generate_category_queries(self, company: str, category_label: str) -> list[str]:
626
- """Generate additional search queries for a specific intel category."""
627
- system = (
628
- "You are a competitive intelligence analyst specializing in "
629
- "rehabilitation robotics and medical devices. "
630
- "Generate specific, targeted web search queries. "
631
- "Return ONLY a JSON array of query strings. "
632
- "Focus on recent sources (last 18 months). Prioritize primary sources."
633
- )
634
-
635
- prompt = f"""Company: {company}
636
- Category: {category_label}
637
-
638
- Generate 3-4 additional specific search queries for deep competitive intelligence on this company in this category.
639
- Focus on primary sources: company blog, official announcements, SEC filings, patent databases, verified review sites, job postings.
640
- Return as JSON array: ["query1", "query2", ...]"""
641
-
642
- response = self.call(prompt, system)
643
- if response:
644
- match = re.search(r'\[.*\]', response, re.DOTALL)
645
- if match:
646
- try:
647
- return json.loads(match.group())[:4]
648
- except Exception:
649
- pass
650
- return []
651
-
652
- def synthesize_intel(self, company: str, category_key: str,
653
- category_label: str, results: list) -> dict:
654
- """Synthesize search results into structured intelligence.
655
-
656
- Uses per-category questions from CATEGORY_SYNTHESIS_QUESTIONS to
657
- produce distilled, actionable findings instead of raw snippets.
658
- Returns dict with 'findings' and 'gaps'.
659
- """
660
- questions = CATEGORY_SYNTHESIS_QUESTIONS.get(category_key, [])
661
- if not questions:
662
- return {"findings": [], "gaps": []}
663
-
664
- questions_text = "\n".join(f"- {q}" for q in questions)
665
-
666
- results_text = "\n".join([
667
- f"- [{r.source}] {r.title}\n {r.snippet[:300]}\n URL: {r.url}"
668
- for r in results[:15]
669
- ])
670
-
671
- system = (
672
- "You are a competitive intelligence analyst for GURMA.ai, "
673
- "a Swiss AI company entering rehabilitation robotics with "
674
- "15 years of patient outcome data from BAMA Teknoloji. "
675
- "Synthesize search results into actionable intelligence. "
676
- "Recent sources only (last 18 months). "
677
- "Flag speculation vs confirmed facts. Include URLs."
678
- )
679
-
680
- prompt = f"""Conduct deep competitive intelligence on {company}.
681
- Category: {category_label}
682
-
683
- Answer these specific questions based on the search results:
684
- {questions_text}
685
-
686
- Search results:
687
- {results_text}
688
-
689
- Return JSON:
690
- {{
691
- "findings": [
692
- {{"text": "synthesized answer to one of the questions", "confirmed": true, "source": "url"}},
693
- {{"text": "inferred insight", "confirmed": false, "source": "url or empty"}}
694
- ],
695
- "gaps": [
696
- {{"text": "question that could NOT be answered from search results"}}
697
- ]
698
- }}
699
-
700
- Rules:
701
- - confirmed=true ONLY for facts from primary sources (company website, SEC filings, press releases)
702
- - confirmed=false for inferred or secondary-source information
703
- - Each finding should directly answer one of the questions above
704
- - Be specific and quantitative where possible
705
- - If a question cannot be answered, add it to gaps
706
- - Maximum 12 findings"""
707
-
708
- response = self.call(prompt, system, max_tokens=2000)
709
- if response:
710
- match = re.search(r'\{.*\}', response, re.DOTALL)
711
- if match:
712
- try:
713
- return json.loads(match.group())
714
- except Exception:
715
- pass
716
- return {"findings": [], "gaps": []}
717
-
718
-
719
- # ============================================================
720
- # Deep Competitive Intelligence Agent
721
- # ============================================================
722
-
723
- @dataclass
724
- class IntelSection:
725
- """A section of the competitive intelligence report."""
726
- category: str
727
- label: str
728
- queries_executed: list = field(default_factory=list)
729
- results: list = field(default_factory=list)
730
- findings: list = field(default_factory=list)
731
- gaps: list = field(default_factory=list)
732
- sources: list = field(default_factory=list)
733
-
734
-
735
- class CompetitorIntelAgent:
736
- """Deep competitive intelligence agent for a single competitor.
737
-
738
- Runs structured research across 7 categories and produces
739
- a markdown + JSON report with [CONFIRMED]/[SPECULATIVE] tagging.
740
-
741
- Usage:
742
- agent = CompetitorIntelAgent("Ekso Bionics")
743
- report = agent.run() # built-in analysis
744
- report = agent.run(use_external_llm=True) # + OpenRouter
745
- """
746
-
747
- def __init__(self, company: str, search: SearchService = None, llm: LLMClient = None):
748
- self.company = company
749
- self.search = search or SearchService()
750
- self.llm = llm or LLMClient()
751
- self.sections: dict[str, IntelSection] = {}
752
- self.output_dir = RESEARCH_DIR / "intel"
753
- self.output_dir.mkdir(parents=True, exist_ok=True)
754
-
755
- def run(self, categories: list[str] = None, use_external_llm: bool = False,
756
- delay: float = 1.0, max_results: int = 10) -> Path:
757
- """Run deep competitive intelligence and generate report.
758
-
759
- Built-in analysis (source scoring, dedup, gap detection) always runs.
760
-
761
- Args:
762
- categories: Which categories to research (default: all 7)
763
- use_external_llm: Also use external LLM (OpenRouter) for enhanced analysis
764
- delay: Delay between searches in seconds (rate limiting)
765
- max_results: Max results per search query
766
-
767
- Returns: Path to generated markdown report
768
- """
769
- cats = categories or list(DEEP_INTEL_CATEGORIES.keys())
770
-
771
- total_queries = sum(
772
- len(DEEP_INTEL_CATEGORIES[c]["queries"])
773
- for c in cats if c in DEEP_INTEL_CATEGORIES
774
- )
775
-
776
- print(f"\n{'='*60}")
777
- print(f"Deep Competitive Intelligence: {self.company}")
778
- print(f"Categories: {len(cats)} | Queries: ~{total_queries}")
779
- print(f"Analysis: built-in{' + external LLM' if use_external_llm and self.llm.enabled else ''}")
780
- print(f"{'='*60}\n")
781
-
782
- for cat_key in cats:
783
- cat = DEEP_INTEL_CATEGORIES.get(cat_key)
784
- if not cat:
785
- print(f"[SKIP] Unknown category: {cat_key}")
786
- continue
787
-
788
- section = IntelSection(category=cat_key, label=cat["label"])
789
- self._research_category(section, cat, use_external_llm, delay, max_results)
790
- self.sections[cat_key] = section
791
-
792
- report_path = self._generate_report(use_external_llm)
793
- self._save_data()
794
-
795
- print(f"\n{'='*60}")
796
- print(f"Report: {report_path}")
797
- total_findings = sum(len(s.findings) for s in self.sections.values())
798
- total_gaps = sum(len(s.gaps) for s in self.sections.values())
799
- print(f"Findings: {total_findings} | Gaps: {total_gaps}")
800
- print(f"{'='*60}\n")
801
-
802
- return report_path
803
-
804
- def _research_category(self, section: IntelSection, cat: dict,
805
- use_external_llm: bool, delay: float, max_results: int):
806
- """Research a single category: generate queries, search, analyze.
807
-
808
- Built-in analysis (source scoring, dedup, gap detection) always runs.
809
- External LLM (OpenRouter) is an optional enhancement on top.
810
- """
811
- print(f"\n--- {section.label} ---")
812
-
813
- queries = [q.format(company=self.company) for q in cat["queries"]]
814
-
815
- # External LLM can generate additional targeted queries
816
- if use_external_llm and self.llm.enabled:
817
- extra = self.llm.generate_category_queries(self.company, section.label)
818
- if extra:
819
- queries.extend(extra)
820
- print(f" [EXTERNAL LLM] +{len(extra)} additional queries")
821
-
822
- for query in queries:
823
- print(f" [SEARCH] {query}")
824
- try:
825
- results = self.search.search(query, max_results=max_results, save=True)
826
- section.queries_executed.append(query)
827
- section.results.extend(results)
828
- for r in results:
829
- if r.url and r.url not in section.sources:
830
- section.sources.append(r.url)
831
- print(f" -> {len(results)} results")
832
- except Exception as e:
833
- print(f" -> Error: {e}")
834
-
835
- if delay > 0:
836
- time.sleep(delay)
837
-
838
- # Always run built-in analysis (no external API needed)
839
- section.findings = self._analyze_section(section)
840
- section.gaps = self._detect_gaps(section)
841
-
842
- confirmed = sum(1 for f in section.findings if f.get("confirmed"))
843
- speculative = len(section.findings) - confirmed
844
- print(f" [ANALYSIS] {len(section.findings)} findings ({confirmed} confirmed, {speculative} speculative)")
845
- if section.gaps:
846
- print(f" [GAPS] {len(section.gaps)}: {', '.join(g['text'] for g in section.gaps[:3])}")
847
-
848
- # LLM synthesis — automatic when OpenRouter is available
849
- if self.llm.enabled and section.results:
850
- print(f" [SYNTHESIS] Synthesizing {section.label}...")
851
- synthesis = self.llm.synthesize_intel(
852
- self.company, section.category, section.label, section.results
853
- )
854
- synth_findings = synthesis.get("findings", [])
855
- synth_gaps = synthesis.get("gaps", [])
856
-
857
- if synth_findings:
858
- # Synthesized findings are distilled answers — use them as primary.
859
- # Append any built-in findings from sources the LLM missed.
860
- synth_sources = {f.get("source", "") for f in synth_findings if f.get("source")}
861
- for bf in section.findings:
862
- if bf.get("source") and bf["source"] not in synth_sources:
863
- synth_findings.append(bf)
864
- section.findings = synth_findings
865
-
866
- for f in synth_findings:
867
- if isinstance(f, dict):
868
- tag = "[CONFIRMED]" if f.get("confirmed") else "[SPECULATIVE]"
869
- print(f" {tag} {f.get('text', '')[:80]}")
870
-
871
- # Merge gaps from synthesis with built-in gaps
872
- existing_gaps = {g["text"].lower() for g in section.gaps}
873
- for sg in synth_gaps:
874
- gap_text = sg.get("text", sg) if isinstance(sg, dict) else sg
875
- if gap_text.lower() not in existing_gaps:
876
- section.gaps.append({"text": gap_text})
877
-
878
- def _analyze_section(self, section: IntelSection) -> list[dict]:
879
- """Built-in smart analysis: source scoring, dedup, structured extraction.
880
-
881
- This runs without any external LLM. It:
882
- 1. Filters results to those mentioning the company
883
- 2. Scores each source as confirmed (primary) or speculative (secondary)
884
- 3. Deduplicates by title similarity
885
- 4. Returns structured findings capped at 15 per section
886
- """
887
- findings = []
888
- seen_keys = set()
889
- aliases = self._get_aliases()
890
-
891
- for r in section.results:
892
- text_lower = f"{r.title} {r.snippet}".lower()
893
-
894
- # Only include results that mention the company
895
- if not any(alias in text_lower for alias in aliases):
896
- continue
897
-
898
- # Deduplicate by normalized title prefix
899
- dedup_key = re.sub(r'[^a-z0-9]', '', r.title.lower()[:50])
900
- if dedup_key in seen_keys:
901
- continue
902
- seen_keys.add(dedup_key)
903
-
904
- # Score source quality
905
- confirmed = self._is_primary_source(r.url)
906
-
907
- # Clean finding text
908
- title = r.title.strip()
909
- snippet = r.snippet.strip()[:250]
910
- finding_text = f"{title}: {snippet}" if snippet else title
911
-
912
- findings.append({
913
- "text": finding_text,
914
- "source": r.url,
915
- "confirmed": confirmed,
916
- })
917
-
918
- # Sort: confirmed first, then by text length (richer content first)
919
- findings.sort(key=lambda f: (not f["confirmed"], -len(f["text"])))
920
- return findings[:15]
921
-
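The dedup key is the first 50 characters of the title, lowercased, with non-alphanumerics stripped, so near-identical headlines collapse into one finding:

import re

def dedup_key(title: str) -> str:
    # same normalization as _analyze_section above
    return re.sub(r'[^a-z0-9]', '', title.lower()[:50])

assert dedup_key("Ekso Bionics: Q3 results!") == dedup_key("EKSO BIONICS -- Q3 Results")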
922
- def _is_primary_source(self, url: str) -> bool:
923
- """Score whether a URL is a primary/authoritative source.
924
-
925
- Primary = company's own site, regulatory filings, financial databases,
926
- established industry publications, review platforms.
927
- """
928
- if not url:
929
- return False
930
- url_lower = url.lower()
931
-
932
- # Check known primary domains
933
- for domain in PRIMARY_SOURCE_DOMAINS:
934
- if domain in url_lower:
935
- return True
936
-
937
- # Check if it's the company's own domain
938
- for alias in self._get_aliases():
939
- # Normalize: "ekso bionics" -> "eksobionics"
940
- slug = alias.replace(" ", "")
941
- host = url_lower.split("/")[2] if len(url_lower.split("/")) > 2 else ""
942
- if len(slug) >= 4 and slug in host:
- return True
943
-
944
- return False
945
-
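With the guard written out, the company-domain check reduces to: take the host segment of the URL and test whether a squashed alias of at least 4 characters appears in it:

url_lower = "https://eksobionics.com/investors".lower()
parts = url_lower.split("/")
host = parts[2] if len(parts) > 2 else ""
print("eksobionics" in host)   # True -> treated as a primary source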
946
- def _detect_gaps(self, section: IntelSection) -> list[dict]:
947
- """Detect missing data points for this category.
948
-
949
- Checks findings text against expected patterns per category.
950
- Returns list of gap dicts for fields with no matching data.
951
- """
952
- expected = CATEGORY_EXPECTED.get(section.category, {})
953
- if not expected:
954
- return []
955
-
956
- # Build text corpus from company-relevant results only
957
- aliases = self._get_aliases()
958
- relevant_text = " ".join(
959
- f"{r.title} {r.snippet}"
960
- for r in section.results
961
- if any(a in f"{r.title} {r.snippet}".lower() for a in aliases)
962
- )
963
-
964
- if not relevant_text:
965
- return [{"text": f"No relevant results found for {section.label}"}]
966
-
967
- relevant_lower = relevant_text.lower()
968
- gaps = []
969
- for field_name, patterns in expected.items():
970
- found = any(
971
- re.search(p, relevant_lower, re.IGNORECASE)
972
- for p in patterns
973
- )
974
- if not found:
975
- label = field_name.replace("_", " ").replace("/", " / ")
976
- gaps.append({"text": f"No data found for: {label}"})
977
-
978
- return gaps
979
-
980
- def _get_aliases(self) -> list[str]:
981
- """Get lowercase company aliases for text matching."""
982
- info = COMPANY_DEFINITIONS.get(self.company, {})
983
- aliases = info.get("aliases", [])
984
- if not aliases:
985
- aliases = [self.company.lower()]
986
- return aliases
987
-
988
- def _generate_report(self, use_external_llm: bool) -> Path:
989
- """Generate structured markdown report."""
990
- timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
991
- slug = self.company.lower().replace(" ", "-").replace("/", "-")
992
- slug = "".join(c for c in slug if c.isalnum() or c == "-")
993
-
994
- report_path = self.output_dir / f"{timestamp}_{slug}_intel.md"
995
-
996
- method = "Built-in analysis"
997
- if LLM_ENABLED:
998
- method += " + LLM synthesis (OpenRouter)"
999
- if use_external_llm:
1000
- method += " + extra query generation"
1001
-
1002
- lines = [
1003
- f"# Competitive Intelligence: {self.company}",
1004
- "",
1005
- f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M')} ",
1006
- f"**Method:** {method} ",
1007
- f"**Searches:** {sum(len(s.queries_executed) for s in self.sections.values())} ",
1008
- f"**Sources:** {sum(len(s.sources) for s in self.sections.values())} unique URLs",
1009
- "",
1010
- "> **Legend:** [CONFIRMED] = from primary/verified source | [SPECULATIVE] = inferred or unverified",
1011
- "",
1012
- "---",
1013
- ]
1014
-
1015
- for section in self.sections.values():
1016
- lines.append("")
1017
- lines.append(f"## {section.label}")
1018
- lines.append("")
1019
-
1020
- if not section.findings:
1021
- lines.append("*No findings. Try broader queries or `--external-llm` for additional analysis.*")
1022
- lines.append("")
1023
- continue
1024
-
1025
- for f in section.findings:
1026
- if isinstance(f, dict):
1027
- tag = "[CONFIRMED]" if f.get("confirmed") else "[SPECULATIVE]"
1028
- text = f.get("text", "")
1029
- source = f.get("source", "")
1030
- lines.append(f"- **{tag}** {text}")
1031
- if source:
1032
- lines.append(f" - Source: {source}")
1033
- else:
1034
- lines.append(f"- {f}")
1035
-
1036
- if section.gaps:
1037
- lines.append("")
1038
- lines.append("**Knowledge Gaps:**")
1039
- for gap in section.gaps:
1040
- gap_text = gap.get("text", gap) if isinstance(gap, dict) else gap
1041
- lines.append(f"- [ ] {gap_text}")
1042
-
1043
- lines.append("")
1044
-
1045
- if section.sources:
1046
- lines.append(f"<details><summary>Sources ({len(section.sources)} URLs)</summary>")
1047
- lines.append("")
1048
- for url in section.sources[:10]:
1049
- lines.append(f"- {url}")
1050
- if len(section.sources) > 10:
1051
- lines.append(f"- ... and {len(section.sources) - 10} more")
1052
- lines.append("")
1053
- lines.append("</details>")
1054
- lines.append("")
1055
-
1056
- # Summary
1057
- lines.extend(["---", "", "## Summary", ""])
1058
-
1059
- total_findings = sum(len(s.findings) for s in self.sections.values())
1060
- confirmed = sum(
1061
- sum(1 for f in s.findings if isinstance(f, dict) and f.get("confirmed"))
1062
- for s in self.sections.values()
1063
- )
1064
- speculative = total_findings - confirmed
1065
-
1066
- lines.append(f"| Metric | Count |")
1067
- lines.append(f"|--------|-------|")
1068
- lines.append(f"| Total findings | {total_findings} |")
1069
- lines.append(f"| Confirmed | {confirmed} |")
1070
- lines.append(f"| Speculative | {speculative} |")
1071
- lines.append(f"| Categories | {len(self.sections)} |")
1072
- lines.append("")
1073
-
1074
- all_gaps = []
1075
- for s in self.sections.values():
1076
- for g in s.gaps:
1077
- gap_text = g.get("text", g) if isinstance(g, dict) else g
1078
- all_gaps.append(f"{s.label}: {gap_text}")
1079
-
1080
- if all_gaps:
1081
- lines.append("### Outstanding Gaps")
1082
- lines.append("")
1083
- for gap in all_gaps:
1084
- lines.append(f"- [ ] {gap}")
1085
- lines.append("")
1086
-
1087
- with open(report_path, "w") as f:
1088
- f.write("\n".join(lines))
1089
-
1090
- return report_path
1091
-
1092
- def _save_data(self):
1093
- """Save structured intel data as JSON alongside the report."""
1094
- timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
1095
- slug = self.company.lower().replace(" ", "-").replace("/", "-")
1096
- slug = "".join(c for c in slug if c.isalnum() or c == "-")
1097
-
1098
- data = {
1099
- "company": self.company,
1100
- "generated": datetime.now().isoformat(),
1101
- "sections": {},
1102
- }
1103
-
1104
- for cat_key, section in self.sections.items():
1105
- data["sections"][cat_key] = {
1106
- "label": section.label,
1107
- "queries_executed": section.queries_executed,
1108
- "finding_count": len(section.findings),
1109
- "findings": section.findings,
1110
- "gaps": section.gaps,
1111
- "source_count": len(section.sources),
1112
- "sources": section.sources[:20],
1113
- }
1114
-
1115
- json_path = self.output_dir / f"{timestamp}_{slug}_intel.json"
1116
- with open(json_path, "w") as f:
1117
- json.dump(data, f, indent=2)
1118
-
1119
-
1120
- # ============================================================
1121
- # Competitor Extraction (Data Processing)
1122
- # ============================================================
1123
-
1124
- # Company definitions for extraction
1125
- COMPANY_DEFINITIONS = {
1126
- "Hocoma": {"aliases": ["hocoma", "dih", "lokomat"], "country": "Switzerland", "product": "Lokomat", "status": "collapsed", "verified": True},
1127
- "Ekso Bionics": {"aliases": ["ekso", "eksobionics", "eksonr"], "country": "USA", "product": "EksoNR", "status": "weak", "verified": True},
1128
- "Cyberdyne": {"aliases": ["cyberdyne", "hal exoskeleton"], "country": "Japan", "product": "HAL", "status": "strong", "verified": True},
1129
- "Lifeward": {"aliases": ["lifeward", "rewalk", "alterg"], "country": "Israel/USA", "product": "ReWalk 7", "status": "consolidating", "verified": True},
1130
- "Fourier": {"aliases": ["fourier", "fourier intelligence"], "country": "China", "product": "X1, M2", "status": "growing", "verified": True},
1131
- "Myomo": {"aliases": ["myomo", "myopro"], "country": "USA", "product": "MyoPro", "status": "stable", "verified": False},
1132
- "Bionik": {"aliases": ["bionik", "inmotion"], "country": "Canada", "product": "InMotion", "status": "stable", "verified": False},
1133
- "Wandercraft": {"aliases": ["wandercraft", "atalante"], "country": "France", "product": "Atalante X", "status": "growing", "verified": False},
1134
- }
1135
-
1136
- # Status detection keywords (order matters)
1137
- STATUS_KEYWORDS = [
1138
- ("collapsed", ["bankrupt", "delisted", "suspended", "collapse", "shut down", "ceased", "nasdaq delisted"]),
1139
- ("weak", ["52-week low", "struggling", "losses", "declining", "layoffs"]),
1140
- ("growing", ["series e", "series d", "series c", "funding round", "$109 million"]),
1141
- ("consolidating", ["acquired", "merger", "acquisition"]),
1142
- ("strong", ["leader", "dominant", "profitable"]),
1143
- ]
1144
-
1145
- # Extraction patterns
1146
- DATE_PATTERN = re.compile(
1147
- r'((?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4})'
1148
- r'|(\d{4}-\d{2}-\d{2})'
1149
- r'|(\d{4}-\d{2})'
1150
- r'|((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2},?\s+\d{4})'
1151
- )
1152
- MONEY_PATTERN = re.compile(r'\$[\d,]+(?:\.\d+)?(?:\s*(?:million|billion|M|B))?|\d+(?:\.\d+)?\s*(?:million|billion)', re.IGNORECASE)
1153
-
1154
-
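Both patterns are deliberately permissive. Assuming the two compiled patterns above are in scope, a quick check of what they pick up:

text = "On March 5, 2024 the company raised $109 million."
print(DATE_PATTERN.search(text).group(0))   # "March 5, 2024"
print(MONEY_PATTERN.findall(text))          # ["$109 million"]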
1155
- class CompetitorExtractor:
1156
- """Extract structured competitor data from research results."""
1157
-
1158
- def __init__(self, research_dir: Path = RESEARCH_DIR,
1159
- output_file: Path = None):
1160
- self.research_dir = research_dir
1161
- self.output_file = output_file or (DATA_DIR / "competitors.json")
1162
-
1163
- def load_research_files(self) -> list[dict]:
1164
- """Load all JSON research files."""
1165
- results = []
1166
- if not self.research_dir.exists():
1167
- return results
1168
-
1169
- for json_file in self.research_dir.glob("*.json"):
1170
- if json_file.name.startswith("."):
1171
- continue
1172
- try:
1173
- with open(json_file) as f:
1174
- data = json.load(f)
1175
- data["_source_file"] = json_file.name
1176
- results.append(data)
1177
- except Exception as e:
1178
- print(f"Error loading {json_file}: {e}")
1179
-
1180
- return results
1181
-
1182
- def find_mentions(self, text: str) -> list[str]:
1183
- """Find which companies are mentioned in text."""
1184
- text_lower = text.lower()
1185
- mentioned = []
1186
- for company, info in COMPANY_DEFINITIONS.items():
1187
- if any(alias in text_lower for alias in info["aliases"]):
1188
- mentioned.append(company)
1189
- return mentioned
1190
-
1191
- def normalize_date(self, date_str: str) -> str | None:
1192
- """Normalize date string to YYYY-MM-DD. Returns None for bogus dates."""
1193
- formats = ["%B %d, %Y", "%B %d %Y", "%b %d, %Y", "%b %d %Y", "%Y-%m-%d", "%Y-%m"]
1194
- for fmt in formats:
1195
- try:
1196
- dt = datetime.strptime(date_str.strip(), fmt)
1197
- if dt.year < 2010:
1198
- return None
1199
- return dt.strftime("%Y-%m-%d")
1200
- except Exception:
1201
- pass
1202
- return date_str
1203
-
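Concretely (assuming CompetitorExtractor is importable via the shim):

from research import CompetitorExtractor

ex = CompetitorExtractor()
print(ex.normalize_date("March 5, 2024"))   # "2024-03-05"
print(ex.normalize_date("2024-03"))         # "2024-03-01" (month-only -> first day)
print(ex.normalize_date("June 1, 1999"))    # None (pre-2010 dates are rejected)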
1204
- def extract_events(self, text: str, company: str) -> list[dict]:
1205
- """Extract events (date + context) from text."""
1206
- events = []
1207
- aliases = COMPANY_DEFINITIONS[company]["aliases"]
1208
-
1209
- for match in DATE_PATTERN.finditer(text):
1210
- date_str = match.group(0)
1211
- if not date_str:
1212
- continue
1213
-
1214
- start = max(0, match.start() - 50)
1215
- end = min(len(text), match.end() + 150)
1216
- context = text[start:end]
1217
-
1218
- normalized = self.normalize_date(date_str)
1219
- if normalized and any(alias in context.lower() for alias in aliases):
1220
- events.append({
1221
- "date": normalized,
1222
- "context": context.strip()
1223
- })
1224
-
1225
- return events
1226
-
1227
- def detect_status(self, snippets: list[str], default: str) -> str:
1228
- """Detect status from snippets."""
1229
- text = " ".join(snippets).lower()
1230
- for status, keywords in STATUS_KEYWORDS:
1231
- if any(kw.lower() in text for kw in keywords):
1232
- return status
1233
- return default
1234
-
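Keyword order matters: "collapsed" is checked before "weak", so the strongest signal wins. For example:

from research import CompetitorExtractor

ex = CompetitorExtractor()
snippets = ["Nasdaq delisted the shares after the company ceased operations."]
print(ex.detect_status(snippets, default="stable"))   # "collapsed"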
1235
- def extract_stock(self, snippets: list[str]) -> Optional[float]:
1236
- """Extract stock price."""
1237
- for snippet in snippets:
1238
- match = re.search(r'\$(\d+\.?\d*)', snippet)
1239
- if match and float(match.group(1)) < 1000:
1240
- return float(match.group(1))
1241
- return None
1242
-
1243
- def extract_funding(self, money_mentions: list[str]) -> Optional[int]:
1244
- """Extract funding amount."""
1245
- for m in money_mentions:
1246
- match = re.search(r'(\d+)\s*(?:million|M)', m, re.IGNORECASE)
1247
- if match:
1248
- return int(match.group(1)) * 1_000_000
1249
- match = re.search(r'(\d+\.?\d*)\s*(?:billion|B)', m, re.IGNORECASE)
1250
- if match:
1251
- return int(float(match.group(1)) * 1_000_000_000)
1252
- return None
1253
-
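The unit conversion, sketched with illustrative mentions:

from research import CompetitorExtractor

ex = CompetitorExtractor()
print(ex.extract_funding(["raised $109 million"]))   # 109000000
print(ex.extract_funding(["a $1.2 billion round"]))  # 1200000000
print(ex.extract_funding(["no figures here"]))       # None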
1254
- def _load_intel_findings(self) -> dict[str, list[dict]]:
1255
- """Load confirmed findings from Deep Intel reports, grouped by company.
1256
-
1257
- Returns: {company_name: [{"text": ..., "confirmed": bool, "source": ...}, ...]}
1258
- Only loads the latest report per company.
1259
- """
1260
- intel_dir = self.research_dir / "intel"
1261
- if not intel_dir.exists():
1262
- return {}
1263
-
1264
- findings_by_company: dict[str, list[dict]] = {}
1265
- seen_companies: set[str] = set()
1266
-
1267
- for json_file in sorted(intel_dir.glob("*_intel.json"), reverse=True):
1268
- try:
1269
- with open(json_file) as f:
1270
- data = json.load(f)
1271
- company = data.get("company", "")
1272
- if not company or company in seen_companies:
1273
- continue
1274
- seen_companies.add(company)
1275
-
1276
- all_findings = []
1277
- for section in data.get("sections", {}).values():
1278
- for finding in section.get("findings", []):
1279
- if isinstance(finding, dict) and finding.get("text"):
1280
- all_findings.append(finding)
1281
- elif isinstance(finding, str) and finding:
1282
- all_findings.append({"text": finding, "confirmed": False, "source": ""})
1283
-
1284
- if all_findings:
1285
- findings_by_company[company] = all_findings
1286
- except Exception:
1287
- pass
1288
-
1289
- return findings_by_company
1290
-
1291
- def _extract_intel_opportunities(self, intel_findings: dict[str, list[dict]]) -> list[dict]:
1292
- """Extract opportunity signals from Deep Intel confirmed findings.
1293
-
1294
- Scans for: vulnerability keywords, financial weakness, market gaps,
1295
- technology gaps, regulatory issues, customer complaints.
1296
- """
1297
- opportunities = []
1298
-
1299
- vuln_patterns = [
1300
- (r'(?:layoff|restructur|downsiz|headcount.?reduc)', "workforce_cut", 2),
1301
- (r'(?:delist|stock.?(?:drop|fall|declin)|52.week.low|penny.stock)', "financial_distress", 1),
1302
- (r'(?:FDA.?(?:reject|warning|recall)|regulatory.?(?:issue|fail|delay))', "regulatory_issue", 2),
1303
- (r'(?:bankrupt|insolvenc|cease.?operat|wind.?down|liquidat)', "collapse", 1),
1304
- (r'(?:customer.?complain|negative.?review|churn|losing.?customer)', "customer_risk", 2),
1305
- (r'(?:legacy|technical.?debt|outdated|proprietary.?lock)', "tech_weakness", 3),
1306
- (r'(?:no.?AI|lack.?(?:of.?)?(?:data|machine.learn|personali))', "ai_gap", 2),
1307
- ]
1308
-
1309
- for company, findings in intel_findings.items():
1310
- confirmed = [f for f in findings if f.get("confirmed")]
1311
- all_text = " ".join(f["text"] for f in confirmed).lower() if confirmed else ""
1312
- all_text_full = " ".join(f["text"] for f in findings).lower()
1313
-
1314
- for pattern, opp_type, priority in vuln_patterns:
1315
- # Check confirmed findings first (higher weight)
1316
- if re.search(pattern, all_text, re.IGNORECASE):
1317
- match_finding = next(
1318
- (f for f in confirmed if re.search(pattern, f["text"], re.IGNORECASE)),
1319
- None
1320
- )
1321
- if match_finding:
1322
- opportunities.append({
1323
- "type": opp_type,
1324
- "text": f"{company}: {match_finding['text'][:120]}",
1325
- "priority": priority,
1326
- "confirmed": True,
1327
- "source": match_finding.get("source", ""),
1328
- "company": company,
1329
- })
1330
- # Then speculative
1331
- elif re.search(pattern, all_text_full, re.IGNORECASE):
1332
- match_finding = next(
1333
- (f for f in findings if re.search(pattern, f["text"], re.IGNORECASE)),
1334
- None
1335
- )
1336
- if match_finding:
1337
- opportunities.append({
1338
- "type": opp_type,
1339
- "text": f"{company}: {match_finding['text'][:120]}",
1340
- "priority": priority + 1,
1341
- "confirmed": False,
1342
- "source": match_finding.get("source", ""),
1343
- "company": company,
1344
- })
1345
-
1346
- return opportunities
1347
-
1348
- def _load_sota_tech_signals(self) -> list[dict]:
1349
- """Load tech advantage signals from SOTA knowledge base.
1350
-
1351
- Reads data/sota/knowledge_base.json, extracts high-priority
1352
- techniques and key principles as opportunity items.
1353
- """
1354
- kb_path = self.research_dir / "sota" / "knowledge_base.json"
1355
- if not kb_path.exists():
1356
- return []
1357
-
1358
- try:
1359
- with open(kb_path) as f:
1360
- kb = json.load(f)
1361
- except Exception:
1362
- return []
1363
-
1364
- signals = []
1365
-
1366
- # High-priority techniques → tech advantages (priority 2)
1367
- for t in kb.get("techniques", []):
1368
- if t.get("priority") == "high" and t.get("gurma_fit"):
1369
- signals.append({
1370
- "type": "tech_advantage",
1371
- "text": f"{t['name']}: {t['gurma_fit'][:120]}",
1372
- "priority": 2,
1373
- "confirmed": True,
1374
- "company": "GURMA",
1375
- })
1376
-
1377
- # Key principles → strategic validation (priority 3, capped at 2)
1378
- for p in kb.get("key_principles", [])[:2]:
1379
- if p.get("principle"):
1380
- signals.append({
1381
- "type": "tech_principle",
1382
- "text": f"{p['principle']}: {p.get('detail', '')[:100]}",
1383
- "priority": 3,
1384
- "confirmed": True,
1385
- "company": "GURMA",
1386
- })
1387
-
1388
- return signals
1389
-
1390
- def _opportunity_changed(self, new_opps: list[dict], existing: dict) -> bool:
1391
- """Decide whether new opportunity data is materially different from existing.
1392
-
1393
- Returns True if the widget deserves an update. Criteria:
1394
- - New opportunity types appeared (e.g. a collapse that wasn't there)
1395
- - Priority-1 signals changed
1396
- - >50% of top points are different
1397
- """
1398
- existing_points = set(existing.get("points", []))
1399
- new_points = set(o["text"] for o in new_opps[:4])
1400
-
1401
- if not existing_points:
1402
- return True
1403
-
1404
- # Check if any priority-1 signals are new
1405
- new_p1_types = {o["type"] for o in new_opps if o["priority"] == 1}
1406
- old_raw = existing.get("raw_opportunities", [])
1407
- old_p1_types = {o["type"] for o in old_raw if o.get("priority") == 1}
1408
- if new_p1_types != old_p1_types:
1409
- return True
1410
-
1411
- # Check overlap of top points — if less than half match, it's a meaningful change
1412
- overlap = existing_points & new_points
1413
- if len(overlap) < len(existing_points) / 2:
1414
- return True
1415
-
1416
- return False
1417
-
1418
- def _synthesize_opportunity_llm(self, opportunities: list[dict],
1419
- competitors: list[dict]) -> Optional[dict]:
1420
- """Use LLM to synthesize a strategic opportunity headline + points.
1421
-
1422
- Returns {headline, points} or None if LLM unavailable/fails.
1423
- """
1424
- if not LLM_ENABLED:
1425
- return None
1426
-
1427
- llm = LLMClient()
1428
-
1429
- opp_text = "\n".join(
1430
- f"- [{o['type']}] {'[CONFIRMED]' if o.get('confirmed') else '[SPECULATIVE]'} {o['text']}"
1431
- for o in opportunities[:12]
1432
- )
1433
-
1434
- comp_summary = "\n".join(
1435
- f"- {c['name']}: status={c['status']}, "
1436
- f"{'stock=$'+format(c['stock'], '.2f') if c.get('stock') else 'no stock data'}, "
1437
- f"{'funding=$'+format(c['funding']/1e6, '.0f')+'M' if c.get('funding') else 'no funding data'}"
1438
- for c in competitors[:8]
1439
- )
1440
-
1441
- system = (
1442
- "You are a strategic advisor for GURMA.ai, a Swiss AI company "
1443
- "entering rehabilitation robotics with 15 years of patient outcome "
1444
- "data (not just motion data) from BAMA Teknoloji. "
1445
- "You produce concise, actionable strategic assessments."
1446
- )
1447
-
1448
- prompt = f"""Based on the following competitive + technology signals and competitor data,
1449
- produce a strategic opportunity assessment for GURMA.ai.
1450
-
1451
- Signals (competitive, tech advantages, and threats):
1452
- {opp_text}
1453
-
1454
- Competitor landscape:
1455
- {comp_summary}
1456
-
1457
- Return JSON:
1458
- {{
1459
- "headline": "One punchy sentence (max 10 words) summarizing the #1 strategic opportunity",
1460
- "points": [
1461
- "Actionable insight 1 (max 20 words, include numbers where available)",
1462
- "Actionable insight 2",
1463
- "Actionable insight 3",
1464
- "Actionable insight 4"
1465
- ]
1466
- }}
1467
-
1468
- Rules:
1469
- - Headline should be about the OPPORTUNITY, not just a competitor's problem
1470
- - Points should mix competitive windows, tech advantages, AND threats
1471
- - Be specific: include dollar amounts, dates, competitor names, model/technique names
1472
- - Maximum 4 points, ranked by strategic importance
1473
- - confirmed signals should be weighted more heavily than speculative ones"""
1474
-
1475
- response = llm.call(prompt, system, max_tokens=500)
1476
- if response:
1477
- match = re.search(r'\{.*\}', response, re.DOTALL)
1478
- if match:
1479
- try:
1480
- result = json.loads(match.group())
1481
- if result.get("headline") and result.get("points"):
1482
- return result
1483
- except Exception:
1484
- pass
1485
- return None
1486
-
1487
- def detect_opportunities(self, competitors: list[dict], all_snippets: list[str]) -> dict:
1488
- """
1489
- Detect market opportunities from competitor data + Deep Intel findings.
1490
-
1491
- Combines: structured competitor status, Deep Intel confirmed findings,
1492
- and optionally LLM synthesis for headline/points.
1493
- """
1494
- opportunities = []
1495
-
1496
- # --- Source 1: Structured competitor status (always available) ---
1497
- collapsed = [c for c in competitors if c["status"] == "collapsed"]
1498
- weak = [c for c in competitors if c["status"] == "weak"]
1499
-
1500
- if collapsed:
1501
- names = ", ".join(c["name"] for c in collapsed)
1502
- opportunities.append({
1503
- "type": "market_gap",
1504
- "text": f"{names} collapsed — customers seeking alternatives",
1505
- "priority": 1, "confirmed": True, "company": names,
1506
- })
1507
-
1508
- if weak:
1509
- for c in weak:
1510
- opp_text = f"{c['name']} financially weak"
1511
- if c.get("stock"):
1512
- opp_text += f" (${c['stock']:.2f})"
1513
- opp_text += " — vulnerable to disruption"
1514
- opportunities.append({
1515
- "type": "weakness",
1516
- "text": opp_text,
1517
- "priority": 2, "confirmed": True, "company": c["name"],
1518
- })
1519
-
1520
- growing = [c for c in competitors if c["status"] == "growing" and c.get("funding")]
1521
- for c in growing:
1522
- funding_m = c["funding"] / 1_000_000
1523
- opportunities.append({
1524
- "type": "threat",
1525
- "text": f"{c['name']} well-funded (${funding_m:.0f}M) — monitor closely",
1526
- "priority": 3, "confirmed": True, "company": c["name"],
1527
- })
1528
-
1529
- # BAMA data advantage
1530
- if competitors:
1531
- opportunities.append({
1532
- "type": "advantage",
1533
- "text": "BAMA has 15 years outcome data vs. competitors' motion data",
1534
- "priority": 1, "confirmed": True, "company": "BAMA",
1535
- })
1536
-
1537
- # --- Source 2: Deep Intel findings (if available) ---
1538
- intel_findings = self._load_intel_findings()
1539
- if intel_findings:
1540
- intel_opps = self._extract_intel_opportunities(intel_findings)
1541
- existing_keys = {(o.get("company", ""), o["type"]) for o in opportunities}
1542
- for io in intel_opps:
1543
- key = (io.get("company", ""), io["type"])
1544
- if key not in existing_keys:
1545
- opportunities.append(io)
1546
- existing_keys.add(key)
1547
-
1548
- # --- Source 3: SOTA KB tech signals (if available) ---
1549
- sota_signals = self._load_sota_tech_signals()
1550
- if sota_signals:
1551
- existing_keys = {(o.get("company", ""), o["type"]) for o in opportunities}
1552
- for ts in sota_signals:
1553
- key = (ts.get("company", ""), ts["type"])
1554
- if key not in existing_keys:
1555
- opportunities.append(ts)
1556
- existing_keys.add(key)
1557
-
1558
- # Sort by priority
1559
- opportunities.sort(key=lambda x: x["priority"])
1560
-
1561
- # --- Headline + points: LLM synthesis or rule-based fallback ---
1562
- llm_result = self._synthesize_opportunity_llm(opportunities, competitors)
1563
-
1564
- if llm_result:
1565
- headline = llm_result["headline"]
1566
- points = llm_result["points"][:4]
1567
- else:
1568
- if collapsed:
1569
- headline = f"{collapsed[0]['name']} collapse creates market window"
1570
- elif weak:
1571
- headline = "Competitor weakness creates opportunity"
1572
- else:
1573
- headline = "Data advantage positions GURMA.ai for growth"
1574
- points = [o["text"] for o in opportunities[:4]]
1575
-
1576
- # Build sources list
1577
- sources = ["competitor"]
1578
- if intel_findings:
1579
- sources.append("intel")
1580
- if sota_signals:
1581
- sources.append("tech")
1582
- if llm_result:
1583
- sources.append("llm")
1584
-
1585
- return {
1586
- "headline": headline,
1587
- "points": points,
1588
- "detected_at": datetime.now().strftime("%Y-%m-%d"),
1589
- "raw_opportunities": opportunities,
1590
- "sources": sources,
1591
- }
1592
-
1593
- def load_existing_data(self) -> Optional[dict]:
1594
- """Load existing competitors.json if it exists."""
1595
- if self.output_file.exists():
1596
- try:
1597
- with open(self.output_file) as f:
1598
- return json.load(f)
1599
- except Exception:
1600
- pass
1601
- return None
1602
-
1603
- def process(self) -> dict:
1604
- """Process research files and build competitors.json."""
1605
- research_data = self.load_research_files()
1606
- if not research_data:
1607
- return {"competitors": [], "market": {}}
1608
-
1609
- # Aggregate data per company
1610
- from collections import defaultdict
1611
- company_data = defaultdict(lambda: {
1612
- "mentions": 0, "snippets": [], "events": [], "money": [], "urls": []
1613
- })
1614
-
1615
- for research in research_data:
1616
- for result in research.get("results", []):
1617
- text = f"{result.get('title', '')} {result.get('snippet', '')}"
1618
- url = result.get("url", "")
1619
-
1620
- for company in self.find_mentions(text):
1621
- cd = company_data[company]
1622
- cd["mentions"] += 1
1623
- cd["snippets"].append(result.get("snippet", "")[:200])
1624
- cd["urls"].append(url)
1625
- cd["events"].extend(self.extract_events(text, company))
1626
- cd["money"].extend(MONEY_PATTERN.findall(text))
1627
-
1628
- # Build output
1629
- competitors = []
1630
- for company, info in COMPANY_DEFINITIONS.items():
1631
- data = company_data[company]
1632
-
1633
- status = info["status"] if info.get("verified") else self.detect_status(data["snippets"], info["status"])
1634
-
1635
- competitors.append({
1636
- "name": company,
1637
- "country": info["country"],
1638
- "product": info["product"],
1639
- "status": status,
1640
- "stock": self.extract_stock(data["snippets"]),
1641
- "funding": self.extract_funding(data["money"]),
1642
- "notes": data["snippets"][0] if data["snippets"] else "",
1643
- "mentions": data["mentions"],
1644
- "events": [{"date": e["date"], "event": e["context"][:100]} for e in data["events"][:10]],
1645
- "sample_urls": list(set(data["urls"]))[:5],
1646
- })
1647
-
1648
- competitors.sort(key=lambda x: x["mentions"], reverse=True)
1649
-
1650
- # Detect opportunities (from competitor status + Deep Intel findings)
1651
- all_snippets = []
1652
- for company, data in company_data.items():
1653
- all_snippets.extend(data["snippets"])
1654
- new_opportunity = self.detect_opportunities(competitors, all_snippets)
1655
-
1656
- # Decide whether to update the widget
1657
- existing = self.load_existing_data()
1658
- existing_opp = existing.get("opportunity", {}) if existing else {}
1659
-
1660
- if existing_opp.get("confirmed"):
1661
- # Confirmed: only flag update if data materially changed
1662
- if self._opportunity_changed(new_opportunity.get("raw_opportunities", []), existing_opp):
1663
- opportunity = existing_opp
1664
- opportunity["update_available"] = True
1665
- opportunity["suggested_update"] = new_opportunity
1666
- else:
1667
- opportunity = existing_opp
1668
- opportunity["update_available"] = False
1669
- else:
1670
- # Not confirmed: auto-update
1671
- opportunity = new_opportunity
1672
- opportunity["confirmed"] = False
1673
- opportunity["update_available"] = False
1674
-
1675
- return {
1676
- "competitors": competitors,
1677
- "market": {"size_2024": 2_000_000_000, "size_2029_ai": 9_100_000_000, "cagr": 0.278},
1678
- "opportunity": opportunity,
1679
- "_generated": datetime.now().isoformat(),
1680
- "_source_files": [f.name for f in self.research_dir.glob("*.json") if not f.name.startswith(".")]
1681
- }
1682
-
1683
- def save(self, data: dict = None) -> Path:
1684
- """Process and save to output file."""
1685
- data = data or self.process()
1686
- self.output_file.parent.mkdir(parents=True, exist_ok=True)
1687
- with open(self.output_file, "w") as f:
1688
- json.dump(data, f, indent=2)
1689
- return self.output_file
1690
-
1691
-
1692
- # ============================================================
1693
- # CLI Commands
1694
- # ============================================================
1695
-
1696
- def cmd_extract(args):
1697
- """Extract competitor data from research."""
1698
- extractor = CompetitorExtractor()
1699
-
1700
- print(f"Loading research from: {extractor.research_dir}")
1701
- data = extractor.process()
1702
-
1703
- if not data["competitors"]:
1704
- print("No research files found. Run 'batch' first.")
1705
- return
1706
-
1707
- output = extractor.save(data)
1708
-
1709
- print(f"Saved to: {output}")
1710
- print(f"\nCompany mentions:")
1711
- for comp in data["competitors"]:
1712
- status_marker = {"collapsed": "⚠", "weak": "↓", "growing": "↑", "strong": "★"}.get(comp["status"], "•")
1713
- print(f" {status_marker} {comp['name']}: {comp['mentions']} mentions ({comp['status']})")
1714
-
1715
-
1716
- def cmd_search(args):
1717
- """Single search command."""
1718
- service = SearchService(backend=args.backend)
1719
- print(f"Searching: {args.query}")
1720
- print(f"Backend: {args.backend} | Max: {args.max_results}")
1721
- print("-" * 50)
1722
-
1723
- results = service.search(args.query, args.max_results, save=args.save)
1724
-
1725
- for i, r in enumerate(results, 1):
1726
- print(f"\n{i}. {r.title}")
1727
- print(f" {r.url}")
1728
- print(f" {r.snippet[:150]}...")
1729
-
1730
- print(f"\n[{len(results)} results]")
1731
- if args.save:
1732
- print(f"Saved to: {RESEARCH_DIR}")
1733
-
1734
-
1735
- def cmd_batch(args):
1736
- """Batch research command."""
1737
- service = SearchService(backend=args.backend)
1738
- storage = ResultStorage()
1739
-
1740
- # Generate all queries
1741
- queries = []
1742
- for company in COMPETITORS:
1743
- for template in BATCH_QUERY_TEMPLATES:
1744
- queries.append(template.format(company=company))
1745
- queries.extend(MARKET_QUERIES)
1746
-
1747
- total_queries = len(queries)
1748
-
1749
- # Deduplicate unless --force is set
1750
- skipped = 0
1751
- if not args.force:
1752
- recent = storage.get_recent_queries(days=args.days)
1753
- original_count = len(queries)
1754
- queries = [q for q in queries if q.lower().strip() not in recent]
1755
- skipped = original_count - len(queries)
1756
-
1757
- print(f"Batch Research")
1758
- print(f"{'='*60}")
1759
- print(f"Competitors: {len(COMPETITORS)}")
1760
- print(f"Total queries: {total_queries}")
1761
- if skipped > 0:
1762
- print(f"Skipped (run in last {args.days} days): {skipped}")
1763
- print(f"New queries to run: {len(queries)}")
1764
- print(f"Output: {RESEARCH_DIR}")
1765
- print(f"{'='*60}")
1766
-
1767
- if not queries:
1768
- print("\nNo new queries to run. Use --force to re-run all.")
1769
- return
1770
-
1771
- def progress(i, total, query):
1772
- print(f"\n[{i}/{total}] {query}")
1773
-
1774
- stats = service.search_batch(queries, args.max_results, args.delay, callback=progress)
1775
-
1776
- success = sum(1 for v in stats.values() if v >= 0)
1777
- print(f"\n{'='*60}")
1778
- print(f"Complete: {success}/{len(queries)} successful")
1779
- if skipped > 0:
1780
- print(f"Skipped: {skipped} (already run recently)")
1781
- print(f"{'='*60}")
1782
-
1783
-
1784
- def cmd_competitor(args):
1785
- """Deep competitive intelligence on a single competitor."""
1786
- company = args.company
1787
- use_external_llm = args.external_llm
1788
-
1789
- if use_external_llm and not LLM_ENABLED:
1790
- print("Warning: --external-llm requested but OPENROUTER_API_KEY not found. Skipping external LLM.")
1791
- use_external_llm = False
1792
-
1793
- categories = None
1794
- if args.categories:
1795
- categories = [c.strip() for c in args.categories.split(",")]
1796
- valid = set(DEEP_INTEL_CATEGORIES.keys())
1797
- invalid = [c for c in categories if c not in valid]
1798
- if invalid:
1799
- print(f"Invalid categories: {invalid}")
1800
- print(f"Valid: {sorted(valid)}")
1801
- return
1802
-
1803
- if args.list_categories:
1804
- print("Available categories:")
1805
- for key, cat in DEEP_INTEL_CATEGORIES.items():
1806
- q_count = len(cat["queries"])
1807
- print(f" {key:30s} {cat['label']:30s} ({q_count} queries)")
1808
- return
1809
-
1810
- agent = CompetitorIntelAgent(company)
1811
- report_path = agent.run(
1812
- categories=categories,
1813
- use_external_llm=use_external_llm,
1814
- delay=args.delay,
1815
- max_results=args.max_results,
1816
  )
1817
-
1818
- print(f"\nReport: {report_path}")
1819
-
1820
-
1821
- def cmd_sota(args):
1822
- """SOTA technology knowledge base."""
1823
- try:
1824
- from .sota_agent import SOTAScoutAgent
1825
- except ImportError:
1826
- from sota_agent import SOTAScoutAgent
1827
-
1828
- agent = SOTAScoutAgent()
1829
-
1830
- if args.analyze:
1831
- report = agent.analyze(args.analyze)
1832
- print(f"\nAnalysis report: {report}")
1833
- return
1834
-
1835
- # Default: show knowledge base
1836
- agent.show(section=args.show)
1837
-
1838
-
1839
- def cmd_list(args):
1840
- """List saved searches."""
1841
- storage = ResultStorage()
1842
- searches = storage.list_searches(args.limit)
1843
-
1844
- if not searches:
1845
- print(f"No searches in {RESEARCH_DIR}")
1846
- return
1847
-
1848
- print(f"Recent searches ({RESEARCH_DIR}):\n")
1849
- for s in searches:
1850
- print(f" {s['timestamp'][:10]} {s['results']:2d} results {s['query'][:50]}")
1851
-
1852
-
1853
- def main():
1854
- parser = argparse.ArgumentParser(
1855
- description="GURMA.ai Research Tool",
1856
- formatter_class=argparse.RawDescriptionHelpFormatter
1857
  )
1858
- subparsers = parser.add_subparsers(dest="command", help="Commands")
1859
-
1860
- # search
1861
- p_search = subparsers.add_parser("search", help="Single web search")
1862
- p_search.add_argument("query", help="Search query")
1863
- p_search.add_argument("-b", "--backend", default="duckduckgo",
1864
- choices=["duckduckgo", "ddg", "serpapi", "brave"])
1865
- p_search.add_argument("-n", "--max-results", type=int, default=10)
1866
- p_search.add_argument("--no-save", dest="save", action="store_false")
1867
- p_search.set_defaults(func=cmd_search)
1868
-
1869
- # batch
1870
- p_batch = subparsers.add_parser("batch", help="Batch research all competitors")
1871
- p_batch.add_argument("-b", "--backend", default="duckduckgo")
1872
- p_batch.add_argument("-n", "--max-results", type=int, default=10)
1873
- p_batch.add_argument("-d", "--delay", type=float, default=0.5)
1874
- p_batch.add_argument("--days", type=int, default=7,
1875
- help="Skip queries run within N days (default: 7)")
1876
- p_batch.add_argument("-f", "--force", action="store_true",
1877
- help="Force re-run all queries (ignore deduplication)")
1878
- p_batch.set_defaults(func=cmd_batch)
1879
-
1880
- # competitor (deep intel)
1881
- p_comp = subparsers.add_parser("competitor", help="Deep competitive intelligence on a company")
1882
- p_comp.add_argument("company", nargs="?", default="", help="Company name (e.g. 'Ekso Bionics')")
1883
- p_comp.add_argument("--external-llm", action="store_true",
1884
- help="Also use external LLM (OpenRouter) for enhanced analysis")
1885
- p_comp.add_argument("-c", "--categories", type=str, default=None,
1886
- help="Comma-separated categories (default: all)")
1887
- p_comp.add_argument("--list-categories", action="store_true",
1888
- help="List available categories")
1889
- p_comp.add_argument("-n", "--max-results", type=int, default=10)
1890
- p_comp.add_argument("-d", "--delay", type=float, default=1.0,
1891
- help="Delay between searches in seconds (default: 1.0)")
1892
- p_comp.set_defaults(func=cmd_competitor)
1893
-
1894
- # sota
1895
- p_sota = subparsers.add_parser("sota", help="SOTA technology knowledge base for GURMA.ai")
1896
- p_sota.add_argument("--analyze", "-a", type=str, default=None,
1897
- help="Analyze a document and update knowledge base")
1898
- p_sota.add_argument("--show", "-s", type=str, default=None, nargs="?",
1899
- const=None,
1900
- choices=["models", "techniques", "stack", "principles", "actions", "sources"],
1901
- help="Show specific KB section (default: summary)")
1902
- p_sota.set_defaults(func=cmd_sota)
1903
-
1904
- # extract
1905
- p_extract = subparsers.add_parser("extract", help="Extract competitor data to JSON")
1906
- p_extract.set_defaults(func=cmd_extract)
1907
-
1908
- # list
1909
- p_list = subparsers.add_parser("list", help="List saved searches")
1910
- p_list.add_argument("-l", "--limit", type=int, default=20)
1911
- p_list.set_defaults(func=cmd_list)
1912
-
1913
- args = parser.parse_args()
1914
-
1915
- if hasattr(args, "func"):
1916
- args.func(args)
1917
- else:
1918
- parser.print_help()
1919
-
1920
 
1921
  if __name__ == "__main__":
1922
  main()
 
1
  #!/usr/bin/env python3
2
  """
3
+ GURMA.ai Research Tool — backwards-compatible shim.
4
+
5
+ All logic has been split into focused modules:
6
+ config.py — paths, API keys, constants
7
+ search.py — backends, storage, SearchService
8
+ llm.py — LLMClient (OpenRouter)
9
+ intel.py — CompetitorIntelAgent
10
+ extract.py — CompetitorExtractor, COMPANY_DEFINITIONS
11
+ cli.py — CLI commands and argparse
12
+ sota_agent.py — SOTA knowledge base agent
13
+ tr_agents.py — Turkish research agents
14
+
15
+ This file re-exports everything so existing imports work unchanged:
16
+ from research import SearchService, CompetitorExtractor, ...
17
+ python research.py batch
18
  """
19
 
20
+ try:
21
+ # Package context (src/utils/)
22
+ from .config import (
23
+ PROJECT_ROOT, IS_HF_SPACE, RESEARCH_DIR, DATA_DIR,
24
+ SERPAPI_KEY, BRAVE_API_KEY, OPENROUTER_API_KEY,
25
+ LLM_MODEL, LLM_ENABLED,
26
+ COMPETITORS, BATCH_QUERY_TEMPLATES, MARKET_QUERIES,
27
  )
28
+ from .search import (
29
+ WebSearchResult, SearchBackend, DuckDuckGoBackend,
30
+ SerpAPIBackend, BraveBackend, BACKENDS, get_backend,
31
+ ResultStorage, SearchService,
32
  )
33
+ from .llm import LLMClient
34
+ from .intel import (
35
+ IntelSection, CompetitorIntelAgent,
36
+ DEEP_INTEL_CATEGORIES, PRIMARY_SOURCE_DOMAINS,
37
+ CATEGORY_EXPECTED, CATEGORY_SYNTHESIS_QUESTIONS,
38
+ )
39
+ from .extract import (
40
+ CompetitorExtractor, COMPANY_DEFINITIONS,
41
+ STATUS_KEYWORDS, DATE_PATTERN, MONEY_PATTERN,
42
+ )
43
+ from .cli import main
44
+ except ImportError:
45
+ # Flat-file context (HF Space: all .py files in same directory)
46
+ from config import ( # type: ignore[no-redef]
47
+ PROJECT_ROOT, IS_HF_SPACE, RESEARCH_DIR, DATA_DIR,
48
+ SERPAPI_KEY, BRAVE_API_KEY, OPENROUTER_API_KEY,
49
+ LLM_MODEL, LLM_ENABLED,
50
+ COMPETITORS, BATCH_QUERY_TEMPLATES, MARKET_QUERIES,
51
+ )
52
+ from search import ( # type: ignore[no-redef]
53
+ WebSearchResult, SearchBackend, DuckDuckGoBackend,
54
+ SerpAPIBackend, BraveBackend, BACKENDS, get_backend,
55
+ ResultStorage, SearchService,
56
+ )
57
+ from llm import LLMClient # type: ignore[no-redef]
58
+ from intel import ( # type: ignore[no-redef]
59
+ IntelSection, CompetitorIntelAgent,
60
+ DEEP_INTEL_CATEGORIES, PRIMARY_SOURCE_DOMAINS,
61
+ CATEGORY_EXPECTED, CATEGORY_SYNTHESIS_QUESTIONS,
62
+ )
63
+ from extract import ( # type: ignore[no-redef]
64
+ CompetitorExtractor, COMPANY_DEFINITIONS,
65
+ STATUS_KEYWORDS, DATE_PATTERN, MONEY_PATTERN,
66
+ )
67
+ from cli import main # type: ignore[no-redef]
 
68
 
69
  if __name__ == "__main__":
70
  main()
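For reference, a minimal usage sketch of the shim (the query string is illustrative): the same import works in both the package layout and the flat HF Space layout, because the try/except above resolves whichever module set is present.

    from research import SearchService

    svc = SearchService(backend="duckduckgo")   # backed by search.py in either layout
    hits = svc.search("gait rehabilitation exoskeleton market", max_results=5, save=False)
    for h in hits:
        print(f"[{h.source}] {h.title} -> {h.url}")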
search.py ADDED
@@ -0,0 +1,305 @@
1
+ """
2
+ Search backends, result storage, and the SearchService facade.
3
+
4
+ Provides multi-backend web search (DuckDuckGo, SerpAPI, Brave),
5
+ result persistence (JSON + Markdown), and a unified SearchService.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import sys
12
+ import time
13
+ from abc import ABC, abstractmethod
14
+ from dataclasses import dataclass, asdict
15
+ from datetime import datetime, timedelta
16
+ from pathlib import Path
17
+ from typing import Protocol
18
+
19
+ try:
20
+ from .config import RESEARCH_DIR, SERPAPI_KEY, BRAVE_API_KEY
21
+ except ImportError:
22
+ from config import RESEARCH_DIR, SERPAPI_KEY, BRAVE_API_KEY
23
+
24
+
25
+ # ============================================================
26
+ # Data Types
27
+ # ============================================================
28
+
29
+ class SearchResult(Protocol):
30
+ title: str
31
+ url: str
32
+ snippet: str
33
+ source: str
34
+
35
+
36
+ @dataclass
37
+ class WebSearchResult:
38
+ title: str
39
+ url: str
40
+ snippet: str
41
+ source: str
42
+
43
+
44
+ # ============================================================
45
+ # Search Backends
46
+ # ============================================================
47
+
48
+ class SearchBackend(ABC):
49
+ """Abstract base for search backends."""
50
+
51
+ @property
52
+ @abstractmethod
53
+ def name(self) -> str:
54
+ pass
55
+
56
+ @abstractmethod
57
+ def search(self, query: str, max_results: int = 10) -> list[WebSearchResult]:
58
+ pass
59
+
60
+ @abstractmethod
61
+ def is_available(self) -> bool:
62
+ pass
63
+
64
+
65
+ class DuckDuckGoBackend(SearchBackend):
66
+
67
+ @property
68
+ def name(self) -> str:
69
+ return "duckduckgo"
70
+
71
+ def is_available(self) -> bool:
72
+ try:
73
+ from ddgs import DDGS
74
+ return True
75
+ except ImportError:
76
+ try:
77
+ from duckduckgo_search import DDGS
78
+ return True
79
+ except ImportError:
80
+ return False
81
+
82
+ def search(self, query: str, max_results: int = 10) -> list[WebSearchResult]:
83
+ try:
84
+ from ddgs import DDGS
85
+ except ImportError:
86
+ from duckduckgo_search import DDGS
87
+
88
+ results = []
89
+ ddgs = DDGS()
90
+ for r in ddgs.text(query, max_results=max_results):
91
+ results.append(WebSearchResult(
92
+ title=r.get("title", ""),
93
+ url=r.get("href", r.get("link", "")),
94
+ snippet=r.get("body", r.get("snippet", "")),
95
+ source=self.name
96
+ ))
97
+ return results
98
+
99
+
100
+ class SerpAPIBackend(SearchBackend):
101
+
102
+ @property
103
+ def name(self) -> str:
104
+ return "serpapi"
105
+
106
+ def is_available(self) -> bool:
107
+ try:
108
+ import requests
109
+ return bool(SERPAPI_KEY)
110
+ except ImportError:
111
+ return False
112
+
113
+ def search(self, query: str, max_results: int = 10) -> list[WebSearchResult]:
114
+ import requests
115
+
116
+ response = requests.get(
117
+ "https://serpapi.com/search",
118
+ params={"q": query, "api_key": SERPAPI_KEY, "engine": "google", "num": max_results},
119
+ timeout=30
120
+ )
121
+ response.raise_for_status()
122
+ data = response.json()
123
+
124
+ results = []
125
+ for r in data.get("organic_results", [])[:max_results]:
126
+ results.append(WebSearchResult(
127
+ title=r.get("title", ""),
128
+ url=r.get("link", ""),
129
+ snippet=r.get("snippet", ""),
130
+ source=self.name
131
+ ))
132
+ return results
133
+
134
+
135
+ class BraveBackend(SearchBackend):
136
+
137
+ @property
138
+ def name(self) -> str:
139
+ return "brave"
140
+
141
+ def is_available(self) -> bool:
142
+ try:
143
+ import requests
144
+ return bool(BRAVE_API_KEY)
145
+ except ImportError:
146
+ return False
147
+
148
+ def search(self, query: str, max_results: int = 10) -> list[WebSearchResult]:
149
+ import requests
150
+
151
+ response = requests.get(
152
+ "https://api.search.brave.com/res/v1/web/search",
153
+ headers={"Accept": "application/json", "X-Subscription-Token": BRAVE_API_KEY},
154
+ params={"q": query, "count": min(max_results, 20)},
155
+ timeout=30
156
+ )
157
+ response.raise_for_status()
158
+ data = response.json()
159
+
160
+ results = []
161
+ for r in data.get("web", {}).get("results", [])[:max_results]:
162
+ results.append(WebSearchResult(
163
+ title=r.get("title", ""),
164
+ url=r.get("url", ""),
165
+ snippet=r.get("description", ""),
166
+ source=self.name
167
+ ))
168
+ return results
169
+
170
+
171
+ # Backend registry
172
+ BACKENDS: dict[str, SearchBackend] = {
173
+ "duckduckgo": DuckDuckGoBackend(),
174
+ "ddg": DuckDuckGoBackend(),
175
+ "serpapi": SerpAPIBackend(),
176
+ "brave": BraveBackend(),
177
+ }
178
+
179
+
180
+ def get_backend(name: str = "duckduckgo") -> SearchBackend:
181
+ backend = BACKENDS.get(name)
182
+ if not backend:
183
+ raise ValueError(f"Unknown backend: {name}. Available: {list(BACKENDS.keys())}")
184
+ if not backend.is_available():
185
+ raise RuntimeError(f"Backend '{name}' not available. Check dependencies/API keys.")
186
+ return backend
187
+
188
+
189
+ # ============================================================
190
+ # Result Storage
191
+ # ============================================================
192
+
193
+ class ResultStorage:
194
+
195
+ def __init__(self, directory: Path = RESEARCH_DIR):
196
+ self.directory = directory
197
+ self.directory.mkdir(parents=True, exist_ok=True)
198
+
199
+ def save(self, query: str, results: list[WebSearchResult], backend: str) -> tuple[Path, Path]:
200
+ timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
201
+ slug = self._slugify(query)
202
+ base_name = f"{timestamp}_{slug}"
203
+
204
+ data = {
205
+ "query": query,
206
+ "timestamp": datetime.now().isoformat(),
207
+ "backend": backend,
208
+ "result_count": len(results),
209
+ "results": [asdict(r) for r in results]
210
+ }
211
+
212
+ json_path = self.directory / f"{base_name}.json"
213
+ with open(json_path, "w") as f:
214
+ json.dump(data, f, indent=2)
215
+
216
+ md_path = self.directory / f"{base_name}.md"
217
+ with open(md_path, "w") as f:
218
+ f.write(f"# Search: {query}\n\n")
219
+ f.write(f"**Date:** {data['timestamp']} \n")
220
+ f.write(f"**Backend:** {backend} \n")
221
+ f.write(f"**Results:** {len(results)}\n\n---\n")
222
+ for i, r in enumerate(results, 1):
223
+ f.write(f"\n## {i}. {r.title}\n\n**URL:** {r.url}\n\n{r.snippet}\n")
224
+
225
+ return json_path, md_path
226
+
227
+ def list_searches(self, limit: int = 20) -> list[dict]:
228
+ searches = []
229
+ for json_file in sorted(self.directory.glob("*.json"), reverse=True):
230
+ if json_file.name.startswith("."):
231
+ continue
232
+ try:
233
+ with open(json_file) as f:
234
+ data = json.load(f)
235
+ searches.append({
236
+ "file": json_file.name,
237
+ "query": data.get("query", ""),
238
+ "timestamp": data.get("timestamp", ""),
239
+ "results": data.get("result_count", 0)
240
+ })
241
+ except Exception:
242
+ pass
243
+ if len(searches) >= limit:
244
+ break
245
+ return searches
246
+
247
+ def get_recent_queries(self, days: int = 7) -> set[str]:
248
+ cutoff = datetime.now() - timedelta(days=days)
249
+ recent = set()
250
+
251
+ for json_file in self.directory.glob("*.json"):
252
+ if json_file.name.startswith("."):
253
+ continue
254
+ try:
255
+ with open(json_file) as f:
256
+ data = json.load(f)
257
+ ts = data.get("timestamp", "")
258
+ if ts:
259
+ file_date = datetime.fromisoformat(ts.replace("Z", "+00:00").split("+")[0])
260
+ if file_date >= cutoff:
261
+ query = data.get("query", "").lower().strip()
262
+ recent.add(query)
263
+ except Exception:
264
+ pass
265
+ return recent
266
+
267
+ def _slugify(self, text: str, max_len: int = 50) -> str:
268
+ slug = text.lower()[:max_len].replace(" ", "-").replace("/", "-")
269
+ return "".join(c for c in slug if c.isalnum() or c == "-")
270
+
271
+
272
+ # ============================================================
273
+ # Search Service (Facade)
274
+ # ============================================================
275
+
276
+ class SearchService:
277
+ """High-level search interface combining backend and storage."""
278
+
279
+ def __init__(self, backend: str = "duckduckgo", storage: ResultStorage | None = None):
280
+ self.backend = get_backend(backend)
281
+ self.storage = storage or ResultStorage()
282
+
283
+ def search(self, query: str, max_results: int = 10, save: bool = True) -> list[WebSearchResult]:
284
+ results = self.backend.search(query, max_results)
285
+ if save and results:
286
+ self.storage.save(query, results, self.backend.name)
287
+ return results
288
+
289
+ def search_batch(self, queries: list[str], max_results: int = 10,
290
+ delay: float = 0.5, callback=None) -> dict[str, int]:
291
+ stats = {}
292
+ for i, query in enumerate(queries, 1):
293
+ if callback:
294
+ callback(i, len(queries), query)
295
+ try:
296
+ results = self.search(query, max_results, save=True)
297
+ stats[query] = len(results)
298
+ except Exception as e:
299
+ stats[query] = -1
300
+ print(f"Error on '{query}': {e}", file=sys.stderr)
301
+
302
+ if delay > 0 and i < len(queries):
303
+ time.sleep(delay)
304
+
305
+ return stats
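A short sketch of how the pieces above compose (the queries are placeholders; the dedup step simply reuses ResultStorage.get_recent_queries as defined in this file):

    from search import SearchService, ResultStorage

    storage = ResultStorage()
    recent = storage.get_recent_queries(days=7)   # queries already run this week

    queries = [
        "rehabilitation robotics funding 2026",
        "exoskeleton clinical trial results",
    ]
    fresh = [q for q in queries if q.lower().strip() not in recent]

    svc = SearchService(backend="duckduckgo", storage=storage)
    stats = svc.search_batch(
        fresh,
        max_results=10,
        delay=1.0,                                 # polite pause between backend calls
        callback=lambda i, n, q: print(f"[{i}/{n}] {q}"),
    )
    print(stats)                                   # query -> result count, -1 on error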
sota_agent.py ADDED
@@ -0,0 +1,850 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ GURMA.ai SOTA Technology Knowledge Agent
4
+
5
+ Maintains a persistent knowledge base of state-of-the-art models, techniques,
6
+ and tools relevant to GURMA.ai's high-precision medical/rehabilitation AI domain.
7
+
8
+ The agent is "aware" of GURMA.ai's strategic position (outcome data moat,
9
+ edge-first, safety-critical) and filters all technology developments through
10
+ that lens. It updates itself when fed new information (papers, podcasts,
11
+ announcements).
12
+
13
+ Usage:
14
+ python research.py sota # Show current knowledge base
15
+ python research.py sota --analyze notes/research/podcast-sota-models.md
16
+ python research.py sota --show models # Show tracked models
17
+ python research.py sota --show techniques # Show tracked techniques
18
+ python research.py sota --show stack # Show recommended tech stack
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import json
24
+ import re
25
+ from dataclasses import dataclass, field, asdict
26
+ from datetime import datetime
27
+ from pathlib import Path
28
+ from typing import Optional
29
+
30
+ try:
31
+ from .llm import LLMClient
32
+ from .config import RESEARCH_DIR, LLM_ENABLED
33
+ except ImportError:
34
+ from llm import LLMClient
35
+ from config import RESEARCH_DIR, LLM_ENABLED
36
+
37
+
38
+ # ============================================================
39
+ # GURMA.ai Context — what the agent "knows" about the company
40
+ # ============================================================
41
+
42
+ GURMA_CONTEXT = {
43
+ "company": "GURMA.ai AG (Swiss)",
44
+ "domain": "Rehabilitation robotics AI — high-precision medical domain",
45
+ "data_moat": "15 years of patient outcome data from BAMA Teknoloji "
46
+ "(gait dynamics, EMG signals, recovery outcomes — not just motion data)",
47
+ "products": ["RoboGate (stationary gait rehab robot)", "FreeGate (5-axis mobile exoskeleton)"],
48
+ "architecture": "Privacy-first edge computing — no cloud data exposure",
49
+ "regulatory": "EU AI Act (high-risk), MDR, ISO 13485, GDPR/KVKK — 80% safety focus from day one",
50
+ "precision_requirement": (
51
+ "Medical rehabilitation demands super-high precision: wrong therapy parameters "
52
+ "can harm patients. Models must be verifiable, explainable, and fail-safe. "
53
+ "This is NOT a domain where 'good enough' works — it requires domain-specific "
54
+ "training on real outcome data with verifiable reward signals."
55
+ ),
56
+ "core_thesis": (
57
+ "Proprietary outcome data + domain expertise + regulatory focus = defensible AI moat. "
58
+ "Frontier labs are NOT focusing on domain-specific medical applications."
59
+ ),
60
+ }
61
+
62
+ # ============================================================
63
+ # Relevance Scoring — how the agent filters incoming info
64
+ # ============================================================
65
+
66
+ GURMA_RELEVANCE_SIGNALS = {
67
+ "outcome_data": {
68
+ "weight": 3,
69
+ "description": "Patient outcomes as training signal / verifiable rewards",
70
+ "patterns": [
71
+ r"outcome.?data", r"patient.?outcome", r"recovery.?outcome",
72
+ r"treatment.?outcome", r"verifiable.?reward", r"reward.?model",
73
+ r"clinical.?outcome", r"reward.?signal",
74
+ ],
75
+ },
76
+ "rehabilitation": {
77
+ "weight": 3,
78
+ "description": "Rehabilitation, gait, exoskeleton, motor recovery tech",
79
+ "patterns": [
80
+ r"rehabilitat", r"gait.?(?:analysis|dynamic|training)",
81
+ r"exoskeleton", r"physical.?therapy", r"motor.?recovery",
82
+ r"neurorehab", r"stroke.?recovery",
83
+ ],
84
+ },
85
+ "high_precision": {
86
+ "weight": 3,
87
+ "description": "High-precision / safety-critical model requirements",
88
+ "patterns": [
89
+ r"high.?precision", r"safety.?critical", r"fail.?(?:safe|never)",
90
+ r"verification.?layer", r"verifiable", r"explainabl",
91
+ r"clinical.?(?:accuracy|precision|validation)",
92
+ ],
93
+ },
94
+ "domain_specific": {
95
+ "weight": 2,
96
+ "description": "Domain-specific fine-tuning — GURMA.ai's core approach",
97
+ "patterns": [
98
+ r"domain.specific", r"fine.tun", r"\bLoRA\b", r"specialized.?model",
99
+ r"medical.?(?:model|AI|LLM)", r"clinical.?(?:NLP|model)",
100
+ r"proprietary.?data",
101
+ ],
102
+ },
103
+ "rl_training": {
104
+ "weight": 2,
105
+ "description": "RL post-training — outcome data as rewards",
106
+ "patterns": [
107
+ r"\bRLHF\b", r"\bRLVR\b", r"reinforcement.?learning",
108
+ r"post.training", r"\bPPO\b", r"\bGRPO\b", r"reward.?shaping",
109
+ ],
110
+ },
111
+ "edge_privacy": {
112
+ "weight": 2,
113
+ "description": "Edge inference, on-device, privacy-first deployment",
114
+ "patterns": [
115
+ r"edge.?(?:computing|deploy|inference|device)",
116
+ r"on.device", r"privacy.first", r"local.?inference",
117
+ r"quantiz", r"on.premise",
118
+ ],
119
+ },
120
+ "safety_regulatory": {
121
+ "weight": 2,
122
+ "description": "AI safety, medical device regulation, verification",
123
+ "patterns": [
124
+ r"AI.?safety", r"medical.?device", r"(?:EU|FDA).?(?:AI|regulat)",
125
+ r"\bMDR\b", r"CE.?mark", r"ISO.?13485", r"verification.?layer",
126
+ r"constitutional.?AI",
127
+ ],
128
+ },
129
+ "robotics": {
130
+ "weight": 2,
131
+ "description": "Robotics AI, world models, embodied AI, patient simulation",
132
+ "patterns": [
133
+ r"robot(?:ic)?s?.?(?:AI|control|learning)",
134
+ r"world.?model", r"embodied.?AI", r"simulation.?model",
135
+ r"continual.?learning",
136
+ ],
137
+ },
138
+ "open_models": {
139
+ "weight": 1,
140
+ "description": "Open-weight models with clear licensing for medical use",
141
+ "patterns": [
142
+ r"open.weight", r"open.source.?(?:model|LLM)",
143
+ r"\bQwen\b", r"\bOLMo\b", r"DeepSeek", r"\bLlama\b",
144
+ r"\bMistral\b", r"gpt.oss",
145
+ ],
146
+ },
147
+ "tool_use": {
148
+ "weight": 1,
149
+ "description": "Tool-calling AI for clinical workflow automation",
150
+ "patterns": [
151
+ r"tool.?(?:use|calling)", r"function.?call",
152
+ r"(?:AI|LLM).?agent", r"autonomous.?agent",
153
+ ],
154
+ },
155
+ }
156
+
157
+ # ============================================================
158
+ # Initial Knowledge Base — seeded from podcast analysis
159
+ # ============================================================
160
+
161
+ INITIAL_KNOWLEDGE_BASE = {
162
+ "models": [
163
+ {
164
+ "name": "Qwen 3",
165
+ "params": "7B-32B range",
166
+ "why": "Best open-weight performance (50T tokens trained), friendly commercial license",
167
+ "gurma_fit": "Base model for domain fine-tuning; fewer restrictions than Llama for medical device use",
168
+ "status": "recommended",
169
+ "added": "2026-02-06",
170
+ "source": "Lex Fridman Podcast #490",
171
+ },
172
+ {
173
+ "name": "OLMo 3",
174
+ "params": "7B+",
175
+ "why": "Fully documented training process, truly open (AI2), great for understanding methodology",
176
+ "gurma_fit": "Best for learning/reproducing training; full transparency aids regulatory documentation",
177
+ "status": "recommended",
178
+ "added": "2026-02-06",
179
+ "source": "Lex Fridman Podcast #490",
180
+ },
181
+ {
182
+ "name": "gpt-oss-120b",
183
+ "params": "120B",
184
+ "why": "First open model specifically trained with tool use in mind",
185
+ "gurma_fit": "Tool-calling for patient data APIs, sensor queries, automated insurance reporting",
186
+ "status": "watch",
187
+ "added": "2026-02-06",
188
+ "source": "Lex Fridman Podcast #490",
189
+ },
190
+ {
191
+ "name": "DeepSeek-V3.2",
192
+ "params": "varies",
193
+ "why": "Sparse attention architecture, efficient inference",
194
+ "gurma_fit": "Sparse attention promising for edge deployment on RoboGate/FreeGate",
195
+ "status": "watch",
196
+ "added": "2026-02-06",
197
+ "source": "Lex Fridman Podcast #490",
198
+ },
199
+ ],
200
+ "techniques": [
201
+ {
202
+ "name": "RLVR (Reinforcement Learning with Verifiable Rewards)",
203
+ "category": "post-training",
204
+ "why": "Post-training is the 'skill unlock' — pre-training gives knowledge, post-training gives precision",
205
+ "gurma_fit": "Patient recovery outcomes ARE verifiable rewards. 15 years of outcome data = perfect RLVR signal.",
206
+ "priority": "high",
207
+ "added": "2026-02-06",
208
+ "source": "Lex Fridman Podcast #490",
209
+ },
210
+ {
211
+ "name": "LoRA (Low-Rank Adaptation)",
212
+ "category": "fine-tuning",
213
+ "why": "Fine-tune only a small subset of weights; practical on limited compute; proven on 7B models",
214
+ "gurma_fit": "Start with 7B models + LoRA for engagement scoring and outcome prediction. Efficient enough for iterative experiments.",
215
+ "priority": "high",
216
+ "added": "2026-02-06",
217
+ "source": "Lex Fridman Podcast #490",
218
+ },
219
+ {
220
+ "name": "PPO / GRPO",
221
+ "category": "post-training",
222
+ "why": "Policy gradient algorithms for RL post-training; PPO is standard, GRPO is newer group-relative approach",
223
+ "gurma_fit": "Algorithms to train models using outcome data as reward signal",
224
+ "priority": "medium",
225
+ "added": "2026-02-06",
226
+ "source": "Lex Fridman Podcast #490",
227
+ },
228
+ {
229
+ "name": "Synthetic Data (reformatting)",
230
+ "category": "data-processing",
231
+ "why": "Not 'AI-generated fake data' — means restructuring real data into training formats (Q&A, summaries). OCR for medical PDFs.",
232
+ "gurma_fit": "Convert EMG readings → Q&A format, session notes → case summaries, treatment logs → outcome predictions",
233
+ "priority": "high",
234
+ "added": "2026-02-06",
235
+ "source": "Lex Fridman Podcast #490",
236
+ },
237
+ {
238
+ "name": "World Models",
239
+ "category": "simulation",
240
+ "why": "Model runs a simulation of the environment; verifies intermediate states, not just final results",
241
+ "gurma_fit": "Patient progress modeling IS a world model problem. Verify intermediate therapy states, simulate treatment outcomes.",
242
+ "priority": "medium",
243
+ "added": "2026-02-06",
244
+ "source": "Lex Fridman Podcast #490",
245
+ },
246
+ {
247
+ "name": "Sparse Attention",
248
+ "category": "efficiency",
249
+ "why": "Lightweight token selection indexer; efficient inference for edge deployment",
250
+ "gurma_fit": "Could enable on-device models for RoboGate/FreeGate with privacy-first architecture",
251
+ "priority": "medium",
252
+ "added": "2026-02-06",
253
+ "source": "Lex Fridman Podcast #490",
254
+ },
255
+ ],
256
+ "tech_stack": [
257
+ {"component": "Base Model", "recommendation": "Qwen 3 (7B-32B) or OLMo 3", "rationale": "Open weights, good license, well-documented"},
258
+ {"component": "Fine-tuning", "recommendation": "LoRA + RLVR", "rationale": "Practical compute, outcome-based rewards"},
259
+ {"component": "Tool Use Model", "recommendation": "gpt-oss-120b", "rationale": "Specifically trained for tool calling"},
260
+ {"component": "Training Framework", "recommendation": "TRL (Hugging Face)", "rationale": "RLHF/RLVR implementation"},
261
+ {"component": "Inference", "recommendation": "vLLM or SGLang", "rationale": "Production-ready, NOT HF Transformers in prod"},
262
+ {"component": "Edge", "recommendation": "Quantized models + sparse attention", "rationale": "Privacy-first deployment"},
263
+ {"component": "Safety", "recommendation": "Constitutional AI principles + verification layers", "rationale": "Medical device compliance, 'allowed to fail never'"},
264
+ ],
265
+ "key_principles": [
266
+ {
267
+ "principle": "Domain-specific data is the moat",
268
+ "detail": "Frontier labs won't build rehab-specific models. Proprietary outcome data that nobody else can access is the defensible advantage.",
269
+ "source": "Lex Fridman Podcast #490 — Sebastian Raschka",
270
+ },
271
+ {
272
+ "principle": "Post-training over pre-training",
273
+ "detail": "Don't spend on pre-training. Use open base models + invest in post-training (RLVR) where outcome data becomes the competitive edge.",
274
+ "source": "Lex Fridman Podcast #490",
275
+ },
276
+ {
277
+ "principle": "Data quality over quantity",
278
+ "detail": "Curate aggressively. Reformat existing data into multiple training formats. Clean > big.",
279
+ "source": "Lex Fridman Podcast #490 — Nathan Lambert",
280
+ },
281
+ {
282
+ "principle": "Human verification mandatory for medical AI",
283
+ "detail": "Tool-calling and autonomous agents still require human-in-the-loop for trust/safety in clinical context.",
284
+ "source": "Lex Fridman Podcast #490",
285
+ },
286
+ {
287
+ "principle": "High precision is non-negotiable",
288
+ "detail": "Medical rehab is a 'fail never' domain. Models must be verifiable, with intermediate state checking (world model approach).",
289
+ "source": "Lex Fridman Podcast #490 — Lex Fridman",
290
+ },
291
+ ],
292
+ "sources_analyzed": [
293
+ {
294
+ "name": "Lex Fridman Podcast #490 — State of AI in 2026",
295
+ "type": "podcast",
296
+ "date": "2026-02-06",
297
+ "key_speakers": "Nathan Lambert (AI2), Sebastian Raschka",
298
+ "insights_extracted": 10,
299
+ },
300
+ ],
301
+ "action_items": [
302
+ {"item": "Experiment with Qwen 3 / OLMo 3 on rehabilitation domain prompts", "status": "pending"},
303
+ {"item": "Structure outcome data for RLVR — create verifiable reward functions (gait improvement score, session completion rate)", "status": "pending"},
304
+ {"item": "Build tool schemas — APIs for patient data access, sensor queries, report generation", "status": "pending"},
305
+ {"item": "Set up vLLM for production inference", "status": "pending"},
306
+ {"item": "Study Nathan Lambert's RLHF book when released", "status": "pending"},
307
+ ],
308
+ "metadata": {
309
+ "created": "2026-02-06",
310
+ "last_updated": "2026-02-06",
311
+ "version": 1,
312
+ },
313
+ }
314
+
315
+
316
+ # ============================================================
317
+ # SOTA Knowledge Agent
318
+ # ============================================================
319
+
320
+ class SOTAScoutAgent:
321
+ """Maintains and updates GURMA.ai's SOTA technology knowledge base.
322
+
323
+ The agent understands that GURMA.ai operates in a high-precision medical
324
+ domain where model accuracy, verifiability, and safety are non-negotiable.
325
+ It filters all technology developments through this lens.
326
+
327
+ Usage:
328
+ agent = SOTAScoutAgent()
329
+ agent.show() # Print current KB state
330
+ agent.show("models") # Show tracked models
331
+ agent.analyze("notes/research/podcast.md") # Analyze + update KB
332
+ """
333
+
334
+ def __init__(self, llm: LLMClient = None):
335
+ self.llm = llm or LLMClient()
336
+ self.kb_dir = RESEARCH_DIR / "sota"
337
+ self.kb_dir.mkdir(parents=True, exist_ok=True)
338
+ self.kb_path = self.kb_dir / "knowledge_base.json"
339
+ self.kb = self._load_kb()
340
+
341
+ # ----------------------------------------------------------
342
+ # Persistence
343
+ # ----------------------------------------------------------
344
+
345
+ def _load_kb(self) -> dict:
346
+ """Load existing KB or initialize from seed."""
347
+ if self.kb_path.exists():
348
+ try:
349
+ with open(self.kb_path) as f:
350
+ return json.load(f)
351
+ except Exception:
352
+ pass
353
+ # First run — seed from initial knowledge
354
+ kb = json.loads(json.dumps(INITIAL_KNOWLEDGE_BASE))
355
+ self._save_kb(kb)
356
+ return kb
357
+
358
+ def _save_kb(self, kb: dict = None):
359
+ """Persist knowledge base to disk."""
360
+ kb = kb or self.kb
361
+ kb["metadata"]["last_updated"] = datetime.now().strftime("%Y-%m-%d")
362
+ with open(self.kb_path, "w") as f:
363
+ json.dump(kb, f, indent=2)
364
+
365
+ # ----------------------------------------------------------
366
+ # Public: Show
367
+ # ----------------------------------------------------------
368
+
369
+ def show(self, section: str = None) -> dict:
370
+ """Display current knowledge base state.
371
+
372
+ Args:
373
+ section: Optional — "models", "techniques", "stack", "principles",
374
+ "actions", "sources". None = summary of everything.
375
+
376
+ Returns: The KB data (also prints to stdout).
377
+ """
378
+ if section == "models":
379
+ self._print_models()
380
+ elif section == "techniques":
381
+ self._print_techniques()
382
+ elif section == "stack":
383
+ self._print_stack()
384
+ elif section == "principles":
385
+ self._print_principles()
386
+ elif section == "actions":
387
+ self._print_actions()
388
+ elif section == "sources":
389
+ self._print_sources()
390
+ else:
391
+ self._print_summary()
392
+
393
+ return self.kb
394
+
395
+ def _print_summary(self):
396
+ meta = self.kb.get("metadata", {})
397
+ models = self.kb.get("models", [])
398
+ techniques = self.kb.get("techniques", [])
399
+ stack = self.kb.get("tech_stack", [])
400
+ principles = self.kb.get("key_principles", [])
401
+ actions = self.kb.get("action_items", [])
402
+ sources = self.kb.get("sources_analyzed", [])
403
+
404
+ print(f"\n{'='*60}")
405
+ print(f"GURMA.ai SOTA Knowledge Base")
406
+ print(f"{'='*60}")
407
+ print(f"Last updated: {meta.get('last_updated', 'unknown')}")
408
+ print(f"Version: {meta.get('version', 0)}")
409
+ print(f"")
410
+ print(f" Models tracked: {len(models)}")
411
+ print(f" Techniques tracked: {len(techniques)}")
412
+ print(f" Tech stack items: {len(stack)}")
413
+ print(f" Key principles: {len(principles)}")
414
+ print(f" Action items: {len(actions)} ({sum(1 for a in actions if a.get('status') == 'pending')} pending)")
415
+ print(f" Sources analyzed: {len(sources)}")
416
+ print(f"")
417
+
418
+ rec_models = [m for m in models if m.get("status") == "recommended"]
419
+ if rec_models:
420
+ print(f"Recommended models:")
421
+ for m in rec_models:
422
+ print(f" * {m['name']} ({m.get('params', '?')}) — {m.get('gurma_fit', '')[:80]}")
423
+
424
+ high_tech = [t for t in techniques if t.get("priority") == "high"]
425
+ if high_tech:
426
+ print(f"\nHigh-priority techniques:")
427
+ for t in high_tech:
428
+ print(f" * {t['name']} — {t.get('gurma_fit', '')[:80]}")
429
+
430
+ pending = [a for a in actions if a.get("status") == "pending"]
431
+ if pending:
432
+ print(f"\nPending action items:")
433
+ for a in pending[:5]:
434
+ print(f" [ ] {a['item']}")
435
+
436
+ print(f"\n{'='*60}")
437
+
438
+ def _print_models(self):
439
+ print(f"\n--- Tracked Models ---\n")
440
+ for m in self.kb.get("models", []):
441
+ status_icon = {"recommended": "*", "watch": "~", "deprecated": "x"}.get(m.get("status", ""), "?")
442
+ print(f" [{status_icon}] {m['name']} ({m.get('params', '?')})")
443
+ print(f" Why: {m.get('why', '')}")
444
+ print(f" GURMA.ai fit: {m.get('gurma_fit', '')}")
445
+ print(f" Source: {m.get('source', '')} | Added: {m.get('added', '')}")
446
+ print()
447
+
448
+ def _print_techniques(self):
449
+ print(f"\n--- Tracked Techniques ---\n")
450
+ for t in self.kb.get("techniques", []):
451
+ pri = {"high": "!!!", "medium": "!!", "low": "!"}.get(t.get("priority", ""), "?")
452
+ print(f" [{pri}] {t['name']} ({t.get('category', '')})")
453
+ print(f" Why: {t.get('why', '')}")
454
+ print(f" GURMA.ai fit: {t.get('gurma_fit', '')}")
455
+ print()
456
+
457
+ def _print_stack(self):
458
+ print(f"\n--- Recommended Tech Stack ---\n")
459
+ for s in self.kb.get("tech_stack", []):
460
+ print(f" {s['component']:20s} -> {s['recommendation']}")
461
+ print(f" {'':20s} {s.get('rationale', '')}")
462
+ print()
463
+
464
+ def _print_principles(self):
465
+ print(f"\n--- Key Principles ---\n")
466
+ for p in self.kb.get("key_principles", []):
467
+ print(f" * {p['principle']}")
468
+ print(f" {p.get('detail', '')}")
469
+ print()
470
+
471
+ def _print_actions(self):
472
+ print(f"\n--- Action Items ---\n")
473
+ for a in self.kb.get("action_items", []):
474
+ icon = "[x]" if a.get("status") == "done" else "[ ]"
475
+ print(f" {icon} {a['item']}")
476
+
477
+ def _print_sources(self):
478
+ print(f"\n--- Analyzed Sources ---\n")
479
+ for s in self.kb.get("sources_analyzed", []):
480
+ print(f" {s.get('date', '?')} | {s['name']} ({s.get('type', '')})")
481
+ if s.get("key_speakers"):
482
+ print(f" Speakers: {s['key_speakers']}")
483
+ print(f" Insights extracted: {s.get('insights_extracted', 0)}")
484
+ print()
485
+
486
+ # ----------------------------------------------------------
487
+ # Public: Analyze document and update KB
488
+ # ----------------------------------------------------------
489
+
490
+ def analyze(self, file_path: str) -> Path:
491
+ """Analyze a document for GURMA.ai-relevant SOTA insights and update KB.
492
+
493
+ Reads the file, scores sections for relevance, uses LLM to extract
494
+ structured insights, and merges new findings into the knowledge base.
495
+
496
+ Args:
497
+ file_path: Path to markdown/text file
498
+
499
+ Returns: Path to generated analysis report
500
+ """
501
+ try:
502
+ from .config import PROJECT_ROOT
503
+ except ImportError:
504
+ from config import PROJECT_ROOT
505
+
506
+ path = Path(file_path)
507
+ if not path.is_absolute():
508
+ path = PROJECT_ROOT / file_path
509
+
510
+ if not path.exists():
511
+ raise FileNotFoundError(f"File not found: {path}")
512
+
513
+ print(f"\n{'='*60}")
514
+ print(f"Analyzing: {path.name}")
515
+ print(f"{'='*60}\n")
516
+
517
+ text = path.read_text(encoding="utf-8")
518
+
519
+ # Score sections for relevance
520
+ sections = self._split_sections(text)
521
+ scored = []
522
+ for sec in sections:
523
+ if len(sec.strip()) < 50:
524
+ continue
525
+ score, tags = self._score_relevance(sec)
526
+ if score > 0:
527
+ scored.append({"text": sec.strip()[:500], "score": score, "tags": tags})
528
+ scored.sort(key=lambda x: -x["score"])
529
+
530
+ print(f"Sections: {len(sections)} total, {len(scored)} relevant")
531
+
532
+ # LLM extraction — structured insights for KB update
533
+ llm_update = None
534
+ if self.llm.enabled:
535
+ print("[LLM] Extracting structured insights...")
536
+ llm_update = self._extract_kb_updates(text, path.name)
537
+ if llm_update:
538
+ n_models = len(llm_update.get("new_models", []))
539
+ n_tech = len(llm_update.get("new_techniques", []))
540
+ n_actions = len(llm_update.get("new_action_items", []))
541
+ print(f"[LLM] Found: {n_models} models, {n_tech} techniques, {n_actions} action items")
542
+ else:
543
+ print("[INFO] LLM not available — relevance scoring only, no KB update")
544
+
545
+ # Merge into knowledge base
546
+ changes = self._merge_updates(llm_update, path.name, len(scored))
547
+
548
+ # Generate report
549
+ report_path = self._generate_report(path.name, scored, llm_update, changes)
550
+
551
+ # Save updated KB
552
+ self._save_kb()
553
+
554
+ print(f"\nKB updated: {changes}")
555
+ print(f"Report: {report_path}")
556
+ return report_path
557
+
558
+ # ----------------------------------------------------------
559
+ # Internal: Relevance scoring
560
+ # ----------------------------------------------------------
561
+
562
+ def _score_relevance(self, text: str) -> tuple[float, list[str]]:
563
+ """Score text against GURMA.ai's high-precision domain themes.
564
+
565
+ Returns (score 0.0-1.0, matched signal tags).
566
+ """
567
+ text_lower = text.lower()
568
+ total_weight = 0
569
+ max_possible = sum(s["weight"] for s in GURMA_RELEVANCE_SIGNALS.values())
570
+ matched_tags = []
571
+
572
+ for tag, signal in GURMA_RELEVANCE_SIGNALS.items():
573
+ for pattern in signal["patterns"]:
574
+ if re.search(pattern, text_lower, re.IGNORECASE):
575
+ total_weight += signal["weight"]
576
+ matched_tags.append(tag)
577
+ break
578
+
579
+ score = min(total_weight / max_possible, 1.0) if max_possible > 0 else 0.0
580
+ return round(score, 3), matched_tags
581
+
582
+ def _split_sections(self, text: str) -> list[str]:
583
+ """Split markdown into logical sections."""
584
+ sections = re.split(r'\n#{1,3}\s+', text)
585
+ return [s.strip() for s in sections if s.strip()]
586
+
587
+ # ----------------------------------------------------------
588
+ # Internal: LLM extraction for KB update
589
+ # ----------------------------------------------------------
590
+
591
+ def _extract_kb_updates(self, text: str, filename: str) -> Optional[dict]:
592
+ """Use LLM to extract structured KB updates from a document.
593
+
594
+ Returns dict with new_models, new_techniques, stack_updates,
595
+ new_principles, new_action_items, and strategic_note.
596
+ """
597
+ truncated = text[:12000]
598
+
599
+ # Include current KB state so LLM can detect what's truly new
600
+ current_models = ", ".join(m["name"] for m in self.kb.get("models", []))
601
+ current_techniques = ", ".join(t["name"] for t in self.kb.get("techniques", []))
602
+
603
+ system = (
604
+ "You are the technology intelligence agent for GURMA.ai, a Swiss AI company "
605
+ "building high-precision models for rehabilitation robotics. "
606
+ "GURMA.ai has 15 years of patient outcome data (gait dynamics, EMG, recovery outcomes) "
607
+ "from BAMA Teknoloji. Their domain requires SUPER-HIGH PRECISION — "
608
+ "wrong therapy parameters can harm patients. "
609
+ "Architecture: privacy-first edge computing. "
610
+ "Regulatory: EU AI Act (high-risk), MDR, ISO 13485. "
611
+ "Core thesis: proprietary outcome data + domain expertise + safety focus = defensible moat. "
612
+ "Your job: extract technology insights that help GURMA.ai build better, "
613
+ "safer, more precise models for this domain."
614
+ )
615
+
616
+ prompt = f"""Analyze this document and extract NEW technology insights for GURMA.ai's knowledge base.
617
+
618
+ Document: {filename}
619
+ ---
620
+ {truncated}
621
+ ---
622
+
623
+ Already tracked models: {current_models}
624
+ Already tracked techniques: {current_techniques}
625
+
626
+ Extract ONLY information that is NEW or updates existing knowledge.
627
+ Focus on what matters for a high-precision medical AI domain.
628
+
629
+ Return JSON:
630
+ {{
631
+ "new_models": [
632
+ {{
633
+ "name": "Model name",
634
+ "params": "Size/params",
635
+ "why": "Why it matters in general",
636
+ "gurma_fit": "Specific fit for GURMA.ai's high-precision rehab domain",
637
+ "status": "recommended|watch|deprecated"
638
+ }}
639
+ ],
640
+ "new_techniques": [
641
+ {{
642
+ "name": "Technique name",
643
+ "category": "post-training|fine-tuning|data-processing|efficiency|simulation|safety",
644
+ "why": "Why it matters",
645
+ "gurma_fit": "How GURMA.ai should use it for high-precision medical AI",
646
+ "priority": "high|medium|low"
647
+ }}
648
+ ],
649
+ "stack_updates": [
650
+ {{
651
+ "component": "Which tech stack component to update",
652
+ "recommendation": "New recommendation",
653
+ "rationale": "Why this change"
654
+ }}
655
+ ],
656
+ "new_principles": [
657
+ {{
658
+ "principle": "Short principle statement",
659
+ "detail": "Explanation and evidence"
660
+ }}
661
+ ],
662
+ "new_action_items": [
663
+ "Concrete next step for GURMA.ai"
664
+ ],
665
+ "strategic_note": "How this document affects GURMA.ai's strategy (1-2 sentences, or null if no change)"
666
+ }}
667
+
668
+ Rules:
669
+ - ONLY include genuinely new information not already in the tracked lists
670
+ - Every item must connect to GURMA.ai's HIGH-PRECISION medical domain
671
+ - If a model/technique is already tracked, skip it (don't duplicate)
672
+ - If existing knowledge should be UPDATED (e.g. new version), include it with the update
673
+ - Be specific: name versions, papers, benchmarks
674
+ - Empty arrays are fine if nothing new is found"""
675
+
676
+ response = self.llm.call(prompt, system, max_tokens=3000)
677
+ if response:
678
+ match = re.search(r'\{.*\}', response, re.DOTALL)
679
+ if match:
680
+ try:
681
+ return json.loads(match.group())
682
+ except Exception:
683
+ pass
684
+ return None
685
+
686
+ # ----------------------------------------------------------
687
+ # Internal: Merge updates into KB
688
+ # ----------------------------------------------------------
689
+
690
+ def _merge_updates(self, llm_update: Optional[dict], source_name: str,
691
+ insights_count: int) -> dict:
692
+ """Merge LLM-extracted updates into the knowledge base.
693
+
694
+ Returns summary of changes made.
695
+ """
696
+ changes = {"models_added": 0, "techniques_added": 0,
697
+ "stack_updated": 0, "principles_added": 0,
698
+ "actions_added": 0}
699
+
700
+ # Record source
701
+ self.kb.setdefault("sources_analyzed", []).append({
702
+ "name": source_name,
703
+ "type": "document",
704
+ "date": datetime.now().strftime("%Y-%m-%d"),
705
+ "insights_extracted": insights_count,
706
+ })
707
+
708
+ if not llm_update:
709
+ return changes
710
+
711
+ today = datetime.now().strftime("%Y-%m-%d")
712
+
713
+ # Merge models
714
+ existing_names = {m["name"].lower() for m in self.kb.get("models", [])}
715
+ for m in llm_update.get("new_models", []):
716
+ if m.get("name", "").lower() not in existing_names:
717
+ m["added"] = today
718
+ m["source"] = source_name
719
+ self.kb["models"].append(m)
720
+ changes["models_added"] += 1
721
+
722
+ # Merge techniques
723
+ existing_tech = {t["name"].lower() for t in self.kb.get("techniques", [])}
724
+ for t in llm_update.get("new_techniques", []):
725
+ if t.get("name", "").lower() not in existing_tech:
726
+ t["added"] = today
727
+ t["source"] = source_name
728
+ self.kb["techniques"].append(t)
729
+ changes["techniques_added"] += 1
730
+
731
+ # Stack updates — replace matching components
732
+ for su in llm_update.get("stack_updates", []):
733
+ component = su.get("component", "")
734
+ updated = False
735
+ for i, existing in enumerate(self.kb.get("tech_stack", [])):
736
+ if existing["component"].lower() == component.lower():
737
+ self.kb["tech_stack"][i] = su
738
+ updated = True
739
+ changes["stack_updated"] += 1
740
+ break
741
+ if not updated and component:
742
+ self.kb["tech_stack"].append(su)
743
+ changes["stack_updated"] += 1
744
+
745
+ # Merge principles
746
+ existing_principles = {p["principle"].lower() for p in self.kb.get("key_principles", [])}
747
+ for p in llm_update.get("new_principles", []):
748
+ if p.get("principle", "").lower() not in existing_principles:
749
+ p["source"] = source_name
750
+ self.kb["key_principles"].append(p)
751
+ changes["principles_added"] += 1
752
+
753
+ # Merge action items
754
+ existing_actions = {a["item"].lower() for a in self.kb.get("action_items", [])}
755
+ for ai in llm_update.get("new_action_items", []):
756
+ if ai.lower() not in existing_actions:
757
+ self.kb["action_items"].append({"item": ai, "status": "pending"})
758
+ changes["actions_added"] += 1
759
+
760
+ # Bump version
761
+ self.kb["metadata"]["version"] = self.kb["metadata"].get("version", 0) + 1
762
+
763
+ return changes
764
+
765
+ # ----------------------------------------------------------
766
+ # Internal: Report generation
767
+ # ----------------------------------------------------------
768
+
769
+ def _generate_report(self, filename: str, scored_sections: list[dict],
770
+ llm_update: Optional[dict], changes: dict) -> Path:
771
+ """Generate analysis report as markdown."""
772
+ timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
773
+ slug = re.sub(r'[^a-z0-9]', '-', filename.lower().rsplit('.', 1)[0])[:40]
774
+ report_path = self.kb_dir / f"{timestamp}_{slug}_analysis.md"
775
+
776
+ lines = [
777
+ f"# SOTA Analysis: {filename}",
778
+ "",
779
+ f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M')} ",
780
+ f"**Source:** {filename} ",
781
+ f"**Relevant sections:** {len(scored_sections)} ",
782
+ f"**KB version:** {self.kb['metadata'].get('version', '?')} ",
783
+ "",
784
+ ]
785
+
786
+ # Changes summary
787
+ total_changes = sum(changes.values())
788
+ if total_changes > 0:
789
+ lines.append("## Knowledge Base Updates")
790
+ lines.append("")
791
+ if changes["models_added"]:
792
+ lines.append(f"- **{changes['models_added']}** new model(s) added")
793
+ if changes["techniques_added"]:
794
+ lines.append(f"- **{changes['techniques_added']}** new technique(s) added")
795
+ if changes["stack_updated"]:
796
+ lines.append(f"- **{changes['stack_updated']}** tech stack update(s)")
797
+ if changes["principles_added"]:
798
+ lines.append(f"- **{changes['principles_added']}** new principle(s)")
799
+ if changes["actions_added"]:
800
+ lines.append(f"- **{changes['actions_added']}** new action item(s)")
801
+ lines.append("")
802
+ else:
803
+ lines.append("*No new knowledge extracted (document may cover already-tracked topics).*")
804
+ lines.append("")
805
+
806
+ # LLM-extracted details
807
+ if llm_update:
808
+ if llm_update.get("strategic_note"):
809
+ lines.append("## Strategic Note")
810
+ lines.append(llm_update["strategic_note"])
811
+ lines.append("")
812
+
813
+ for m in llm_update.get("new_models", []):
814
+ lines.append(f"### New Model: {m.get('name', '?')}")
815
+ lines.append(f"- **Params:** {m.get('params', '?')}")
816
+ lines.append(f"- **Why:** {m.get('why', '')}")
817
+ lines.append(f"- **GURMA.ai fit:** {m.get('gurma_fit', '')}")
818
+ lines.append("")
819
+
820
+ for t in llm_update.get("new_techniques", []):
821
+ lines.append(f"### New Technique: {t.get('name', '?')}")
822
+ lines.append(f"- **Category:** {t.get('category', '?')}")
823
+ lines.append(f"- **Why:** {t.get('why', '')}")
824
+ lines.append(f"- **GURMA.ai fit:** {t.get('gurma_fit', '')}")
825
+ lines.append(f"- **Priority:** {t.get('priority', '?')}")
826
+ lines.append("")
827
+
828
+ if llm_update.get("new_action_items"):
829
+ lines.append("## New Action Items")
830
+ lines.append("")
831
+ for ai in llm_update["new_action_items"]:
832
+ lines.append(f"- [ ] {ai}")
833
+ lines.append("")
834
+
835
+ # Relevance-scored sections
836
+ if scored_sections:
837
+ lines.append("---")
838
+ lines.append("")
839
+ lines.append("## Relevance-Scored Sections")
840
+ lines.append("")
841
+ for s in scored_sections[:10]:
842
+ tags_str = ", ".join(s["tags"])
843
+ lines.append(f"**Score: {s['score']:.2f}** — tags: {tags_str}")
844
+ lines.append(f"> {s['text'][:300]}")
845
+ lines.append("")
846
+
847
+ with open(report_path, "w") as f:
848
+ f.write("\n".join(lines))
849
+
850
+ return report_path
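A usage sketch for the agent above. The snippet text is invented, and _score_relevance is internal (called here only to illustrate the scoring): the matched signals weigh 3+3+2+2 against the 21-point maximum, so the expected score is 10/21 ≈ 0.476.

    from sota_agent import SOTAScoutAgent

    agent = SOTAScoutAgent()   # seeds RESEARCH_DIR / "sota" / "knowledge_base.json" on first run
    agent.show("models")       # print tracked models

    snippet = ("RLVR turns patient outcome metrics into verifiable rewards, "
               "and quantized models make edge deployment practical.")
    score, tags = agent._score_relevance(snippet)
    print(score, tags)
    # 0.476 ['outcome_data', 'high_precision', 'rl_training', 'edge_privacy']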
tr_agents.py ADDED
@@ -0,0 +1,480 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ GURMA.ai Turkish Research Agents - v2 (Enhanced)
4
+
5
+ Two specialized agents for Turkish-language web research, enhanced with strategic
6
+ context for Gurma AI's market entry.
7
+
8
+ 1. **MaliMusavirAgent**: Researches company formation, tax, IP, and medical
9
+ device regulations.
10
+
11
+ 2. **FonArastirmaAgent**: Researches R&D funding, focusing on bilateral
12
+ Swiss-Turkish opportunities and leveraging the BAMA partnership.
13
+
14
+ Both agents search in Turkish and produce structured data for a Cursor agent
15
+ (e.g., Claude Opus) to synthesize into actionable reports.
16
+
17
+ Usage:
18
+ # Ensure gurma-context.md is in the same directory
19
+ python research.py mali # Full company formation research
20
+ python research.py fonlar -c tubitak # Specific funding category
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import re
27
+ import time
28
+ from dataclasses import dataclass, field
29
+ from datetime import datetime
30
+ from pathlib import Path
31
+
32
+ try:
33
+ from .search import SearchService
34
+ from .config import RESEARCH_DIR
35
+ except ImportError:
36
+ from search import SearchService
37
+ from config import RESEARCH_DIR
38
+
39
+ # ============================================================
40
+ # Turkish Authoritative Source Domains
41
+ # ============================================================
42
+
43
+ TR_PRIMARY_SOURCES = {
44
+ "tubitak.gov.tr", "kosgeb.gov.tr", "sanayi.gov.tr",
45
+ "ticaret.gov.tr", "ticaretsicil.gov.tr", "gib.gov.tr",
46
+ "resmigazete.gov.tr", "mevzuat.gov.tr", "iskur.gov.tr",
47
+ "yatirimadestek.gov.tr", "teydeb.tubitak.gov.tr",
48
+ "teknokent.org.tr", "teknopark.gov.tr", # .org.tr is also common
49
+ "btk.gov.tr", "kvkk.gov.tr", "titck.gov.tr", # Turkish Medicines and Medical Devices Agency
50
+ "ailevecalisma.gov.tr", "sgk.gov.tr",
51
+ "invest.gov.tr", # Investment Office of Turkey
52
+ }
53
+
54
+
55
+ # ============================================================
56
+ # Agent 1: Mali Müşavir — Company Formation & Regulation
57
+ # ============================================================
58
+
59
+ MALI_CATEGORIES = {
60
+ "sirket_kurulum": {
61
+ "label": "Şirket Kuruluş Adımları",
62
+ "queries": [
63
+ "yabancı sermayeli teknoloji şirketi kuruluş adımları türkiye 2025 2026",
64
+ "limited şirket (ltd) ve anonim şirket (aş) kuruluş prosedürleri",
65
+ "türkiye'de şirket kurmak için gerekli belgeler ticaret sicil noter",
66
+ "isviçre merkezli bir şirketin türkiye'de şube veya yan kuruluş açması",
67
+ "online şirket kuruluşu mümkün mü MERSİS süreci",
68
+ ],
69
+ },
70
+ "sirket_turu": {
71
+ "label": "Şirket Türü Seçimi (AR-GE Odaklı)",
72
+ "queries": [
73
+ "ltd mi aş mi AR-GE ve yazılım şirketi için karşılaştırma 2026",
74
+ "anonim şirket ve limited şirket vergi ve sorumluluk farkları",
75
+ "yabancı ortaklı şirketler için en uygun şirket türü türkiye",
76
+ "devlet teşvikleri ve fonlara erişim için şirket türü önemli mi",
77
+ ],
78
+ },
79
+ "vergi_tesvik": {
80
+ "label": "Vergi ve Teşvikler (Teknoloji)",
81
+ "queries": [
82
+ "5746 sayılı AR-GE kanunu teşvikleri güncel 2025 2026",
83
+ "teknokent dışı AR-GE merkezi vergi avantajları",
84
+ "yazılım ve yapay zeka ihracatı vergi istisnaları türkiye",
85
+ "kurumlar vergisi ve KDV istisnası teknoloji şirketleri",
86
+ "SGK işveren primi desteği AR-GE personeli için",
87
+ ],
88
+ },
89
+ "teknokent_teknopark": {
90
+ "label": "Teknokent ve AR-GE Merkezleri",
91
+ "queries": [
92
+ "teknopark başvuru ve kabul kriterleri yapay zeka medikal cihaz",
93
+ "istanbul ankara izmir önde gelen teknoparklar ve uzmanlık alanları",
94
+ "teknopark avantajları vergi kira altyapı",
95
+ "AR-GE merkezi kurma şartları ve avantajları teknopark dışında",
96
+ "BAMA Teknoloji hangi teknoparkta yer alıyor",
97
+ ],
98
+ },
99
+ "maliyet_surec": {
100
+ "label": "Maliyet ve Süreç Takvimi",
101
+ "queries": [
102
+ "türkiye'de şirket kuruluş toplam maliyeti 2026 (noter harç sermaye)",
103
+ "şirket kuruluş süresi ortalama kaç gün 2026",
104
+ "kuruluş sonrası zorunlu adımlar (SGK vergi dairesi belediye)",
105
+ "aylık sabit giderler teknoloji şirketi (muhasebe bağkur sgk)",
106
+ ],
107
+ },
108
+ "ip_data_sovereignty": {
109
+ "label": "Fikri Mülkiyet ve Veri Mevzuatı",
110
+ "queries": [
111
+ "türkiye'de yazılım ve yapay zeka algoritması fikri mülkiyet koruması",
112
+ "KVKK (kişisel verilerin korunması kanunu) sağlık verileri yönetmeliği",
113
+ "sağlık verilerinin yurtdışına aktarımı KVKK izinler",
114
+ "anonimleştirilmiş veri ile AR-GE çalışması yasal çerçeve türkiye",
115
+ "isviçre-türkiye veri transferi anlaşmaları",
116
+ ],
117
+ },
118
+ "regulatory_medical": {
119
+ "label": "Medikal Cihaz Mevzuatı (AI Odaklı)",
120
+ "queries": [
121
+ "TİTCK yapay zeka tabanlı yazılımlar için medikal cihaz düzenlemesi",
122
+ "türkiye medikal cihaz yönetmeliği (MDR) ve CE işareti tanınırlığı",
123
+ "yapay zeka rehabilitasyon cihazları için klinik araştırma gereklilikleri türkiye",
124
+ "tıbbi cihaz kayıt ve onay süreci TİTCK ÜTS sistemi",
125
+ "yazılım bir tıbbi cihaz mıdır (SaMD) türkiye sınıflandırması",
126
+ ],
127
+ },
128
+ }
129
+
+ MALI_SYNTHESIS_QUESTIONS = {
+     "sirket_kurulum": [
+         "İsviçre merkezli Gurma AI için Türkiye'de bir yan kuruluş (subsidiary) kurmanın adımları nelerdir?",
+         "Gerekli ana belgeler nelerdir ve bu belgelerin İsviçre'den nasıl hazırlanması gerekir (apostil vb.)?",
+         "Sürecin ne kadarı uzaktan (online) yönetilebilir, hangi aşamalarda Türkiye'de fiziksel bulunma zorunludur?",
+     ],
+     "sirket_turu": [
+         "Gurma'nın AR-GE ve fon odaklı hedefleri için Ltd. mi A.Ş. mi daha mantıklı? Karar matrisi oluşturun.",
+         "Minimum sermaye gereksinimleri ve bu sermayenin blokesi/kullanımı nasıl işliyor?",
+         "Seçilen şirket türü, gelecekte yatırımcı alma veya hisse devri operasyonlarını nasıl etkiler?",
+     ],
+     "vergi_tesvik": [
+         "Gurma'nın yararlanabileceği temel vergi avantajları (Kurumlar, KDV, Gelir Vergisi Stopajı) nelerdir?",
+         "5746 sayılı kanun kapsamında, Teknopark içinde ve dışında olmanın avantaj/dezavantajları nelerdir?",
+         "Yapay zeka ve medikal cihaz ihracatı için özel bir vergi indirimi var mı?",
+     ],
+     "teknokent_teknopark": [
+         "Gurma'nın profiline (AI + Medikal Cihaz) en uygun 3 Teknopark hangisidir ve neden?",
+         "Teknopark'a kabul için proje başvurusunda nelere dikkat edilmeli? BAMA'nın deneyiminden nasıl yararlanılır?",
+         "Teknopark'ta yer almanın IP koruması ve veri güvenliği açısından ek bir avantajı var mı?",
+     ],
+     "maliyet_surec": [
+         "Bir A.Ş. kurmak için başlangıçta ne kadar bir bütçe ayrılmalı (minimum sermaye + masraflar)?",
+         "Şirketin yasal olarak faaliyete geçmesi ne kadar sürer? (iyimser ve kötümser senaryo)",
+         "Faaliyete geçtikten sonraki ilk 3 ay içinde tamamlanması gereken zorunlu işlemler nelerdir?",
+     ],
+     "ip_data_sovereignty": [
+         "Gurma'nın temel IP'si olan AI modellerini Türkiye'de nasıl koruma altına alabiliriz? (Patent, telif hakkı vb.)",
+         "KVKK uyarınca, rehabilitasyon verilerini işlerken nelere dikkat edilmeli? Veri Türkiye'de mi kalmalı?",
+         "İsviçre'deki ana şirket ile Türkiye'deki AR-GE birimi arasında veri (özellikle anonimleştirilmiş hasta verisi) transferi için yasal zemin nedir?",
+     ],
+     "regulatory_medical": [
+         "Gurma'nın AI yazılımı Türkiye'de bir 'tıbbi cihaz' olarak kabul edilecek mi? TİTCK'nın bu konudaki kriterleri nelerdir?",
+         "Eğer tıbbi cihaz ise, AB'den alınacak bir CE belgesi Türkiye'de doğrudan geçerli midir, yoksa ek TİTCK onayı gerekir mi?",
+         "Pazara sunmadan önce Türkiye'de bir klinik doğrulama/araştırma yapma zorunluluğu var mı?",
+     ],
+ }
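+
+ # NOTE: MALI_SYNTHESIS_QUESTIONS is keyed by the same category slugs as
+ # MALI_CATEGORIES; the base agent below joins the two dicts by key when it
+ # detects coverage gaps and writes report sections, so a new category should
+ # be added to both dicts.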
+
+
+ # ============================================================
+ # Agent 2: Fon Araştırma — TÜBİTAK & Stratejik Ortaklık
+ # ============================================================
+
+ FON_CATEGORIES = {
+     "tubitak": {
+         "label": "TÜBİTAK Destek Programları",
+         "queries": [
+             "TÜBİTAK TEYDEB 1501 1507 destek programları yapay zeka medikal cihaz 2026",
+             "TÜBİTAK 1702 patent lisanslama desteği yabancı teknoloji",
+             "TÜBİTAK yapay zeka enstitüsü proje çağrıları",
+             "TÜBİTAK sağlık bilimleri araştırma destek grubu (SBAG) çağrıları",
+             "yeni kurulan teknoloji şirketleri için TÜBİTAK BİGG programı şartları",
+         ],
+     },
+     "kosgeb": {
+         "label": "KOSGEB Destekleri",
+         "queries": [
+             "KOSGEB AR-GE ÜR-GE ve inovasyon destek programı 2026",
+             "KOSGEB teknoloji odaklı sanayi hamlesi programı medikal cihaz",
+             "yabancı ortaklı KOBİ'ler KOSGEB desteklerinden yararlanabilir mi",
+             "KOSGEB stratejik ürün destek programı şartları",
+         ],
+     },
+     "sanayi_bakanligi": {
+         "label": "Sanayi Bakanlığı ve Kalkınma Ajansları",
+         "queries": [
+             "sanayi ve teknoloji bakanlığı yatırım teşvik belgesi AR-GE yatırımı",
+             "ulusal yapay zeka stratejisi 2025 kapsamında açılan fon ve destekler",
+             "kalkınma ajansları (ISTKA IZMIRKA) güdümlü proje desteği sağlık teknolojileri",
+             "teknoloji geliştirme bölgeleri (TGB) ek destek ve hibeler",
+         ],
+     },
+     "ab_fonlari": {
+         "label": "AB ve Uluslararası Fonlar (İsviçre-Türkiye Odaklı)",
+         "queries": [
+             "TÜBİTAK-SNSF (İsviçre) ikili işbirliği programı ve başvuru şartları",
+             "EUREKA Eurostars programı türkiye isviçre ortaklığı",
+             "Horizon Europe programına türkiye'den katılım ve yapay zeka sağlık çağrıları",
+             "İsviçre ve Türkiye'nin ortak katıldığı uluslararası AR-GE fonları",
+         ],
+     },
+     "basvuru_surec": {
+         "label": "Başvuru Süreçleri ve Stratejileri",
+         "queries": [
+             "TÜBİTAK TEYDEB proje önerisi hazırlama kılavuzu ve hakem değerlendirme kriterleri",
+             "başarılı bir TÜBİTAK projesi bütçesi nasıl hazırlanır (personel makine hizmet alımı)",
+             "TÜBİTAK proje başvurularında sık yapılan hatalar ve reddedilme nedenleri",
+             "proje yürütücüsü ve AR-GE personeli nitelikleri TÜBİTAK kriterleri",
+         ],
+     },
+     "bama_joint_strategy": {
+         "label": "BAMA Ortaklığı ile Stratejik Fon Başvurusu",
+         "queries": [
+             "TÜBİTAK ortaklı proje başvurusu nasıl yapılır (KOBİ-KOBİ işbirliği)",
+             "BAMA Teknoloji'nin 'yerli üretici' statüsü fon başvurularında avantaj sağlar mı",
+             "BAMA Teknoloji'nin tamamladığı TÜBİTAK veya KOSGEB projeleri var mı",
+             "Gurma (yeni) ve BAMA (tecrübeli) ortaklığında bir proje kurgusu nasıl olmalı",
+         ],
+     },
+     "competitor_analysis_tr": {
+         "label": "Türkiye'deki Rakiplerin Fon Geçmişi",
+         "queries": [
+             "rehabilitasyon robotiği alanında TÜBİTAK desteği alan türk firmaları",
+             "Hocoma Ekso Bionics gibi yabancı firmaların türkiye'de aldığı teşvik var mı",
+             "medikal cihaz ve yazılım alanında başarılı KOSGEB projesi örnekleri",
+             "türkiye'de sağlık teknolojileri alanında yatırım alan startuplar ve aldıkları hibeler",
+         ],
+     },
+ }
+
+ FON_SYNTHESIS_QUESTIONS = {
+     "tubitak": [
+         "Gurma'nın mevcut durumu (yeni kuruluş, AI/medikal odaklı) için en uygun 2-3 TÜBİTAK programı hangisidir?",
+         "Bu programların sağladığı hibe oranı, toplam bütçe ve proje süresi nedir?",
+         "Yakın zamanda açılacak veya şu an açık olan özel bir 'yapay zeka' veya 'sağlık teknolojileri' çağrısı var mı?",
+     ],
+     "kosgeb": [
+         "Yeni kurulacak yabancı ortaklı bir şirket, KOSGEB'in hangi desteklerinden faydalanabilir?",
+         "KOSGEB mi TÜBİTAK mı? Gurma'nın AR-GE projesi için hangisi daha uygun bir başlangıç noktasıdır?",
+     ],
+     "sanayi_bakanligi": [
+         "'Yatırım Teşvik Belgesi' almanın Gurma için en somut faydaları neler olur? Süreç ne kadar karmaşık?",
+         "İstanbul veya İzmir'deki Kalkınma Ajansları, Gurma'nın projesine özel bir destek sağlayabilir mi?",
+     ],
+     "ab_fonlari": [
+         "**En Önemli Soru**: TÜBİTAK-SNSF (İsviçre) ikili işbirliği programının güncel durumu nedir? Başvuru tarihleri ve başarı oranları hakkında ne biliniyor?",
+         "Gurma (İsviçre) ve Gurma (Türkiye) arasında bir EUREKA projesi kurgulamak mümkün ve mantıklı mıdır?",
+     ],
+     "basvuru_surec": [
+         "Bir TÜBİTAK 1501 proje başvurusunun ana adımları ve zaman çizelgesi nedir?",
+         "Hakemlerin projeyi değerlendirirken en çok dikkat ettiği 3 kritik nokta nedir? (Örn: İnovatif yön, ticarileşme potansiyeli)",
+         "Proje bütçesinde hangi harcamalar desteklenir, hangileri desteklenmez?",
+     ],
+     "bama_joint_strategy": [
+         "Gurma ve BAMA'nın birlikte başvurabileceği en mantıklı fon hangisidir? Bu ortaklık başvuruda nasıl bir avantaj yaratır?",
+         "BAMA'nın mevcut deneyimi ve 'yerli üretici' kimliği, proje kabul şansını ne kadar artırır?",
+         "Ortak bir projede IP (fikri mülkiyet) paylaşımı nasıl düzenlenmelidir?",
+     ],
+     "competitor_analysis_tr": [
+         "Türkiye'de rehabilitasyon teknolojileri alanında kimler devlet desteği alıyor? Bu projelerin odak noktaları neler?",
+         "Rakiplerin aldığı destekler, pazarın hangi yöne gittiğini ve hangi teknolojilerin önceliklendirildiğini gösteriyor mu?",
+     ],
+ }
+
+
+ # ============================================================
+ # Shared Dataclasses and Base Agent
+ # (This section is largely unchanged from the original v1)
+ # ============================================================
+
+ @dataclass
+ class TRIntelSection:
+     """A section of the Turkish research report."""
+     category: str
+     label: str
+     queries_executed: list = field(default_factory=list)
+     results: list = field(default_factory=list)
+     findings: list = field(default_factory=list)
+     gaps: list = field(default_factory=list)
+     sources: list = field(default_factory=list)
+
+
+ class TRResearchAgent:
+     """Base agent for Turkish-language structured web research."""
+
+     CATEGORIES: dict = {}
+     SYNTHESIS_QUESTIONS: dict = {}
+     OUTPUT_SUBDIR: str = "tr"
+     REPORT_TITLE: str = "Türkçe Araştırma Raporu"
+
+     def __init__(self, search: SearchService | None = None):
+         self.search = search or SearchService()
+         self.sections: dict[str, TRIntelSection] = {}
+         self.output_dir = RESEARCH_DIR / self.OUTPUT_SUBDIR
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+
+     def run(self, categories: list[str] | None = None, delay: float = 1.0, max_results: int = 10) -> Path:
+         cats_to_run = categories or list(self.CATEGORIES.keys())
+
+         total_queries = sum(len(self.CATEGORIES[c]["queries"]) for c in cats_to_run if c in self.CATEGORIES)
+
+         print(f"\n{'='*60}")
+         print(f"{self.REPORT_TITLE}")
+         print(f"Kategoriler: {len(cats_to_run)} | Sorgular: ~{total_queries}")
+         print(f"{'='*60}\n")
+
+         for cat_key in cats_to_run:
+             if cat_key not in self.CATEGORIES:
+                 print(f"[ATLA] Bilinmeyen kategori: {cat_key}")
+                 continue
+
+             cat_details = self.CATEGORIES[cat_key]
+             section = TRIntelSection(category=cat_key, label=cat_details["label"])
+             self._research_category(section, cat_details, delay, max_results)
+             self.sections[cat_key] = section
+
+         report_path = self._generate_report()
+         self._save_data()
+
+         print(f"\n{'='*60}")
+         print(f"Rapor ve Veri Dosyaları Oluşturuldu: {self.output_dir}")
+         total_findings = sum(len(s.findings) for s in self.sections.values())
+         print(f"Toplam Bulgular: {total_findings}")
+         print(f"{'='*60}\n")
+
+         return report_path
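+
+     # Illustrative API usage (the agent classes are defined at the bottom of
+     # this module): run only selected categories with a slower crawl, e.g.
+     #   MaliMusavirAgent().run(categories=["vergi_tesvik"], delay=2.0)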
+
+     def _research_category(self, section: TRIntelSection, cat_details: dict, delay: float, max_results: int):
+         print(f"\n--- Kategori: {section.label} ---")
+
+         for query in cat_details["queries"]:
+             print(f"  [ARAMA] {query}")
+             try:
+                 # Saving of per-query intermediate results is disabled for now;
+                 # only the aggregated report and JSON dump are persisted.
+                 results = self.search.search(query, max_results=max_results, save=False)
+                 section.queries_executed.append(query)
+                 section.results.extend(results)
+                 section.sources.extend(r.url for r in results if r.url and r.url not in section.sources)
+                 print(f"    -> {len(results)} sonuç bulundu.")
+             except Exception as e:
+                 print(f"    -> Arama sırasında hata: {e}")
+
+             if delay > 0:
+                 time.sleep(delay)
+
+         section.findings = self._analyze_and_deduplicate(section)
+         section.gaps = self._detect_gaps(section)
+
+         confirmed_count = sum(1 for f in section.findings if f.get("confirmed"))
+         print(f"  [ANALİZ] {len(section.findings)} özgün bulgu ({confirmed_count} resmi kaynaklı). Eksikler: {len(section.gaps)}")
+
+     def _analyze_and_deduplicate(self, section: TRIntelSection) -> list[dict]:
+         findings = []
+         seen_snippets = set()
+
+         for r in sorted(section.results, key=lambda x: x.url):
+             # Basic deduplication on a normalized snippet prefix; Turkish
+             # characters are kept in the key so that distinct Turkish snippets
+             # do not collapse onto the same ASCII-only key.
+             snippet_key = re.sub(r"[^a-z0-9çğıöşü]", "", r.snippet.lower()[:100])
+             if snippet_key in seen_snippets:
+                 continue
+             seen_snippets.add(snippet_key)
+
+             # Findings from official domains (TR_PRIMARY_SOURCES, defined
+             # above) are flagged as confirmed.
+             is_primary = any(domain in r.url for domain in TR_PRIMARY_SOURCES) if r.url else False
+
+             findings.append({
+                 "text": f"{r.title}: {r.snippet}",
+                 "confirmed": is_primary,
+                 "source": r.url or "",
+             })
+         return findings[:25]  # Limit findings per section
+
+     def _detect_gaps(self, section: TRIntelSection) -> list[dict]:
+         questions = self.SYNTHESIS_QUESTIONS.get(section.category, [])
+         if not questions:
+             return []
+
+         all_text = " ".join(f["text"].lower() for f in section.findings)
+
+         gaps = []
+         for q in questions:
+             # Simple keyword matching: a question counts as a gap when fewer
+             # than 20% of its 4+ character words occur in the findings text.
+             keywords = re.findall(r"\w{4,}", q.lower())
+             matches = sum(1 for kw in keywords if kw in all_text)
+             if not all_text or matches < len(keywords) * 0.2:
+                 gaps.append({"text": q})
+         return gaps
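+
+     # Worked example of the 20% threshold (illustrative numbers): for a
+     # question whose 4+ character keywords are ["minimum", "sermaye",
+     # "gereksinimleri", "sermayenin", "blokesi", "kullanımı", "nasıl",
+     # "işliyor"], a findings text that contains only "sermaye" yields
+     # matches = 1, and 1 < 8 * 0.2 = 1.6, so the question is reported as a gap.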
+
+     def _generate_report(self) -> Path:
+         # This is a lightweight data dump; the main report is synthesized by Cursor
+         timestamp = datetime.now().strftime("%Y-%m-%d")
+         slug = self.OUTPUT_SUBDIR.replace("/", "-")
+         report_path = self.output_dir / f"{slug}-rapor-{timestamp}.md"
+
+         lines = [f"# {self.REPORT_TITLE} - Veri Dökümü", f"Tarih: {datetime.now().isoformat()}", ""]
+
+         for cat_key, section in self.sections.items():
+             lines.extend([f"## {section.label}", ""])
+             lines.append("### Bulgular")
+             for f in section.findings:
+                 tag = "✅" if f.get("confirmed") else "⚠️"
+                 lines.append(f"- {tag} {f.get('text', '')} ([Kaynak]({f.get('source', '#')}))")
+             lines.append("\n### Cevaplanması Gereken Sentez Soruları")
+             for q in self.SYNTHESIS_QUESTIONS.get(cat_key, []):
+                 lines.append(f"- {q}")
+             lines.append("")
+
+         report_path.write_text("\n".join(lines), encoding="utf-8")
+         return report_path
+
+     def _save_data(self):
+         timestamp = datetime.now().strftime("%Y-%m-%d")
+         slug = self.OUTPUT_SUBDIR.replace("/", "-")
+         data_path = self.output_dir / f"{slug}-data-{timestamp}.json"
+
+         data = {
+             "agent": self.__class__.__name__,
+             "timestamp": datetime.now().isoformat(),
+             "sections": {
+                 key: {
+                     "category": s.category,
+                     "label": s.label,
+                     "queries_executed": s.queries_executed,
+                     "findings": s.findings,
+                     "gaps": s.gaps,
+                     "sources": s.sources,
+                     "synthesis_questions": self.SYNTHESIS_QUESTIONS.get(key, []),
+                 }
+                 for key, s in self.sections.items()
+             },
+         }
+         data_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
+
+     def list_categories(self):
+         print("Mevcut kategoriler:")
+         for key, cat in self.CATEGORIES.items():
+             print(f"  - {key}: {cat['label']}")
+
+ # ============================================================
+ # Agent Implementations
+ # ============================================================
+
+ class MaliMusavirAgent(TRResearchAgent):
+     """Researches company formation, tax, IP, and medical device regulations."""
+     CATEGORIES = MALI_CATEGORIES
+     SYNTHESIS_QUESTIONS = MALI_SYNTHESIS_QUESTIONS
+     OUTPUT_SUBDIR = "tr-mali"
+     REPORT_TITLE = "Mali Müşavir Raporu — Şirket Kuruluşu ve Mevzuat"
+
+
+ class FonArastirmaAgent(TRResearchAgent):
+     """Researches R&D funding, focusing on bilateral Swiss-Turkish opportunities."""
+     CATEGORIES = FON_CATEGORIES
+     SYNTHESIS_QUESTIONS = FON_SYNTHESIS_QUESTIONS
+     OUTPUT_SUBDIR = "tr-fonlar"
+     REPORT_TITLE = "Fon Araştırma Raporu — TÜBİTAK ve Stratejik Destekler"
+
+
+ if __name__ == "__main__":
+     import argparse as _ap
+
+     parser = _ap.ArgumentParser(description="GURMA.ai Turkish Research Agents")
+     parser.add_argument("agent", choices=["mali", "fonlar"], help="Agent to run")
+     parser.add_argument("-c", "--categories", nargs="+", help="Specific categories to run")
+     parser.add_argument("-d", "--delay", type=float, default=1.0, help="Delay between searches, in seconds")
+     parser.add_argument("--list-categories", action="store_true")
+
+     args = parser.parse_args()
+
+     agent_map = {"mali": MaliMusavirAgent, "fonlar": FonArastirmaAgent}
+     agent_instance = agent_map[args.agent]()
+
+     if args.list_categories:
+         agent_instance.list_categories()
+     else:
+         agent_instance.run(categories=args.categories, delay=args.delay)
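+
+     # Example invocations (illustrative; the file name per this commit is tr_agents.py):
+     #   python tr_agents.py mali --list-categories
+     #   python tr_agents.py fonlar -c tubitak ab_fonlari -d 2.0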
tr_tab.py ADDED
@@ -0,0 +1,218 @@
+ """
+ GURMA.ai — Turkey Expansion Tab
+
+ Displays research results from the Mali and Fonlar agents,
+ plus executive summary reports.
+ """
+
+ import html  # NOTE: added so web-derived snippets can be escaped before HTML rendering below
+ import json
+ import os
+ from datetime import datetime
+ from pathlib import Path
+
+ import streamlit as st
+
+ # ============================================================
+ # Environment & Paths
+ # ============================================================
+
+ IS_HF_SPACE = os.getenv("HF_SPACE") or Path("/app/research.py").exists()
+
+ if IS_HF_SPACE:
+     DATA_ROOT = Path("/app/data")
+     DOCS_ROOT = Path("/app/docs")
+ else:
+     DATA_ROOT = Path(__file__).resolve().parent.parent.parent / "data"
+     DOCS_ROOT = Path(__file__).resolve().parent.parent.parent / "docs"
+
+ TR_MALI_DIR = DATA_ROOT / "tr-mali"
+ TR_FONLAR_DIR = DATA_ROOT / "tr-fonlar"
+
+ AGENT_CONFIG = {
+     "tr-mali": {
+         "label": "Mali Müşavir",
+         "icon": "🏛️",
+         "dir": TR_MALI_DIR,
+         "description": "Company formation, tax, IP, regulatory",
+     },
+     "tr-fonlar": {
+         "label": "Fon Araştırma",
+         "icon": "💰",
+         "dir": TR_FONLAR_DIR,
+         "description": "TÜBİTAK, KOSGEB, EU/bilateral funding",
+     },
+ }
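+
+ # NOTE: the keys here ("tr-mali", "tr-fonlar") double as the data
+ # subdirectory names under data/ and match the agents' OUTPUT_SUBDIR values
+ # in tr_agents.py, so adding a new agent only requires a matching entry here.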
+
+
+ # ============================================================
+ # Data Loading
+ # ============================================================
+
+
+ @st.cache_data(ttl=120)
+ def _load_tr_data(agent_key: str) -> list[dict]:
+     """Load all JSON data files for a TR agent, newest first."""
+     agent_dir = AGENT_CONFIG[agent_key]["dir"]
+     if not agent_dir.exists():
+         return []
+
+     results = []
+     for f in sorted(agent_dir.glob("*.json"), reverse=True):
+         try:
+             with open(f, encoding="utf-8") as fh:
+                 data = json.load(fh)
+             data["_filename"] = f.name
+             results.append(data)
+         except Exception:
+             continue
+     return results
+
+
+ @st.cache_data(ttl=120)
+ def _load_exec_summaries() -> list[dict]:
+     """Load executive summary markdown files, newest first."""
+     if not DOCS_ROOT.exists():
+         return []
+
+     summaries = []
+     for f in sorted(DOCS_ROOT.glob("exec-summary-*.md"), reverse=True):
+         try:
+             content = f.read_text(encoding="utf-8")
+             title_line = ""
+             for line in content.split("\n"):
+                 if line.startswith("# "):
+                     title_line = line[2:].strip()
+                     break
+             summaries.append({
+                 "filename": f.name,
+                 "title": title_line or f.stem,
+                 "content": content,
+                 "mtime": datetime.fromtimestamp(f.stat().st_mtime),
+             })
+         except Exception:
+             continue
+     return summaries
+
+
+ # ============================================================
+ # Rendering Helpers
+ # ============================================================
+
+
+ def _render_finding(finding: dict):
+     """Render a single research finding with a source-quality indicator."""
+     text = finding.get("text", "")
+     source = finding.get("source", "")
+     confirmed = finding.get("confirmed", False)
+
+     if not text or len(text.strip()) < 20:
+         return
+
+     tag = "✅" if confirmed else "⚠️"
+     domain = ""
+     if source:
+         try:
+             from urllib.parse import urlparse
+             domain = urlparse(source).netloc
+             if domain.startswith("www."):
+                 domain = domain[4:]
+         except Exception:
+             domain = source[:40]
+
+     truncated = text[:250] + "..." if len(text) > 250 else text
+     # Snippets and URLs come from the open web, so escape them before they
+     # are injected into raw HTML via unsafe_allow_html.
+     safe_text = html.escape(truncated)
+     source_html = f" <a href='{html.escape(source)}' style='color:#888;font-size:0.75em;'>{html.escape(domain)}</a>" if source else ""
+     st.markdown(
+         f"{tag} <span style='font-size:0.88em;'>{safe_text}</span>{source_html}",
+         unsafe_allow_html=True,
+     )
+
+
+ def _render_section(section: dict):
+     """Render a research section (category) with findings and gaps."""
+     label = section.get("label", section.get("category", "Unknown"))
+     findings = section.get("findings", [])
+     gaps = section.get("gaps", [])
+     synthesis_qs = section.get("synthesis_questions", [])
+
+     confirmed_count = sum(1 for f in findings if isinstance(f, dict) and f.get("confirmed"))
+     total = len(findings)
+
+     header = f"**{label}** — {total} findings"
+     if confirmed_count:
+         header += f" ({confirmed_count} official)"
+     if gaps:
+         header += f" · {len(gaps)} gaps"
+
+     with st.expander(header, expanded=False):
+         if synthesis_qs:
+             st.caption("**Key questions:** " + " · ".join(synthesis_qs))
+             st.markdown("")
+
+         for f in findings[:12]:
+             if isinstance(f, dict):
+                 _render_finding(f)
+
+         if len(findings) > 12:
+             st.caption(f"... and {len(findings) - 12} more findings")
+
+         if gaps:
+             st.markdown("---")
+             for g in gaps:
+                 gap_text = g.get("text", g) if isinstance(g, dict) else g
+                 st.caption(f"🔍 **Gap:** {gap_text}")
+
+
+ def _render_agent_data(agent_key: str, data_files: list[dict]):
+     """Render all data for one TR agent (newest run only)."""
+     if not data_files:
+         st.info(f"No data files found in `data/{agent_key}/`. Run the agent first.")
+         return
+
+     latest = data_files[0]
+     timestamp = latest.get("timestamp", "")[:16].replace("T", " ")
+     sections = latest.get("sections", {})
+
+     st.caption(f"Latest run: {timestamp} · {len(sections)} categories · File: `{latest.get('_filename', '')}`")
+
+     for section_data in sections.values():
+         _render_section(section_data)
+
+
+ # ============================================================
+ # Main Entry Point
+ # ============================================================
+
+
+ def render_tr_tab():
+     """Main entry point — called from app.py."""
+     st.title("Turkey Expansion")
+     st.caption("Company formation research & funding intelligence for Gurma Turkey")
+
+     # --- Executive Summaries ---
+     summaries = _load_exec_summaries()
+     if summaries:
+         st.header("Executive Summaries")
+         for s in summaries:
+             age = datetime.now() - s["mtime"]
+             age_label = "today" if age.days == 0 else f"{age.days}d ago"
+             with st.expander(f"📋 {s['title']} ({age_label})", expanded=len(summaries) <= 2):
+                 st.markdown(s["content"])
+
+         st.divider()
+
+     # --- Agent Research Data ---
+     st.header("Research Data")
+
+     agent_tabs = st.tabs([
+         f"{cfg['icon']} {cfg['label']}" for cfg in AGENT_CONFIG.values()
+     ])
+
+     for tab, agent_key in zip(agent_tabs, AGENT_CONFIG.keys()):
+         with tab:
+             config = AGENT_CONFIG[agent_key]
+             st.caption(config["description"])
+             data_files = _load_tr_data(agent_key)
+             _render_agent_data(agent_key, data_files)
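+
+ # Usage sketch (assumption — the actual wiring lives in app.py, not in this
+ # file): per the docstring above, the dashboard mounts this tab roughly as
+ #
+ #     from tr_tab import render_tr_tab
+ #     render_tr_tab()
+ #
+ # with the import path adjusted for local (non-Space) checkouts.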