CaffeinatedCoding commited on
Commit
7d0fa43
·
verified ·
1 Parent(s): 34df332

Upload folder using huggingface_hub

Browse files
Files changed (8) hide show
  1. api/main.py +90 -4
  2. frontend/app.js +104 -0
  3. frontend/index.html +58 -0
  4. frontend/style.css +171 -1
  5. src/agent_v2.py +7 -1
  6. src/logger.py +64 -0
  7. src/reranker.py +104 -0
  8. src/verify.py +4 -22
api/main.py CHANGED
@@ -5,7 +5,7 @@ V2 agent with conversation memory and 3-pass reasoning.
5
  Port 7860 for HuggingFace Spaces compatibility.
6
  """
7
 
8
- from fastapi import FastAPI, HTTPException
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from fastapi.staticfiles import StaticFiles
11
  from fastapi.responses import FileResponse
@@ -15,10 +15,14 @@ import time
15
  import os
16
  import sys
17
  import logging
 
 
18
 
19
  logging.basicConfig(level=logging.INFO)
20
  logger = logging.getLogger(__name__)
21
 
 
 
22
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
23
 
24
 
@@ -82,6 +86,9 @@ download_models()
82
  from src.ner import load_ner_model
83
  load_ner_model()
84
 
 
 
 
85
  from src.citation_graph import load_citation_graph
86
  load_citation_graph()
87
 
@@ -119,6 +126,7 @@ class QueryResponse(BaseModel):
119
  num_sources: int
120
  truncated: bool
121
  latency_ms: float
 
122
 
123
 
124
  @app.get("/")
@@ -134,13 +142,14 @@ def health():
134
 
135
 
136
  @app.post("/query", response_model=QueryResponse)
137
- def query(request: QueryRequest):
138
  if not request.query.strip():
139
  raise HTTPException(status_code=400, detail="Query cannot be empty")
140
  if len(request.query) < 10:
141
  raise HTTPException(status_code=400, detail="Query too short — minimum 10 characters")
142
  if len(request.query) > 1000:
143
  raise HTTPException(status_code=400, detail="Query too long — maximum 1000 characters")
 
144
  start = time.time()
145
  try:
146
  if USE_V2:
@@ -148,8 +157,85 @@ def query(request: QueryRequest):
148
  result = _run_query(request.query, session_id)
149
  else:
150
  result = _run_query_v1(request.query)
 
151
  except Exception as e:
152
  logger.error(f"Pipeline error: {e}")
153
  raise HTTPException(status_code=500, detail=f"Pipeline error: {str(e)}")
154
- result["latency_ms"] = round((time.time() - start) * 1000, 2)
155
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  Port 7860 for HuggingFace Spaces compatibility.
6
  """
7
 
8
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from fastapi.staticfiles import StaticFiles
11
  from fastapi.responses import FileResponse
 
15
  import os
16
  import sys
17
  import logging
18
+ import json
19
+ from collections import Counter
20
 
21
  logging.basicConfig(level=logging.INFO)
22
  logger = logging.getLogger(__name__)
23
 
24
+ from src.logger import log_inference
25
+
26
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
27
 
28
 
 
86
  from src.ner import load_ner_model
87
  load_ner_model()
88
 
89
+ from src.reranker import load_reranker
90
+ load_reranker()
91
+
92
  from src.citation_graph import load_citation_graph
93
  load_citation_graph()
94
 
 
126
  num_sources: int
127
  truncated: bool
128
  latency_ms: float
129
+ session_id: Optional[str] = None
130
 
131
 
132
  @app.get("/")
 
142
 
143
 
144
  @app.post("/query", response_model=QueryResponse)
145
+ def query(request: QueryRequest, background_tasks: BackgroundTasks):
146
  if not request.query.strip():
147
  raise HTTPException(status_code=400, detail="Query cannot be empty")
148
  if len(request.query) < 10:
149
  raise HTTPException(status_code=400, detail="Query too short — minimum 10 characters")
150
  if len(request.query) > 1000:
151
  raise HTTPException(status_code=400, detail="Query too long — maximum 1000 characters")
152
+
153
  start = time.time()
154
  try:
155
  if USE_V2:
 
157
  result = _run_query(request.query, session_id)
158
  else:
159
  result = _run_query_v1(request.query)
160
+ session_id = "v1"
161
  except Exception as e:
162
  logger.error(f"Pipeline error: {e}")
163
  raise HTTPException(status_code=500, detail=f"Pipeline error: {str(e)}")
164
+
165
+ latency_ms = round((time.time() - start) * 1000, 2)
166
+ result["latency_ms"] = latency_ms
167
+ result["session_id"] = session_id
168
+
169
+ # Log inference as background task — non-blocking
170
+ background_tasks.add_task(
171
+ log_inference,
172
+ query=request.query,
173
+ session_id=session_id,
174
+ answer=result.get("answer", ""),
175
+ num_sources=result.get("num_sources", 0),
176
+ verification_status=result.get("verification_status", False),
177
+ entities=result.get("entities", {}),
178
+ latency_ms=latency_ms,
179
+ stage=result.get("analysis", {}).get("stage", ""),
180
+ truncated=result.get("truncated", False),
181
+ out_of_domain=result.get("num_sources", 0) == 0,
182
+ )
183
+
184
+ return result
185
+
186
+
187
@app.get("/analytics")
def analytics():
    """Return aggregated analytics computed from the inference JSONL log.

    Reads every record from LOG_PATH (one JSON object per line, as written
    by src.logger.log_inference), skipping unparseable lines, and returns:
      - total_queries, verified_ratio (%), avg_latency_ms,
        out_of_domain_rate (%), avg_sources
      - stage_distribution: count of records per pipeline stage
      - entity_type_frequency: top-10 entity types across all records
      - recent_latencies: last 20 latency samples (for the sparkline)

    Both "no log file yet" and "log file exists but has no valid records"
    return the same fully-keyed zeroed payload, so the frontend never has
    to special-case missing keys.
    """
    log_path = os.getenv("LOG_PATH", "logs/inference.jsonl")

    empty = {
        "total_queries": 0,
        "verified_ratio": 0,
        "avg_latency_ms": 0,
        "out_of_domain_rate": 0,
        "avg_sources": 0,
        "stage_distribution": {},
        "entity_type_frequency": {},
        "recent_latencies": [],
    }

    if not os.path.exists(log_path):
        return empty

    records = []
    try:
        with open(log_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    records.append(json.loads(line))
                except json.JSONDecodeError:
                    # Tolerate corrupt/truncated lines (e.g. a write cut
                    # short by a container restart).
                    continue
    except OSError:
        return {"error": "Could not read logs"}

    if not records:
        # Same schema as the no-file case (previously this returned only
        # {"total_queries": 0}, which was inconsistent).
        return empty

    total = len(records)
    verified = sum(1 for r in records if r.get("verified", False))
    out_of_domain = sum(1 for r in records if r.get("out_of_domain", False))
    # Zero/missing latencies are excluded from the average on purpose.
    latencies = [r.get("latency_ms", 0) for r in records if r.get("latency_ms")]
    sources = [r.get("num_sources", 0) for r in records]
    stages = Counter(r.get("stage", "unknown") for r in records)

    all_entity_types = []
    for r in records:
        all_entity_types.extend(r.get("entities_found", []))
    entity_freq = dict(Counter(all_entity_types).most_common(10))

    return {
        "total_queries": total,
        # total >= 1 here, so the ratios cannot divide by zero.
        "verified_ratio": round(verified / total * 100, 1),
        "avg_latency_ms": round(sum(latencies) / len(latencies), 0) if latencies else 0,
        "out_of_domain_rate": round(out_of_domain / total * 100, 1),
        "avg_sources": round(sum(sources) / len(sources), 1) if sources else 0,
        "stage_distribution": dict(stages),
        "entity_type_frequency": entity_freq,
        "recent_latencies": latencies[-20:],
    }
frontend/app.js CHANGED
@@ -371,4 +371,108 @@ function inline(text) {
371
 
372
  function showToast(msg) {
373
  alert(msg);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  }
 
371
 
372
  function showToast(msg) {
373
  alert(msg);
374
+ }
375
+
376
// ── Analytics ────────────────────────────────────────────────────────

// Switch the UI to the analytics screen, retitle the topbar, then fetch
// and render the latest metrics via loadAnalytics().
async function showAnalytics() {
  showScreen("analytics");
  document.getElementById("topbar-title").textContent = "System Analytics";
  await loadAnalytics();
}
382
+
383
// Fetch aggregated metrics from GET /analytics and paint the dashboard.
// When the backend reports zero queries, every stat card and chart shows
// a placeholder; on fetch/parse failure only the stages chart shows an
// error (the other widgets keep whatever they last displayed).
async function loadAnalytics() {
  try {
    const res = await fetch(`${API_BASE}/analytics`);
    const data = await res.json();

    if (data.total_queries === 0) {
      // Empty state: dashes for stats, friendly prompts for charts.
      document.getElementById("stat-total").textContent = "0";
      document.getElementById("stat-verified").textContent = "—";
      document.getElementById("stat-latency").textContent = "—";
      document.getElementById("stat-ood").textContent = "—";
      document.getElementById("stat-sources").textContent = "—";
      document.getElementById("chart-stages").innerHTML = "<p class='no-data'>No queries yet. Start asking questions.</p>";
      document.getElementById("chart-entities").innerHTML = "<p class='no-data'>No entity data yet.</p>";
      document.getElementById("chart-latency").innerHTML = "<p class='no-data'>No latency data yet.</p>";
      return;
    }

    // Stat cards
    document.getElementById("stat-total").textContent = data.total_queries;
    document.getElementById("stat-verified").textContent = data.verified_ratio + "%";
    document.getElementById("stat-latency").textContent = data.avg_latency_ms + "ms";
    document.getElementById("stat-ood").textContent = data.out_of_domain_rate + "%";
    document.getElementById("stat-sources").textContent = data.avg_sources;

    // Stage distribution bar chart
    renderBarChart("chart-stages", data.stage_distribution);

    // Entity frequency bar chart
    renderBarChart("chart-entities", data.entity_type_frequency);

    // Latency sparkline
    renderSparkline("chart-latency", data.recent_latencies);

  } catch (err) {
    document.getElementById("chart-stages").innerHTML = "<p class='no-data'>Could not load analytics.</p>";
  }
}
420
+
421
// Render a horizontal bar chart of a {label: value} map into the given
// container, rows sorted by value descending, widths scaled to the max.
function renderBarChart(containerId, data) {
  const container = document.getElementById(containerId);
  const entries = data ? Object.entries(data) : [];
  if (entries.length === 0) {
    container.innerHTML = "<p class='no-data'>No data yet.</p>";
    return;
  }

  const max = Math.max(...entries.map(([, v]) => v));
  // Labels come from log data, so escape them before innerHTML.
  const rows = entries
    .sort((a, b) => b[1] - a[1])
    .map(([label, value]) => `
      <div class="bar-row">
        <span class="bar-label">${escHtml(label)}</span>
        <div class="bar-track">
          <div class="bar-fill" style="width: ${Math.round(value / max * 100)}%"></div>
        </div>
        <span class="bar-value">${value}</span>
      </div>
    `).join("");

  container.innerHTML = `<div class="bar-chart">${rows}</div>`;
}
443
+
444
// Draw a fixed-size (300x60) SVG polyline of recent latency samples,
// normalized to the observed min/max, with a min/max legend underneath.
function renderSparkline(containerId, latencies) {
  const target = document.getElementById(containerId);
  if (!latencies || latencies.length === 0) {
    target.innerHTML = "<p class='no-data'>No data yet.</p>";
    return;
  }

  const hi = Math.max(...latencies);
  const lo = Math.min(...latencies);
  const span = hi - lo || 1; // avoid divide-by-zero on flat data
  const height = 60;
  const width = 300;
  const dx = width / (latencies.length - 1 || 1); // single point → dx unused

  // Scale each sample into SVG coordinates (y grows downward).
  const points = latencies
    .map((ms, idx) => `${idx * dx},${height - ((ms - lo) / span) * height}`)
    .join(" ");

  target.innerHTML = `
    <svg viewBox="0 0 ${width} ${height}" class="sparkline">
      <polyline points="${points}" fill="none" stroke="var(--accent)" stroke-width="2"/>
    </svg>
    <div class="sparkline-range">
      <span>${Math.round(lo)}ms min</span>
      <span>${Math.round(hi)}ms max</span>
    </div>
  `;
}
474
+
475
// Escape the five HTML-significant characters so arbitrary text can be
// interpolated into innerHTML templates safely.
function escHtml(text) {
  return String(text).replace(/[&<>"']/g, (ch) => (
    { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#039;' }[ch]
  ));
}
frontend/index.html CHANGED
@@ -27,6 +27,11 @@
27
  New Research Session
28
  </button>
29
 
 
 
 
 
 
30
  <div class="sidebar-section-label">SESSIONS</div>
31
  <div id="sessions-list" class="sessions-list">
32
  <div class="sessions-empty">No sessions yet</div>
@@ -87,6 +92,59 @@
87
  </div>
88
  </div>
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  <!-- ── SOURCES PANEL ── -->
91
  <div id="sources-panel" class="sources-panel">
92
  <div class="sources-panel-header">
 
27
  New Research Session
28
  </button>
29
 
30
+ <button class="analytics-btn" onclick="showAnalytics()">
31
+ <span class="analytics-icon">📊</span>
32
+ System Analytics
33
+ </button>
34
+
35
  <div class="sidebar-section-label">SESSIONS</div>
36
  <div id="sessions-list" class="sessions-list">
37
  <div class="sessions-empty">No sessions yet</div>
 
92
  </div>
93
  </div>
94
 
95
+ <!-- ── ANALYTICS SCREEN ── -->
96
+ <div id="screen-analytics" class="screen screen-analytics">
97
+ <div class="analytics-inner">
98
+ <div class="analytics-header">
99
+ <h2>System Analytics</h2>
100
+ <p>Live metrics from inference logs</p>
101
+ </div>
102
+
103
+ <div class="analytics-grid">
104
+ <div class="stat-card">
105
+ <div class="stat-value" id="stat-total">—</div>
106
+ <div class="stat-label">Total Queries</div>
107
+ </div>
108
+ <div class="stat-card">
109
+ <div class="stat-value" id="stat-verified">—</div>
110
+ <div class="stat-label">Verified Rate</div>
111
+ </div>
112
+ <div class="stat-card">
113
+ <div class="stat-value" id="stat-latency">—</div>
114
+ <div class="stat-label">Avg Latency</div>
115
+ </div>
116
+ <div class="stat-card">
117
+ <div class="stat-value" id="stat-ood">—</div>
118
+ <div class="stat-label">Out-of-Domain Rate</div>
119
+ </div>
120
+ <div class="stat-card">
121
+ <div class="stat-value" id="stat-sources">—</div>
122
+ <div class="stat-label">Avg Sources / Query</div>
123
+ </div>
124
+ </div>
125
+
126
+ <div class="analytics-charts">
127
+ <div class="chart-card">
128
+ <h3>Stage Distribution</h3>
129
+ <div id="chart-stages" class="chart-container"></div>
130
+ </div>
131
+ <div class="chart-card">
132
+ <h3>Entity Types Extracted</h3>
133
+ <div id="chart-entities" class="chart-container"></div>
134
+ </div>
135
+ <div class="chart-card">
136
+ <h3>Recent Query Latencies (ms)</h3>
137
+ <div id="chart-latency" class="chart-container"></div>
138
+ </div>
139
+ </div>
140
+
141
+ <div class="analytics-footer">
142
+ <button class="refresh-btn" onclick="loadAnalytics()">↻ Refresh</button>
143
+ <span class="analytics-note">Data from current session logs. Resets on container restart.</span>
144
+ </div>
145
+ </div>
146
+ </div>
147
+
148
  <!-- ── SOURCES PANEL ── -->
149
  <div id="sources-panel" class="sources-panel">
150
  <div class="sources-panel-header">
frontend/style.css CHANGED
@@ -750,4 +750,174 @@ body {
750
  margin-bottom: 10px;
751
  }
752
 
753
- .bubble-ai p:last-child { margin-bottom: 0; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
750
  margin-bottom: 10px;
751
  }
752
 
753
+ .bubble-ai p:last-child { margin-bottom: 0; }
754
+
755
+ /* ── Analytics ────────────────────────────────────────────── */
756
+ .analytics-btn {
757
+ display: flex;
758
+ align-items: center;
759
+ gap: 8px;
760
+ width: 100%;
761
+ padding: 10px 14px;
762
+ margin-top: 8px;
763
+ background: transparent;
764
+ border: 1px solid var(--border);
765
+ border-radius: 8px;
766
+ color: var(--text-2);
767
+ font-size: 13px;
768
+ cursor: pointer;
769
+ transition: all var(--transition);
770
+ }
771
+ .analytics-btn:hover {
772
+ background: var(--navy-3);
773
+ color: var(--text-1);
774
+ }
775
+
776
+ .screen-analytics {
777
+ padding: 32px;
778
+ overflow-y: auto;
779
+ height: 100%;
780
+ }
781
+ .analytics-inner {
782
+ max-width: 800px;
783
+ margin: 0 auto;
784
+ }
785
+ .analytics-header h2 {
786
+ font-family: 'Cormorant Garamond', serif;
787
+ font-size: 28px;
788
+ margin: 0 0 4px;
789
+ }
790
+ .analytics-header p {
791
+ color: var(--text-2);
792
+ font-size: 14px;
793
+ margin: 0 0 32px;
794
+ }
795
+
796
+ .analytics-grid {
797
+ display: grid;
798
+ grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
799
+ gap: 16px;
800
+ margin-bottom: 32px;
801
+ }
802
+ .stat-card {
803
+ background: var(--navy-2);
804
+ border: 1px solid var(--border);
805
+ border-radius: 12px;
806
+ padding: 20px 16px;
807
+ text-align: center;
808
+ }
809
+ .stat-value {
810
+ font-size: 28px;
811
+ font-weight: 600;
812
+ color: var(--text-1);
813
+ font-family: 'Cormorant Garamond', serif;
814
+ }
815
+ .stat-label {
816
+ font-size: 11px;
817
+ color: var(--text-3);
818
+ margin-top: 4px;
819
+ text-transform: uppercase;
820
+ letter-spacing: 0.05em;
821
+ }
822
+
823
+ .analytics-charts {
824
+ display: flex;
825
+ flex-direction: column;
826
+ gap: 24px;
827
+ }
828
+ .chart-card {
829
+ background: var(--navy-2);
830
+ border: 1px solid var(--border);
831
+ border-radius: 12px;
832
+ padding: 20px;
833
+ }
834
+ .chart-card h3 {
835
+ font-size: 14px;
836
+ font-weight: 500;
837
+ margin: 0 0 16px;
838
+ color: var(--text-2);
839
+ text-transform: uppercase;
840
+ letter-spacing: 0.05em;
841
+ }
842
+ .chart-container {
843
+ min-height: 60px;
844
+ }
845
+ .no-data {
846
+ color: var(--text-3);
847
+ font-size: 13px;
848
+ text-align: center;
849
+ padding: 16px 0;
850
+ }
851
+
852
+ .bar-chart {
853
+ display: flex;
854
+ flex-direction: column;
855
+ gap: 8px;
856
+ }
857
+ .bar-row {
858
+ display: flex;
859
+ align-items: center;
860
+ gap: 10px;
861
+ font-size: 12px;
862
+ }
863
+ .bar-label {
864
+ width: 100px;
865
+ color: var(--text-3);
866
+ text-align: right;
867
+ flex-shrink: 0;
868
+ }
869
+ .bar-track {
870
+ flex: 1;
871
+ height: 8px;
872
+ background: var(--navy-3);
873
+ border-radius: 4px;
874
+ overflow: hidden;
875
+ }
876
+ .bar-fill {
877
+ height: 100%;
878
+ background: var(--gold);
879
+ border-radius: 4px;
880
+ transition: width 0.4s ease;
881
+ }
882
+ .bar-value {
883
+ width: 30px;
884
+ color: var(--text-1);
885
+ font-weight: 500;
886
+ text-align: right;
887
+ }
888
+
889
+ .sparkline {
890
+ width: 100%;
891
+ height: 60px;
892
+ }
893
+ .sparkline-range {
894
+ display: flex;
895
+ justify-content: space-between;
896
+ font-size: 11px;
897
+ color: var(--text-3);
898
+ margin-top: 4px;
899
+ }
900
+
901
+ .analytics-footer {
902
+ display: flex;
903
+ align-items: center;
904
+ gap: 16px;
905
+ margin-top: 24px;
906
+ }
907
+ .refresh-btn {
908
+ padding: 8px 16px;
909
+ background: var(--navy-3);
910
+ border: 1px solid var(--border);
911
+ border-radius: 8px;
912
+ color: var(--text-1);
913
+ font-size: 13px;
914
+ cursor: pointer;
915
+ transition: background var(--transition);
916
+ }
917
+ .refresh-btn:hover {
918
+ background: var(--navy-4);
919
+ }
920
+ .analytics-note {
921
+ font-size: 12px;
922
+ color: var(--text-3);
923
+ }
src/agent_v2.py CHANGED
@@ -384,7 +384,13 @@ def run_query_v2(user_message: str, session_id: str) -> Dict[str, Any]:
384
 
385
  chunks = []
386
  try:
387
- chunks = retrieve_parallel(search_queries[:3], top_k=5)
 
 
 
 
 
 
388
  # Add precedent chain
389
  from src.citation_graph import get_precedent_chain
390
  retrieved_ids = [c.get("judgment_id", "") for c in chunks]
 
384
 
385
  chunks = []
386
  try:
387
+ # Retrieve more candidates for reranker to work with
388
+ raw_chunks = retrieve_parallel(search_queries[:3], top_k=10)
389
+
390
+ # Rerank candidates by true relevance
391
+ from src.reranker import rerank
392
+ chunks = rerank(user_message, raw_chunks, top_k=5)
393
+
394
  # Add precedent chain
395
  from src.citation_graph import get_precedent_chain
396
  retrieved_ids = [c.get("judgment_id", "") for c in chunks]
src/logger.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Inference logger.
Writes one JSON line per query to logs/inference.jsonl.
Called as FastAPI BackgroundTask — does not block response.

WHY two-layer logging?
HF Spaces containers are ephemeral — local files are wiped on restart.
Local JSONL is fast for same-session analytics.
In future, add HF Dataset API push here for durable storage.
"""

import hashlib
import json
import logging
import os
from datetime import datetime, timezone

logger = logging.getLogger(__name__)

# Override with the LOG_PATH env var; read once at import time.
LOG_PATH = os.getenv("LOG_PATH", "logs/inference.jsonl")


def ensure_log_dir():
    """Create the log directory if needed; no-op for a bare filename."""
    log_dir = os.path.dirname(LOG_PATH)
    # os.makedirs("") raises FileNotFoundError, so guard against a
    # LOG_PATH that has no directory component.
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)


def log_inference(
    query: str,
    session_id: str,
    answer: str,
    num_sources: int,
    verification_status,
    entities: dict,
    latency_ms: float,
    stage: str = "",
    truncated: bool = False,
    out_of_domain: bool = False,
):
    """
    Write one inference record to LOG_PATH as a single JSON line.
    Called as BackgroundTask in api/main.py.
    Fails silently — never blocks or crashes the main response.

    Only the query's length and a stable hash are stored, never the raw
    query or answer text.
    """
    try:
        ensure_log_dir()
        record = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "session_id": session_id,
            "query_length": len(query),
            # sha256 instead of built-in hash(): Python's str hash is
            # salted per process (PYTHONHASHSEED), so hash() would change
            # on every restart and make cross-session dedup impossible.
            "query_hash": hashlib.sha256(query.encode("utf-8")).hexdigest()[:10],
            "num_sources": num_sources,
            "verification_status": str(verification_status),
            # verification_status may be a bool or a status string.
            "verified": verification_status is True or verification_status == "verified",
            "entities_found": list(entities.keys()) if entities else [],
            "num_entity_types": len(entities) if entities else 0,
            "latency_ms": latency_ms,
            "stage": stage,
            "truncated": truncated,
            "out_of_domain": out_of_domain,
            "answer_length": len(answer),
        }
        with open(LOG_PATH, "a", encoding="utf-8") as f:
            f.write(json.dumps(record) + "\n")
    except Exception as e:
        # Best-effort by design: a logging failure must never surface to
        # the request path that scheduled this task.
        logger.warning(f"Inference logging failed: {e}")
src/reranker.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Cross-encoder reranker.
Re-scores FAISS retrieval candidates by joint query-document relevance.

WHY a cross-encoder on top of the bi-encoder (MiniLM)?
A bi-encoder embeds query and document independently — fast but
approximate. A cross-encoder reads the query and document together,
which is slower but more accurate, so it is applied only post-retrieval
to pick the best top_k out of a larger candidate pool.

Model: cross-encoder/ms-marco-MiniLM-L-6-v2 — trained on MS-MARCO passage
ranking, small enough (~80MB) for the HF Spaces free tier and fast enough
to rerank a handful of candidates on CPU.
"""

import logging
from typing import Dict, List

logger = logging.getLogger(__name__)

# Populated by load_reranker(); when loading fails these stay unset so
# rerank() degrades to a FAISS-order pass-through.
_reranker = None
_reranker_loaded = False


def load_reranker():
    """
    Load the cross-encoder once at startup (call from api/main.py after
    the other models).

    Failure is non-fatal: retrieval keeps working on FAISS scores alone.
    """
    global _reranker, _reranker_loaded

    try:
        from sentence_transformers import CrossEncoder

        logger.info("Loading cross-encoder reranker...")
        _reranker = CrossEncoder(
            "cross-encoder/ms-marco-MiniLM-L-6-v2",
            max_length=512,
        )
        _reranker_loaded = True
        logger.info("Cross-encoder reranker ready")
    except Exception as e:
        logger.warning(f"Reranker load failed: {e}. Retrieval will use FAISS scores only.")
        _reranker_loaded = False


def rerank(query: str, chunks: List[Dict], top_k: int = 5) -> List[Dict]:
    """
    Return the top_k chunks ordered by cross-encoder relevance.

    Args:
        query: user query string.
        chunks: candidate chunks from FAISS retrieval.
        top_k: how many chunks to keep after reranking.

    Returns:
        top_k chunks sorted by reranker score, highest first. When the
        reranker is unavailable or scoring raises, the original FAISS
        order is kept and simply truncated to top_k.
    """
    if not _reranker_loaded or _reranker is None:
        return chunks[:top_k]

    if not chunks:
        return []

    try:
        # Pair the query with each candidate's best available text field,
        # truncated to the model's context budget.
        pairs = [
            [
                query,
                (
                    c.get("expanded_context")
                    or c.get("chunk_text")
                    or c.get("text", "")
                )[:512],
            ]
            for c in chunks
        ]

        scores = _reranker.predict(pairs, batch_size=16)

        # Annotate each chunk with its score, then order best-first.
        for c, s in zip(chunks, scores):
            c["reranker_score"] = float(s)
        ordered = sorted(chunks, key=lambda c: c.get("reranker_score", 0), reverse=True)

        logger.info(
            f"Reranked {len(chunks)} chunks → top {top_k}. "
            f"Top score: {ordered[0].get('reranker_score', 0):.3f}"
        )

        return ordered[:top_k]

    except Exception as e:
        logger.warning(f"Reranking failed: {e}. Using FAISS order.")
        return chunks[:top_k]


def is_loaded() -> bool:
    """Whether the cross-encoder was successfully loaded."""
    return _reranker_loaded
src/verify.py CHANGED
@@ -67,30 +67,12 @@ def _extract_quotes(text: str) -> list:
67
 
68
 
69
  def _get_embedder():
70
- """Get the already-loaded embedder — no double loading."""
71
  try:
72
- from src.retrieval import _embedder as embedder
73
- return embedder
74
- except ImportError:
75
- pass
76
-
77
- try:
78
- from src.embed import _model as embedder
79
- return embedder
80
- except ImportError:
81
- pass
82
-
83
- try:
84
- # Last resort — import from retrieval module globals
85
- import src.retrieval as retrieval_module
86
- if hasattr(retrieval_module, '_embedder'):
87
- return retrieval_module._embedder
88
- if hasattr(retrieval_module, 'embedder'):
89
- return retrieval_module.embedder
90
  except Exception:
91
- pass
92
-
93
- return None
94
 
95
 
96
  def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
 
67
 
68
 
69
  def _get_embedder():
70
+ """Get the already-loaded MiniLM embedder."""
71
  try:
72
+ from src.embed import _model
73
+ return _model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  except Exception:
75
+ return None
 
 
76
 
77
 
78
  def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: