Spaces:

dashVector
/

dashVectorSpace

Running

File size: 3,008 Bytes

b92d96d

{
    "cells": [
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "# xVector Analysis\n",
                "\n",
                "This notebook is a template for visualizing the results of the dashVector / xVector engine.\n",
                "It connects to the generated logs and the Qdrant instance to provide insights."
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {},
            "outputs": [],
            "source": [
                "import pandas as pd\n",
                "import matplotlib.pyplot as plt\n",
                "import json\n",
                "import os\n",
                "\n",
                "# Path to logs\n",
                "LOG_FILE = \"../logs/active_learning_queue.jsonl\"\n",
                "\n",
                "def load_logs():\n",
                "    data = []\n",
                "    if os.path.exists(LOG_FILE):\n",
                "        with open(LOG_FILE, 'r') as f:\n",
                "            for line in f:\n",
                "                data.append(json.loads(line))\n",
                "    return pd.DataFrame(data)\n",
                "\n",
                "df = load_logs()\n",
                "if not df.empty:\n",
                "    print(f\"Loaded {len(df)} log entries.\")\n",
                "    display(df.head())\n",
                "else:\n",
                "    print(\"No logs found yet. Run main.py first.\")"
            ]
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "## Confidence Distribution\n",
                "Analyze the confidence scores of queries that triggered active learning."
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {},
            "outputs": [],
            "source": [
                "if not df.empty:\n",
                "    plt.figure(figsize=(10, 6))\n",
                "    plt.hist(df['confidence'], bins=20, color='skyblue', edgecolor='black')\n",
                "    plt.title('Distribution of Confidence Scores (Hard Negatives)')\n",
                "    plt.xlabel('Confidence')\n",
                "    plt.ylabel('Count')\n",
                "    plt.show()"
            ]
        }
    ],
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.8.10"
        }
    },
    "nbformat": 4,
    "nbformat_minor": 5
}