File size: 3,008 Bytes
b92d96d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
{
    "cells": [
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "# xVector Analysis\n",
                "\n",
                "This notebook is a template for visualizing the results of the dashVector / xVector engine.\n",
                "It connects to the generated logs and the Qdrant instance to provide insights."
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {},
            "outputs": [],
            "source": [
                "import json\n",
                "import os\n",
                "\n",
                "import matplotlib.pyplot as plt\n",
                "import pandas as pd\n",
                "\n",
                "# Path to logs (a relative path resolves against the kernel's working directory)\n",
                "LOG_FILE = \"../logs/active_learning_queue.jsonl\"\n",
                "\n",
                "\n",
                "def load_logs(path=None):\n",
                "    \"\"\"Load a JSON Lines log file into a DataFrame.\n",
                "\n",
                "    Args:\n",
                "        path: Location of the .jsonl file to read. When None (the default),\n",
                "            the module-level LOG_FILE is used, looked up at call time.\n",
                "\n",
                "    Returns:\n",
                "        pandas.DataFrame built from one parsed JSON value per non-blank\n",
                "        line. Empty when the file does not exist or holds no records.\n",
                "\n",
                "    Raises:\n",
                "        json.JSONDecodeError: If a non-blank line is not valid JSON.\n",
                "    \"\"\"\n",
                "    if path is None:\n",
                "        path = LOG_FILE\n",
                "    if not os.path.exists(path):\n",
                "        return pd.DataFrame()\n",
                "    # Decode as UTF-8 explicitly instead of relying on the platform default.\n",
                "    with open(path, 'r', encoding='utf-8') as f:\n",
                "        # Blank lines (e.g. a stray empty line) would make json.loads raise.\n",
                "        records = [json.loads(line) for line in f if line.strip()]\n",
                "    return pd.DataFrame(records)\n",
                "\n",
                "\n",
                "df = load_logs()\n",
                "if not df.empty:\n",
                "    print(f\"Loaded {len(df)} log entries.\")\n",
                "    display(df.head())\n",
                "else:\n",
                "    print(\"No logs found yet. Run main.py first.\")"
            ]
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "## Confidence Distribution\n",
                "Analyze the confidence scores of queries that triggered active learning."
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Histogram of the logged confidence values; skipped when no logs were loaded.\n",
                "if not df.empty:\n",
                "    fig, ax = plt.subplots(figsize=(10, 6))\n",
                "    ax.hist(df['confidence'], bins=20, color='skyblue', edgecolor='black')\n",
                "    ax.set_title('Distribution of Confidence Scores (Hard Negatives)')\n",
                "    ax.set_xlabel('Confidence')\n",
                "    ax.set_ylabel('Count')\n",
                "    plt.show()"
            ]
        }
    ],
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.8.10"
        }
    },
    "nbformat": 4,
    "nbformat_minor": 5
}