tensorus committed on
Commit
1fcd4c4
·
verified ·
1 Parent(s): a19849e

Upload 90 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +60 -0
  2. .gitignore +175 -0
  3. DEMO.md +203 -0
  4. Dockerfile +54 -0
  5. LICENSE +21 -0
  6. README.md +679 -8
  7. app.py +378 -0
  8. docker-compose.yml +55 -0
  9. docs/api_guide.md +49 -0
  10. docs/index.md +22 -0
  11. docs/installation.md +89 -0
  12. docs/metadata_schemas.md +57 -0
  13. mkdocs.yml +11 -0
  14. pages/1_Dashboard.py +125 -0
  15. pages/2_Control_Panel.py +138 -0
  16. pages/3_NQL_Chatbot.py +94 -0
  17. pages/4_Data_Explorer.py +167 -0
  18. pages/5_API_Playground.py +57 -0
  19. pages/6_Financial_Forecast_Demo.py +236 -0
  20. pages/api_playground_v2.py +171 -0
  21. pages/control_panel_v2.py +300 -0
  22. pages/data_explorer_v2.py +224 -0
  23. pages/nql_chatbot_v2.py +168 -0
  24. pages/pages_shared_utils.py +547 -0
  25. pages/ui_utils.py +267 -0
  26. pyproject.toml +40 -0
  27. requirements-test.txt +18 -0
  28. requirements.txt +54 -0
  29. setup.sh +12 -0
  30. tensor_data/count_ds.pt +3 -0
  31. tensor_data/test_dataset_ingest_5dde494e-54cd-4fbc-bc87-e5949cc3809e.pt +3 -0
  32. tensor_data/test_dataset_ingest_743ec0f5-c46c-42b3-92fd-5b8e504b147c.pt +3 -0
  33. tensor_data/test_dataset_ingest_d17cec44-8f9f-4856-a343-3ef700784edb.pt +3 -0
  34. tensor_data/test_dataset_ingest_f3e79d98-6fb4-496d-9763-f0ce3aa1c801.pt +3 -0
  35. tensor_data/test_dataset_integration_69bf9dbf-0ef4-4dd7-ba2d-6ba3bf31b799.pt +3 -0
  36. tensorus/__init__.py +18 -0
  37. tensorus/api.py +0 -0
  38. tensorus/api/__init__.py +25 -0
  39. tensorus/api/dependencies.py +36 -0
  40. tensorus/api/endpoints.py +601 -0
  41. tensorus/api/routers/__init__.py +0 -0
  42. tensorus/api/routers/query.py +35 -0
  43. tensorus/api/security.py +133 -0
  44. tensorus/audit.py +68 -0
  45. tensorus/automl_agent.py +356 -0
  46. tensorus/config.py +95 -0
  47. tensorus/dummy_env.py +91 -0
  48. tensorus/financial_data_generator.py +122 -0
  49. tensorus/ingestion_agent.py +409 -0
  50. tensorus/mcp_client.py +281 -0
.dockerignore ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git files
2
+ .git/
3
+ .gitignore
4
+
5
+ # Python cache and compiled files
6
+ __pycache__/
7
+ *.pyc
8
+ *.pyo
9
+ *.pyd
10
+ *.egg-info/
11
+ dist/
12
+ build/
13
+
14
+ # Virtual environments
15
+ env/
16
+ venv/
17
+ .venv/
18
+
19
+ # IDE and editor specific files
20
+ .vscode/
21
+ .idea/
22
+ *.swp
23
+ *.swo
24
+
25
+ # Secrets or local configuration (should be passed via environment variables)
26
+ .env
27
+ secrets/
28
+ *.pem
29
+ *.key
30
+
31
+ # Log files
32
+ *.log
33
+ tensorus_audit.log # Specifically exclude this if it's generated locally
34
+
35
+ # Test files and reports (unless you want to run tests in the image)
36
+ tests/
37
+ htmlcov/
38
+ .pytest_cache/
39
+ .coverage
40
+
41
+ # OS-specific files
42
+ .DS_Store
43
+ Thumbs.db
44
+
45
+ # Docker files themselves (no need to copy into the image)
46
+ Dockerfile
47
+ docker-compose.yml
48
+
49
+ # Other project specific files/folders to exclude
50
+ # E.g., notebooks/, docs/, etc.
51
+ # data/ # If you have a local data folder not needed in the image
52
+ # setup.sh # Exclude if not part of the Python API image
53
+ # DEMO.md # Exclude if not part of the Python API image
54
+ # LICENSE # Usually good to include, but can be excluded if desired
55
+ # README.md # Usually good to include, but can be excluded if desired
56
+ # pages/ # If these are Streamlit pages not served by this Docker image.
57
+
58
+ # If `app.py` at root is a Streamlit launcher, and not part of this specific API service, exclude it too.
59
+ # Assuming `app.py` from the root is not part of this specific service being Dockerized.
60
+ app.py
.gitignore ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
175
+
DEMO.md ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Tensorus Demo Script: Showcasing Agentic Tensor Management
2
+
3
+ ## Introduction
4
+
5
+ This demo showcases the key capabilities of Tensorus, an agentic tensor database/data lake. We'll walk through data ingestion, storage, querying, tensor operations, and how to interact with the system via its UI and APIs.
6
+
7
+ ## Prerequisites
8
+
9
+ * Tensorus backend API running (`uvicorn api:app --reload --host 127.0.0.1 --port 7860`)
10
+ * Tensorus Streamlit UI running (`streamlit run app.py`)
11
+ * Tensorus MCP Server running (`python -m tensorus.mcp_server`)
12
+ * A terminal for API calls (e.g., using `curl`) or a tool like Postman.
13
+ * A web browser.
14
+
15
+ ## Demo Scenarios
16
+
17
+ ### Scenario 1: Automated Image Data Ingestion and Exploration
18
+
19
+ **Goal:** Demonstrate how new image data is automatically ingested, stored, and can be explored.
20
+
21
+ 1. **Prepare for Ingestion:**
22
+ * Open the Streamlit UI in your browser (usually `http://localhost:8501`).
23
+ * Navigate to the "Agents" or "Control Panel" page.
24
+ * Ensure the "Data Ingestion Agent" is targeting the dataset `ingested_data_api` and monitoring the directory `temp_ingestion_source_api`. (You might need to configure this in `tensorus/api.py` if not dynamically configurable via UI).
25
+ * Start the "Data Ingestion Agent" if it's not already running. View its logs in the UI to see it polling.
26
+ * Verify the `temp_ingestion_source_api` directory (relative to where the API is run) is initially empty or clean it out.
27
+
28
+ 2. **Simulate New Data Arrival:**
29
+ * Download a sample image (e.g., a picture of a cat, `cat.jpg`) into the `temp_ingestion_source_api` directory on your local filesystem.
30
+ * **Attractive Element:** Use a visually distinct and interesting image.
31
+
32
+ 3. **Observe Ingestion:**
33
+ * In the Streamlit UI, watch the Ingestion Agent's logs. You should see messages indicating it detected `cat.jpg`, processed it, and ingested it into `ingested_data_api`.
34
+
35
+ 4. **Explore Ingested Data (UI):**
36
+ * Navigate to the "Data Explorer" page in the Streamlit UI.
37
+ * Select the `ingested_data_api` dataset from the dropdown.
38
+ * You should see the newly ingested `cat.jpg` (or its metadata) listed.
39
+ * Click to view its details (metadata, tensor shape, perhaps a preview if the UI supports it).
40
+ * **Attractive Element:** The UI showing the image tensor's information clearly, perhaps even a thumbnail if the UI is advanced.
41
+
42
+ 5. **Verify via API (Optional):**
43
+ * Use `curl` or Postman to fetch the tensor details. First, list datasets to find `ingested_data_api`, then fetch its records, identify the `record_id` for `cat.jpg` from its metadata.
44
+ ```bash
45
+ # Example: List records in the dataset to find the ID (paged)
46
+ curl "http://127.0.0.1:7860/datasets/ingested_data_api/records?offset=0&limit=100"
47
+ # Then use the ID:
48
+ # curl http://127.0.0.1:7860/datasets/ingested_data_api/tensors/{record_id_of_cat_jpg}
49
+ ```
50
+ * Confirm the API returns the tensor data and metadata.
51
+
52
+ ### Scenario 2: Natural Language Querying for Specific Data
53
+
54
+ **Goal:** Show how NQL can be used to find specific tensors.
55
+
56
+ 1. **Ensure Data Exists:**
57
+ * Make sure the `cat.jpg` from Scenario 1 is present in `ingested_data_api`.
58
+ * Optionally, add another image, e.g., `dog.png`, to `temp_ingestion_source_api` and let it be ingested. Give it distinctive metadata if possible, e.g., by modifying the `ingestion_agent.py` to add `{"animal_type": "dog"}` or by using the API to update metadata post-ingestion.
59
+
60
+ 2. **Use NQL Chatbot (UI):**
61
+ * Navigate to the "NQL Chatbot" or "Query Hub" page in the Streamlit UI.
62
+ * Enter a query. Given the basic NQL, a query targeting the filename is most reliable:
63
+ `show all data from ingested_data_api where source_file contains "cat.jpg"`
64
+ * If you added custom metadata like `{"animal_type": "cat"}` for the cat image, you could try:
65
+ `find records from ingested_data_api where animal_type = 'cat'`
66
+ * **Attractive Element:** The chatbot interface and the directness of the natural language query yielding correct results.
67
+
68
+ 3. **Observe Results:**
69
+ * The UI should display the tensor(s) matching your query.
70
+
71
+ ### Scenario 3: Performing Tensor Operations via API
72
+
73
+ **Goal:** Demonstrate applying a tensor operation to a stored tensor.
74
+
75
+ 1. **Identify a Target Tensor:**
76
+ * From Scenario 1 or 2, obtain the `record_id` of the `cat.jpg` tensor within the `ingested_data_api` dataset. Let's assume its `record_id` is `xyz123`.
77
+
78
+ 2. **Perform a Tensor Operation (e.g., Transpose API):**
79
+ * Image tensors are often (C, H, W) or (H, W, C). Let's assume it's (C, H, W) and we want to transpose H and W, which would be `dim0=1, dim1=2`.
80
+ * Use `curl` or Postman:
81
+ ```bash
82
+ curl -X POST -H "Content-Type: application/json" -d '{
83
+ "input_tensor": {
84
+ "dataset_name": "ingested_data_api",
85
+ "record_id": "xyz123"
86
+ },
87
+ "params": {
88
+ "dim0": 1,
89
+ "dim1": 2
90
+ },
91
+ "output_dataset_name": "ops_results",
92
+ "output_metadata": {"original_id": "xyz123", "operation": "transpose_height_width_demo"}
93
+ }' http://127.0.0.1:7860/ops/transpose
94
+ ```
95
+ * **Attractive Element:** Showing the API call and the structured JSON response indicating success and the new transposed tensor's ID and details.
96
+
97
+ 3. **Verify Result:**
98
+ * The API response will contain the `record_id` and details of the new (transposed) tensor in the `ops_results` dataset.
99
+ * Note the new shape in the response. If the original was (3, 128, 128), the new shape should be (3, 128, 128) after transposing height and width (assuming they were the same). If they were different, e.g. (3, 128, 200), the new shape would be (3, 200, 128).
100
+ * Optionally, use the "Data Explorer" in the UI or another API call to fetch and inspect this new tensor.
101
+
102
+ ### Scenario 4: Interacting with the MCP Server (Conceptual)
103
+
104
+ **Goal:** Explain how an external AI agent could leverage Tensorus via MCP.
105
+
106
+ 1. **Show MCP Server Running:**
107
+ * Point to the terminal where `python -m tensorus.mcp_server` is running and show its log output (e.g., "Tensorus MCP Server connected via stdio and ready.").
108
+
109
+ 2. **Explain Available Tools (Conceptual):**
110
+ * Briefly show the tool definitions in `tensorus/mcp_server.py` or refer to the README's "Available Tools" under "MCP Server Details".
111
+ * Highlight a few tools like `tensorus_list_datasets`, `tensorus_ingest_tensor`, and `tensorus_apply_binary_operation`.
112
+
113
+ 3. **Conceptual Client Interaction (Show code snippet from README):**
114
+ * Show the example client-side JavaScript snippet from the `README.md`:
115
+ ```javascript
116
+ // Conceptual MCP client-side JavaScript
117
+ // Assuming 'client' is an initialized MCP client connected to the Tensorus MCP Server
118
+
119
+ async function example() {
120
+ // List available tools
121
+ const { tools } = await client.request({ method: 'tools/list' }, {});
122
+ console.log("Available Tensorus Tools:", tools.map(t => t.name));
123
+
124
+ // Create a new dataset
125
+ const createResponse = await client.request({ method: 'tools/call' }, {
126
+ name: 'tensorus_create_dataset',
127
+ arguments: { dataset_name: 'my_mcp_dataset_demo' }
128
+ });
129
+ console.log(JSON.parse(createResponse.content[0].text).message); // MCP server often returns JSON string in text
130
+ }
131
+ ```
132
+ * **Attractive Element:** Emphasize that this allows *other* AI agents or LLMs to programmatically use Tensorus as a modular component in a larger intelligent system, promoting interoperability.
133
+
134
+ ### Scenario 5: Financial Time Series Forecasting with ARIMA
135
+
136
+ **Goal:** Demonstrate end-to-end time series forecasting using generated financial data, Tensorus for storage, an ARIMA model for prediction, and visualization within a dedicated UI page.
137
+
138
+ **Prerequisites Specific to this Demo:**
139
+ * Ensure `statsmodels` is installed. If you used the standard setup, install it via the optional models extras:
140
+ ```bash
141
+ pip install -e .[models]
142
+ ```
143
+ or install the `tensorus-models` package which includes it.
144
+
145
+ **Steps:**
146
+
147
+ 1. **Navigate to the Demo Page:**
148
+ * Open the Streamlit UI (e.g., `http://localhost:8501`).
149
+ * From the top navigation bar (or sidebar if the UI structure varies), find and click on the "Financial Forecast Demo" page (it might be titled "📈 Financial Forecast Demo" or similar).
150
+
151
+ 2. **Generate and Ingest Data:**
152
+ * On the "Financial Forecast Demo" page, locate the section "1. Data Generation & Ingestion."
153
+ * Click the button labeled **"Generate & Ingest Sample Financial Data"**.
154
+ * Wait for the spinner to complete. You should see:
155
+ * A success message indicating data ingestion into a dataset like `financial_raw_data` in Tensorus.
156
+ * A sample DataFrame (head) of the generated data (Date, Close, Volume).
157
+ * A Plotly chart displaying the historical 'Close' prices that were just generated and ingested.
158
+ * **Attractive Element:** Observe the immediate visualization of the generated time series.
159
+
160
+ 3. **Configure and Run ARIMA Prediction:**
161
+ * Go to the "3. ARIMA Model Prediction" section on the page.
162
+ * You can adjust the ARIMA order (p, d, q) and the number of future predictions if you wish. Default values (e.g., p=5, d=1, q=0, predictions=30) are provided.
163
+ * Click the button labeled **"Run ARIMA Prediction"**.
164
+ * Wait for the spinner to complete. This step involves:
165
+ * Loading the historical data from Tensorus.
166
+ * Training/fitting the ARIMA model.
167
+ * Generating future predictions.
168
+ * Storing these predictions back into Tensorus (e.g., into `financial_predictions` dataset).
169
+
170
+ 4. **View Prediction Results:**
171
+ * Once the prediction is complete, scroll to the "4. Prediction Results" section.
172
+ * You should see:
173
+ * A Plotly chart displaying the original historical data with the ARIMA predictions plotted alongside/extending from it.
174
+ * A table or list showing the actual predicted values for future dates.
175
+ * **Attractive Element:** The clear visual comparison of the forecast against the historical data, showcasing the model's predictive attempt. The interactivity of Plotly charts (zoom, pan) enhances this.
176
+
177
+ 5. **Interpretation (What this demonstrates):**
178
+ * **Data Flow:** Generation -> Tensorus Storage (Raw Data) -> Retrieval for Modeling -> Prediction -> Tensorus Storage (Predictions) -> UI Visualization.
179
+ * **Ease of Use:** A user-friendly interface to perform a complex task like time series forecasting.
180
+ * **Modularity:** Integration of data generation, storage (Tensorus), modeling (statsmodels), and UI (Streamlit) components.
181
+ * **Revised UI:** Notice the potentially improved layout and charting capabilities on this dedicated demo page.
182
+
183
+ ### Scenario 6: Dashboard Overview
184
+
185
+ **Goal:** Show the main dashboard providing a system overview.
186
+
187
+ 1. **Navigate to Dashboard:**
188
+ * In the Streamlit UI, go to the main "Nexus Dashboard" page (usually the default page when you run `streamlit run app.py`).
189
+ 2. **Review Metrics:**
190
+ * Point out the key metrics displayed:
191
+ * Total Tensors / Active Datasets (these might be placeholders or simple counts).
192
+ * Agents Online / Status (showing the Ingestion Agent as 'running' if you started it).
193
+ * API Status (should be 'Connected').
194
+ * Simulated metrics like data ingestion rate, query latency, RL rewards, AutoML progress. Explain these are illustrative.
195
+ * **Attractive Element:** A visually appealing dashboard. If any metrics are updating (even if simulated based on time), it adds to the dynamic feel.
196
+
197
+ 3. **Activity Feed (if populated):**
198
+ * Show the "Recent Agent Activity" feed. If the Ingestion Agent is running, it might populate this feed, or it might be placeholder data.
199
+ * Explain that in a fully operational system, this feed would show real-time updates from all active agents.
200
+
201
+ ## Conclusion
202
+
203
+ This demo has provided a glimpse into Tensorus's capabilities, including automated data handling by agents, flexible data storage, natural language querying, powerful tensor operations, and a user-friendly interface. The MCP server further extends its reach, allowing programmatic interaction from other AI systems, paving the way for more complex, collaborative AI workflows.
Dockerfile ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use an official Python runtime as a parent image
2
+ FROM python:3.11-slim
3
+
4
+ # Set environment variables for Python
5
+ ENV PYTHONDONTWRITEBYTECODE 1
6
+ ENV PYTHONUNBUFFERED 1
7
+
8
+ # Set the working directory in the container
9
+ WORKDIR /app
10
+
11
+ # Install system dependencies that might be needed by Python packages
12
+ # Example: build-essential for some packages, libpq-dev for psycopg2 from source (though -binary avoids this)
13
+ # For psycopg2-binary, typically no extra system deps are needed for common platforms if using a compatible wheel.
14
+ # RUN apt-get update && apt-get install -y --no-install-recommends build-essential libpq-dev && rm -rf /var/lib/apt/lists/*
15
+
16
+ # Copy the requirements file into the container
17
+ COPY requirements.txt .
18
+
19
+ # Install Python dependencies
20
+ RUN pip install --no-cache-dir -r requirements.txt
21
+
22
+ # Copy the application source code into the container
23
+ # This structure assumes your main application code is within the 'tensorus' directory.
24
+ COPY ./tensorus ./tensorus
25
+ # If your app.py or main.py is at the root alongside Dockerfile, you'd copy it too:
26
+ # COPY app.py . # Or specific main file if it's at the root.
27
+ # Based on `CMD ["uvicorn", "tensorus.api.main:app"...]`, main:app is in tensorus/api/main.py.
28
+
29
+ # Set default environment variables for the application
30
+ # These can be overridden when running the container (e.g., via docker run -e or docker-compose.yml)
31
+ ENV TENSORUS_STORAGE_BACKEND="in_memory"
32
+ ENV TENSORUS_POSTGRES_HOST="db" # Default for docker-compose setup
33
+ ENV TENSORUS_POSTGRES_PORT="5432"
34
+ ENV TENSORUS_POSTGRES_USER="tensorus_user_dockerfile"
35
+ ENV TENSORUS_POSTGRES_PASSWORD="tensorus_pass_dockerfile"
36
+ ENV TENSORUS_POSTGRES_DB="tensorus_db_dockerfile"
37
+ ENV TENSORUS_POSTGRES_DSN=""
38
+ ENV TENSORUS_API_KEY_HEADER_NAME="X-API-KEY"
39
+ ENV TENSORUS_VALID_API_KEYS="[]" # Example: "key1,key2" or "[\"key1\", \"key2\"]" - set to [] if no keys required
40
+ ENV TENSORUS_AUTH_JWT_ENABLED="False" # Default JWT auth to disabled
41
+ ENV TENSORUS_AUTH_JWT_ISSUER=""
42
+ ENV TENSORUS_AUTH_JWT_AUDIENCE=""
43
+ ENV TENSORUS_AUTH_JWT_ALGORITHM="RS256"
44
+ ENV TENSORUS_AUTH_JWT_JWKS_URI=""
45
+ ENV TENSORUS_AUTH_DEV_MODE_ALLOW_DUMMY_JWT="False"
46
+
47
+ # Expose the port the app runs on
48
+ EXPOSE 7860
49
+
50
+ # Define the command to run the application
51
+ # Use the package export from ``tensorus.api`` which exposes ``app`` in
52
+ # ``tensorus/api/__init__.py``. This lets Uvicorn import the app directly
53
+ # from the package without referencing the module path.
54
+ CMD ["uvicorn", "tensorus.api:app", "--host", "0.0.0.0", "--port", "7860"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Tensorus
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,683 @@
1
  ---
2
- title: Mcp
3
- emoji: 🐠
4
- colorFrom: gray
5
- colorTo: purple
6
- sdk: docker
7
- pinned: false
8
  license: mit
9
- short_description: Tensorus MCP server
 
 
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
 
 
 
 
 
 
2
  license: mit
3
+ title: Core
4
+ sdk: docker
5
+ emoji: 🐠
6
+ colorFrom: blue
7
+ colorTo: yellow
8
+ short_description: Tensorus Core
9
  ---
10
 
11
+
12
+ # Tensorus: Agentic Tensor Database/Data Lake
13
+
14
+ Tensorus is a specialized data platform focused on the management and agent-driven manipulation of tensor data. It offers a streamlined environment for storing, retrieving, and operating on tensors, laying the groundwork for advanced AI and machine learning workflows.
15
+
16
+ The core purpose of Tensorus is to simplify and enhance how developers and AI agents interact with tensor datasets. By providing dedicated tools for tensor operations and a framework for agentic integration, Tensorus aims to accelerate tasks like automated data ingestion, reinforcement learning from stored experiences, and AutoML processes, ultimately enabling more efficient and intelligent data utilization in AI projects.
17
+
18
+ ## Key Features
19
+
20
+ * **Tensor Storage:** Efficiently store and retrieve PyTorch tensors with associated metadata.
21
+ * **Dataset Schemas:** Optional per-dataset schemas enforce required metadata fields and tensor shape/dtype.
22
+ * **Natural Query Language (NQL):** Query your tensor data using a simple, natural language-like syntax.
23
+ * **Agent Framework:** A foundation for building and integrating intelligent agents that interact with your data.
24
+ * **Data Ingestion Agent:** Automatically monitors a directory for new files and ingests them as tensors.
25
+ * **RL Agent:** A Deep Q-Network (DQN) agent that can learn from experiences stored in TensorStorage.
26
+ * **AutoML Agent:** Performs hyperparameter optimization for a dummy model using random search.
27
+ * **API-Driven:** A FastAPI backend provides a RESTful API for interacting with Tensorus.
28
+ * **Streamlit UI:** A user-friendly Streamlit frontend for exploring data and controlling agents.
29
+ * **Tensor Operations:** A comprehensive library of robust tensor operations for common manipulations. See [Basic Tensor Operations](#basic-tensor-operations) for details.
30
+ * **Model System:** Optional model registry with example models provided in a
31
+ separate package. See [Tensorus Models](https://github.com/tensorus/models).
32
+ * **Metadata System:** Rich Pydantic schemas and storage backends for semantic, lineage, computational, quality, relational, and usage metadata.
33
+ * **Extensible:** Designed to be extended with more advanced agents, storage backends, and query capabilities.
34
+ * **Model Context Protocol (MCP) Server:** Provides a standardized interface for AI agents and LLMs to interact with Tensorus capabilities—including dataset management, tensor storage, and operations—using the Model Context Protocol. (See [MCP Server Details](#mcp-server-details) below).
35
+
36
+ ## Project Structure
37
+
38
+ * `app.py`: The main Streamlit frontend application (located at the project root).
39
+ * `pages/`: Directory containing individual Streamlit page scripts and shared UI utilities for the dashboard.
40
+ * `pages/ui_utils.py`: Utility functions specifically for the Streamlit UI.
41
+ * *(Other page scripts like `01_dashboard.py`, `02_control_panel.py`, etc., define the different views of the dashboard)*
42
+ * `tensorus/`: Directory containing the core `tensorus` library modules (this is the main installable package).
43
+ * `tensorus/__init__.py`: Makes `tensorus` a Python package.
44
+ * `tensorus/api.py`: The FastAPI application providing the backend API for Tensorus.
45
+ * `tensorus/tensor_storage.py`: Core TensorStorage implementation for managing tensor data.
46
+ * `tensorus/tensor_ops.py`: Library of functions for tensor manipulations.
47
+ * `tensorus/nql_agent.py`: Agent for processing Natural Query Language queries.
48
+ * `tensorus/ingestion_agent.py`: Agent for ingesting data from various sources.
49
+ * `tensorus/rl_agent.py`: Agent for Reinforcement Learning tasks.
50
+ * `tensorus/automl_agent.py`: Agent for AutoML processes.
51
+ * `tensorus/dummy_env.py`: A simple environment for the RL agent demonstration.
52
+ * *(Other Python files within `tensorus/` are part of the core library.)*
53
+ * `requirements.txt`: Lists the project's Python dependencies for development and local execution.
54
+ * `pyproject.toml`: Project metadata, dependencies for distribution, and build system configuration (e.g., for PyPI).
55
+ * `tensorus/mcp_server.py`: Python implementation of the Model Context Protocol (MCP) server using `fastmcp`.
56
+ * `README.md`: This file.
57
+ * `LICENSE`: Project license file.
58
+ * `.gitignore`: Specifies intentionally untracked files that Git should ignore.
59
+
60
+ ## Hugging Face Demo
61
+
62
+ You can try Tensorus online via Hugging Face Spaces:
63
+
64
+ * **API Documentation:** [Swagger UI](https://tensorus-api.hf.space/docs) | [ReDoc](https://tensorus-api.hf.space/redoc)
65
+ * **Dashboard UI:** [Streamlit Dashboard](https://tensorus-dashboard.hf.space)
66
+
67
+ ## Tensorus Execution Cycle
68
+
69
+ ```mermaid
70
+ graph TD
71
+ %% User Interface Layer
72
+ subgraph UI_Layer ["User Interaction"]
73
+ UI[Streamlit UI]
74
+ end
75
+
76
+ %% API Gateway Layer
77
+ subgraph API_Layer ["Backend Services"]
78
+ API[FastAPI Backend]
79
+ MCP["MCP Server (FastMCP Python)"]
80
+ end
81
+
82
+ %% Core Storage with Method Interface
83
+ subgraph Storage_Layer ["Core Storage - TensorStorage"]
84
+ TS[TensorStorage Core]
85
+ subgraph Storage_Methods ["Storage Interface"]
86
+ TS_insert[insert data metadata]
87
+ TS_query[query query_fn]
88
+ TS_get[get_by_id id]
89
+ TS_sample[sample n]
90
+ TS_update[update_metadata]
91
+ end
92
+ TS --- Storage_Methods
93
+ end
94
+
95
+ %% Agent Processing Layer
96
+ subgraph Agent_Layer ["Processing Agents"]
97
+ IA[Ingestion Agent]
98
+ NQLA[NQL Agent]
99
+ RLA[RL Agent]
100
+ AutoMLA[AutoML Agent]
101
+ end
102
+
103
+ %% Model System
104
+ subgraph Model_Layer ["Model System"]
105
+ Registry[Model Registry]
106
+ ModelsPkg[Models Package]
107
+ end
108
+
109
+ %% Tensor Operations Library
110
+ subgraph Ops_Layer ["Tensor Operations"]
111
+ TOps[TensorOps Library]
112
+ end
113
+
114
+ %% Primary UI & MCP Flow
115
+ UI -->|HTTP Requests| API
116
+ MCP -->|MCP Calls| API
117
+
118
+ %% API Orchestration
119
+ API -->|Command Dispatch| IA
120
+ API -->|Command Dispatch| NQLA
121
+ API -->|Command Dispatch| RLA
122
+ API -->|Command Dispatch| AutoMLA
123
+ API -->|Model Training| Registry
124
+ API -->|Direct Query| TS_query
125
+
126
+ %% Model System Interactions
127
+ Registry -->|Uses Models| ModelsPkg
128
+ Registry -->|Load/Save| TS
129
+ ModelsPkg -->|Tensor Ops| TOps
130
+
131
+ %% Agent Storage Interactions
132
+ IA -->|Data Ingestion| TS_insert
133
+
134
+ NQLA -->|Query Execution| TS_query
135
+ NQLA -->|Record Retrieval| TS_get
136
+
137
+ RLA -->|State Persistence| TS_insert
138
+ RLA -->|Experience Sampling| TS_sample
139
+ RLA -->|State Retrieval| TS_get
140
+
141
+ AutoMLA -->|Trial Storage| TS_insert
142
+ AutoMLA -->|Data Retrieval| TS_query
143
+
144
+ %% Computational Operations
145
+ NQLA -->|Vector Operations| TOps
146
+ RLA -->|Policy Evaluation| TOps
147
+ AutoMLA -->|Model Optimization| TOps
148
+
149
+ %% Indirect Storage Write-back
150
+ TOps -.->|Intermediate Results| TS_insert
151
+ ```
152
+
153
+ ## Getting Started
154
+
155
+ ### Prerequisites
156
+
157
+ * Python 3.10+
158
+ * PyTorch
159
+ * FastAPI
160
+ * Uvicorn
161
+ * Streamlit
162
+ * Pydantic v2
163
+ * Requests
164
+ * Pillow (for image preprocessing)
165
+ * Matplotlib (optional, for plotting RL rewards)
166
+
167
+ ### Installation
168
+
169
+ 1. Clone the repository:
170
+
171
+ ```bash
172
+ git clone https://github.com/tensorus/tensorus.git
173
+ cd tensorus
174
+ ```
175
+
176
+ 2. Create a virtual environment (recommended):
177
+
178
+ ```bash
179
+ python3 -m venv venv
180
+ source venv/bin/activate # On Linux/macOS
181
+ venv\Scripts\activate # On Windows
182
+ ```
183
+
184
+ 3. Install dependencies using the provided setup script:
185
+
186
+ ```bash
187
+ ./setup.sh
188
+ ```
189
+ This installs Python requirements from `requirements.txt` and
190
+ `requirements-test.txt`, using CPU wheels for PyTorch and
191
+ pinning `httpx` to a compatible version. The test requirements
192
+ also install `fastapi>=0.110` for compatibility with Pydantic v2.
193
+ The script also installs test requirements for running the Python test suite.
194
+ Heavy machine-learning libraries (e.g. `xgboost`, `lightgbm`, `catboost`,
195
+ `statsmodels`, `torch-geometric`) are not installed by default. Install
196
+ them separately using `pip install tensorus[models]` or by installing the
197
+ `tensorus-models` package if you need the built-in models.
198
+ The audit logger writes to `tensorus_audit.log` by default. Override the
199
+ path with the `TENSORUS_AUDIT_LOG_PATH` environment variable if desired.
200
+
201
+ ### Running the API Server
202
+
203
+ 1. Navigate to the project root directory (the directory containing the `tensorus` folder and `pyproject.toml`).
204
+ 2. Ensure your virtual environment is activated if you are using one.
205
+ 3. Start the FastAPI backend server using:
206
+
207
+ ```bash
208
+ uvicorn tensorus.api:app --host 0.0.0.0 --port 7860
209
+ ```
210
+
211
+ * This command launches Uvicorn with the `app` instance defined in `tensorus/api.py`.
212
+ * Access the API documentation at `http://localhost:7860/docs` or `http://localhost:7860/redoc`.
213
+ * All dataset and agent endpoints are available once the server is running.
214
+
215
+ ### Running the Streamlit UI
216
+
217
+ 1. In a separate terminal (with the virtual environment activated), navigate to the project root.
218
+ 2. Start the Streamlit frontend:
219
+
220
+ ```bash
221
+ streamlit run app.py
222
+ ```
223
+
224
+ * Access the UI in your browser at the URL provided by Streamlit (usually `http://localhost:8501`).
225
+
226
+ ### Running the MCP Server
227
+
228
+ Tensorus provides a lightweight Python implementation of the Model Context Protocol server using `fastmcp`. It exposes the FastAPI endpoints as tools so you can run an MCP server without Node.js.
229
+
230
+ **Starting the MCP Server:**
231
+
232
+ 1. Install dependencies (includes `fastmcp`):
233
+ ```bash
234
+ pip install -r requirements.txt
235
+ ```
236
+ 2. Ensure the FastAPI backend is running.
237
+ 3. Start the server from the repository root:
238
+ ```bash
239
+ python -m tensorus.mcp_server
240
+ ```
241
+ Add `--transport streamable-http` for a web endpoint. SSE transport is deprecated.
242
+ If you plan to run the MCP server tests in `tests/test_mcp_server.py` and `tests/test_mcp_client.py`, run `./setup.sh` beforehand to install all required packages.
243
+
244
+
245
+ ### Running the Agents (Examples)
246
+
247
+ You can run the example agents directly from their respective files:
248
+
249
+ * **RL Agent:**
250
+
251
+ ```bash
252
+ python tensorus/rl_agent.py
253
+ ```
254
+
255
+ * **AutoML Agent:**
256
+
257
+ ```bash
258
+ python tensorus/automl_agent.py
259
+ ```
260
+
261
+ * **Ingestion Agent:**
262
+
263
+ ```bash
264
+ python tensorus/ingestion_agent.py
265
+ ```
266
+
267
+ * Note: The Ingestion Agent will monitor the `temp_ingestion_source` directory (created automatically if it doesn't exist in the project root) for new files.
268
+
269
+ ### Docker Usage
270
+
271
+ Tensorus can also be run inside a Docker container. Build the image from the project root:
272
+
273
+ ```bash
274
+ docker build -t tensorus .
275
+ ```
276
+
277
+ Run the container and expose the API server on port `7860`:
278
+
279
+ ```bash
280
+ docker run -p 7860:7860 tensorus
281
+ ```
282
+
283
+ The FastAPI documentation will then be available at `http://localhost:7860/docs`.
284
+
285
+ If your system has NVIDIA GPUs and the [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-container-toolkit) installed, you can pass `--gpus all` to `docker run` and modify `setup.sh` to install CUDA-enabled PyTorch wheels for GPU acceleration.
286
+
287
+ ### Deployment
288
+
289
+ Tensorus adds several security headers by default. You can customize them with environment variables:
290
+
291
+ * `TENSORUS_X_FRAME_OPTIONS` – value for the `X-Frame-Options` header (default `SAMEORIGIN`).
292
+ * `TENSORUS_CONTENT_SECURITY_POLICY` – value for the `Content-Security-Policy` header (default `default-src 'self'`).
293
+
294
+ If either variable is empty or set to `NONE`, the corresponding header is omitted.
295
+ The default policy prevents ReDoc from loading its CDN assets. See [ReDoc and Content Security Policy](docs/api_guide.md#re-doc-and-content-security-policy) for an example policy that allows them.
296
+
297
+ Example configuration:
298
+
299
+ ```bash
300
+ # Allow embedding from a trusted site
301
+ TENSORUS_X_FRAME_OPTIONS="ALLOW-FROM https://example.com"
302
+
303
+ # Permit scripts from an external CDN
304
+ TENSORUS_CONTENT_SECURITY_POLICY="default-src 'self'; script-src 'self' https://cdn.example.com"
305
+ ```
306
+
307
+ ### Test Suite Dependencies
308
+
309
+ The Python tests rely on packages from both `requirements.txt` and
310
+ `requirements-test.txt`. Make sure these dependencies are installed
311
+ before running `pytest` by executing the provided setup script:
312
+
313
+ ```bash
314
+ ./setup.sh
315
+ ```
316
+
317
+ ### Running Tests
318
+
319
+ Tensorus includes Python unit tests. To set up the environment and run them:
320
+
321
+ 1. Install all dependencies using:
322
+
323
+ ```bash
324
+ ./setup.sh
325
+ ```
326
+
327
+ This script installs packages from `requirements.txt` and `requirements-test.txt` (which pins `fastapi>=0.110` for Pydantic v2 support). It also pulls in `fastmcp` and other dependencies required by `tests/test_mcp_server.py` and `tests/test_mcp_client.py`.
328
+
329
+ 2. Run the Python test suite:
330
+
331
+ ```bash
332
+ pytest
333
+ ```
334
+
335
+ To specifically verify the Model Context Protocol components, run the MCP
336
+ server and client tests:
337
+
338
+ ```bash
339
+ pytest tests/test_mcp_server.py tests/test_mcp_client.py
340
+ ```
341
+
342
+
343
+ ## Using Tensorus
344
+
345
+ ### API Endpoints
346
+
347
+ The API provides the following main endpoints:
348
+
349
+ * **Datasets:**
350
+ * `POST /datasets/create`: Create a new dataset.
351
+ * `POST /datasets/{name}/ingest`: Ingest a tensor into a dataset.
352
+ * `GET /datasets/{name}/fetch`: Retrieve all records from a dataset.
353
+ * `GET /datasets/{name}/records`: Retrieve a page of records. Supports `offset` (start index, default `0`) and `limit` (max results, default `100`).
354
+ * `GET /datasets`: List all available datasets.
355
+ * **Querying:**
356
+ * `POST /query`: Execute an NQL query.
357
+ * **Agents:**
358
+ * `GET /agents`: List all registered agents.
359
+ * `GET /agents/{agent_id}/status`: Get the status of a specific agent.
360
+ * `POST /agents/{agent_id}/start`: Start an agent.
361
+ * `POST /agents/{agent_id}/stop`: Stop an agent.
362
+ * `GET /agents/{agent_id}/logs`: Get recent logs for an agent.
363
+ * **Metrics & Monitoring:**
364
+ * `GET /metrics/dashboard`: Get aggregated dashboard metrics.
365
+
366
+ ### Dataset Schemas
367
+
368
+ Datasets can optionally include a schema when created. The schema defines
369
+ required metadata fields and expected tensor `shape` and `dtype`. Inserts that
370
+ violate the schema will raise a validation error.
371
+
372
+ Example:
373
+
374
+ ```python
375
+ schema = {
376
+ "shape": [3, 10],
377
+ "dtype": "float32",
378
+ "metadata": {"source": "str", "value": "int"}
379
+ }
380
+ storage.create_dataset("my_ds", schema=schema)
381
+ storage.insert("my_ds", torch.rand(3, 10), {"source": "sensor", "value": 5})
382
+ ```
383
+
384
+ ## Metadata System
385
+
386
+ Tensorus includes a detailed metadata subsystem for describing tensors beyond their raw data. Each tensor has a `TensorDescriptor` and can be associated with optional semantic, lineage, computational, quality, relational, and usage metadata. The metadata storage backend is pluggable, supporting in-memory storage for quick testing or PostgreSQL for persistence. Search and aggregation utilities allow querying across these metadata fields. See [metadata_schemas.md](docs/metadata_schemas.md) for schema details.
387
+
388
+ ### Streamlit UI
389
+
390
+
391
+ The Streamlit UI provides a user-friendly interface for:
392
+
393
+ * **Dashboard:** View basic system metrics and agent status.
394
+ * **Agent Control:** Start, stop, and view logs for agents.
395
+ * **NQL Chat:** Enter natural language queries and view results.
396
+ * **Data Explorer:** Browse datasets, preview data, and perform tensor operations.
397
+
398
+ ## Natural Query Language (NQL)
399
+
400
+ Tensorus ships with a simple regex‑based Natural Query Language for retrieving
401
+ tensors by metadata. You can issue NQL queries via the API or from the "NQL
402
+ Chat" page in the Streamlit UI.
403
+
404
+ ### Enabling LLM rewriting
405
+
406
+ Set `NQL_USE_LLM=true` before starting the API server or Streamlit UI to enable
407
+ experimental LLM rewriting of natural language queries. Optionally specify a
408
+ model with `NQL_LLM_MODEL=<model-name>` (e.g., `google/flan-t5-base`). This
409
+ feature relies on the heavy `transformers` dependency and may trigger a model
410
+ download the first time it runs, which can take some time.
411
+
412
+ ## Agent Details
413
+
414
+ ### Data Ingestion Agent
415
+
416
+ * **Functionality:** Monitors a source directory for new files, preprocesses them into tensors, and inserts them into TensorStorage.
417
+ * **Supported File Types:** CSV, PNG, JPG, JPEG, TIF, TIFF (can be extended).
418
+ * **Preprocessing:** Uses default functions for CSV and images (resize, normalize).
419
+ * **Configuration:**
420
+ * `source_directory`: The directory to monitor.
421
+ * `polling_interval_sec`: How often to check for new files.
422
+ * `preprocessing_rules`: A dictionary mapping file extensions to custom preprocessing functions.
423
+
424
+ ### RL Agent
425
+
426
+ * **Functionality:** A Deep Q-Network (DQN) agent that learns from experiences stored in TensorStorage.
427
+ * **Environment:** Uses a `DummyEnv` for demonstration.
428
+ * **Experience Storage:** Stores experiences (state, action, reward, next_state, done) in TensorStorage.
429
+ * **Training:** Implements epsilon-greedy exploration and target network updates.
430
+ * **Configuration:**
431
+ * `state_dim`: Dimensionality of the environment state.
432
+ * `action_dim`: Number of discrete actions.
433
+ * `hidden_size`: Hidden layer size for the DQN.
434
+ * `lr`: Learning rate.
435
+ * `gamma`: Discount factor.
436
+ * `epsilon_*`: Epsilon-greedy parameters.
437
+ * `target_update_freq`: Target network update frequency.
438
+ * `batch_size`: Experience batch size.
439
+ * `experience_dataset`: Dataset name for experiences.
440
+ * `state_dataset`: Dataset name for state tensors.
441
+
442
+ ### AutoML Agent
443
+
444
+ * **Functionality:** Performs hyperparameter optimization using random search.
445
+ * **Model:** Trains a simple `DummyMLP` model.
446
+ * **Search Space:** Configurable hyperparameter search space (learning rate, hidden size, activation).
447
+ * **Evaluation:** Trains and evaluates models on synthetic data.
448
+ * **Results:** Stores trial results (parameters, score) in TensorStorage.
449
+ * **Configuration:**
450
+ * `search_space`: Dictionary defining the hyperparameter search space.
451
+ * `input_dim`: Input dimension for the model.
452
+ * `output_dim`: Output dimension for the model.
453
+ * `task_type`: Type of task ('regression' or 'classification').
454
+ * `results_dataset`: Dataset name for storing results.
455
+
456
+
457
+ ### Tensorus Models
458
+
459
+ The collection of example models previously bundled with Tensorus now lives in
460
+ a separate repository: [tensorus/models](https://github.com/tensorus/models).
461
+ Install it with:
462
+
463
+ ```bash
464
+ pip install tensorus-models
465
+ ```
466
+
467
+ When the package is installed, Tensorus will automatically import it. Set the
468
+ environment variable `TENSORUS_MINIMAL_IMPORT=1` before importing Tensorus to
469
+ skip this optional dependency and keep startup lightweight.
470
+
471
+ ## Basic Tensor Operations
472
+
473
+ This section details the core tensor manipulation functionalities provided by `tensor_ops.py`. These operations are designed to be robust, with built-in type and shape checking where appropriate.
474
+
475
+ #### Arithmetic Operations
476
+
477
+ * `add(t1, t2)`: Element-wise addition of two tensors, or a tensor and a scalar.
478
+ * `subtract(t1, t2)`: Element-wise subtraction of two tensors, or a tensor and a scalar.
479
+ * `multiply(t1, t2)`: Element-wise multiplication of two tensors, or a tensor and a scalar.
480
+ * `divide(t1, t2)`: Element-wise division of two tensors, or a tensor and a scalar. Includes checks for division by zero.
481
+ * `power(t1, t2)`: Raises each element in `t1` to the power of `t2`. Supports tensor or scalar exponents.
482
+ * `log(tensor)`: Element-wise natural logarithm with warnings for non-positive values.
483
+
484
+ #### Matrix and Dot Operations
485
+
486
+ * `matmul(t1, t2)`: Matrix multiplication of two tensors, supporting various dimensionalities (e.g., 2D matrices, batched matrix multiplication).
487
+ * `dot(t1, t2)`: Computes the dot product of two 1D tensors.
488
+ * `outer(t1, t2)`: Computes the outer product of two 1‑D tensors.
489
+ * `cross(t1, t2, dim=-1)`: Computes the cross product along the specified dimension (size must be 3).
490
+ * `matrix_eigendecomposition(matrix_A)`: Returns eigenvalues and eigenvectors of a square matrix.
491
+ * `matrix_trace(matrix_A)`: Computes the trace of a 2-D matrix.
492
+ * `tensor_trace(tensor_A, axis1=0, axis2=1)`: Trace of a tensor along two axes.
493
+ * `svd(matrix)`: Singular value decomposition of a matrix, returns `U`, `S`, and `Vh`.
494
+ * `qr_decomposition(matrix)`: QR decomposition returning `Q` and `R`.
495
+ * `lu_decomposition(matrix)`: LU decomposition returning permutation `P`, lower `L`, and upper `U` matrices.
496
+ * `cholesky_decomposition(matrix)`: Cholesky factor of a symmetric positive-definite matrix.
497
+ * `matrix_inverse(matrix)`: Inverse of a square matrix.
498
+ * `matrix_determinant(matrix)`: Determinant of a square matrix.
499
+ * `matrix_rank(matrix)`: Rank of a matrix.
500
+
501
+ #### Reduction Operations
502
+
503
+ * `sum(tensor, dim=None, keepdim=False)`: Computes the sum of tensor elements over specified dimensions.
504
+ * `mean(tensor, dim=None, keepdim=False)`: Computes the mean of tensor elements over specified dimensions. Tensor is cast to float for calculation.
505
+ * `min(tensor, dim=None, keepdim=False)`: Finds the minimum value in a tensor, optionally along a dimension. Returns values and indices if `dim` is specified.
506
+ * `max(tensor, dim=None, keepdim=False)`: Finds the maximum value in a tensor, optionally along a dimension. Returns values and indices if `dim` is specified.
507
+ * `variance(tensor, dim=None, unbiased=False, keepdim=False)`: Variance of tensor elements.
508
+ * `covariance(matrix_X, matrix_Y=None, rowvar=True, bias=False, ddof=None)`: Covariance matrix estimation.
509
+ * `correlation(matrix_X, matrix_Y=None, rowvar=True)`: Correlation coefficient matrix.
510
+
511
+ #### Reshaping and Slicing
512
+
513
+ * `reshape(tensor, shape)`: Changes the shape of a tensor without changing its data.
514
+ * `transpose(tensor, dim0, dim1)`: Swaps two dimensions of a tensor.
515
+ * `permute(tensor, dims)`: Permutes the dimensions of a tensor according to the specified order.
516
+ * `flatten(tensor, start_dim=0, end_dim=-1)`: Flattens a range of dimensions into a single dimension.
517
+ * `squeeze(tensor, dim=None)`: Removes dimensions of size 1, or a specific dimension if provided.
518
+ * `unsqueeze(tensor, dim)`: Inserts a dimension of size 1 at the given position.
519
+
520
+ #### Concatenation and Splitting
521
+
522
+ * `concatenate(tensors, dim=0)`: Joins a sequence of tensors along an existing dimension.
523
+ * `stack(tensors, dim=0)`: Joins a sequence of tensors along a new dimension.
524
+
525
+ #### Advanced Operations
526
+
527
+ * `einsum(equation, *tensors)`: Applies Einstein summation convention to the input tensors based on the provided equation string.
528
+ * `compute_gradient(func, tensor)`: Returns the gradient of a scalar `func` with respect to `tensor`.
529
+ * `compute_jacobian(func, tensor)`: Computes the Jacobian matrix of a vector function.
530
+ * `convolve_1d(signal_x, kernel_w, mode='valid')`: 1‑D convolution using `torch.nn.functional.conv1d`.
531
+ * `convolve_2d(image_I, kernel_K, mode='valid')`: 2‑D convolution using `torch.nn.functional.conv2d`.
532
+ * `frobenius_norm(tensor)`: Calculates the Frobenius norm.
533
+ * `l1_norm(tensor)`: Calculates the L1 norm (sum of absolute values).
534
+
535
+ ## Tensor Decomposition Operations
536
+
537
+ Tensorus includes a library of higher‑order tensor factorizations in
538
+ `tensor_decompositions.py`. These operations mirror the algorithms
539
+ available in TensorLy and related libraries.
540
+
541
+ * **CP Decomposition** – Canonical Polyadic factorization returning
542
+ weights and factor matrices.
543
+ * **NTF‑CP Decomposition** – Non‑negative CP using
544
+ `non_negative_parafac`.
545
+ * **Tucker Decomposition** – Standard Tucker factorization for specified
546
+ ranks.
547
+ * **Non‑negative Tucker / Partial Tucker** – Variants with HOOI and
548
+ non‑negative constraints.
549
+ * **HOSVD** – Higher‑order SVD (Tucker with full ranks).
550
+ * **Tensor Train (TT)** – Sequence of TT cores representing the tensor.
551
+ * **TT‑SVD** – TT factorization via SVD initialization.
552
+ * **Tensor Ring (TR)** – Circular variant of TT.
553
+ * **Hierarchical Tucker (HT)** – Decomposition using a dimension tree.
554
+ * **Block Term Decomposition (BTD)** – Sum of Tucker‑1 terms for 3‑way
555
+ tensors.
556
+ * **t‑SVD** – Tensor singular value decomposition based on the
557
+ t‑product.
558
+
559
+ Examples of how to call these methods are provided in
560
+ [`tensorus/tensor_decompositions.py`](tensorus/tensor_decompositions.py).
561
+
562
+ ## MCP Server Details
563
+
564
+ The Tensorus Model Context Protocol (MCP) Server allows external AI agents, LLM-based applications, and other MCP-compatible clients to interact with Tensorus functionalities in a standardized way. It acts as a bridge, translating MCP requests into calls to the Tensorus Python API.
565
+
566
+ ### Overview
567
+
568
+ * **Protocol:** Implements the [Model Context Protocol](https://modelcontextprotocol.io/introduction).
569
+ * **Language:** Python, using the `fastmcp` library.
570
+ * **Communication:** Typically uses stdio for communication with a single client.
571
+ * **Interface:** Exposes Tensorus capabilities as a set of "tools" that an MCP client can list and call.
572
+
573
+ ### Available Tools
574
+
575
+ The MCP server provides tools for various Tensorus functionalities. Below is an overview. For detailed input schemas and descriptions, an MCP client can call the standard `tools/list` method on the server, or you can inspect the tool definitions in `tensorus/mcp_server.py`.
576
+
577
+ * **Dataset Management:**
578
+ * `tensorus_list_datasets`: Lists all available datasets.
579
+ * `tensorus_create_dataset`: Creates a new dataset.
580
+ * `tensorus_delete_dataset`: Deletes an existing dataset.
581
+ * **Tensor Management:**
582
+ * `tensorus_ingest_tensor`: Ingests a new tensor (with data provided as JSON) into a dataset.
583
+ * `tensorus_get_tensor_details`: Retrieves the data and metadata for a specific tensor.
584
+ * `tensorus_delete_tensor`: Deletes a specific tensor from a dataset.
585
+ * `tensorus_update_tensor_metadata`: Updates the metadata of a specific tensor.
586
+ * **Tensor Operations:** These tools allow applying operations from the `TensorOps` library to tensors stored in Tensorus.
587
+ * `tensorus_apply_unary_operation`: Applies operations like `log`, `reshape`, `transpose`, `permute`, `sum`, `mean`, `min`, `max`.
588
+ * `tensorus_apply_binary_operation`: Applies operations like `add`, `subtract`, `multiply`, `divide`, `power`, `matmul`, `dot`.
589
+ * `tensorus_apply_list_operation`: Applies operations like `concatenate` and `stack` that take a list of input tensors.
590
+ * `tensorus_apply_einsum`: Applies Einstein summation.
591
+
592
+ *Note on Tensor Operations via MCP:* Input tensors are referenced by their `dataset_name` and `record_id`. The result is typically stored as a new tensor, and the MCP tool returns details of this new result tensor (like its `record_id`).
593
+
594
+ ### Example Client Interaction (Conceptual)
595
+
596
+ ```javascript
597
+ // Conceptual MCP client-side JavaScript
598
+ // Assuming 'client' is an initialized MCP client connected to the Tensorus MCP Server
599
+
600
+ async function example() {
601
+ // List available tools
602
+ const { tools } = await client.request({ method: 'tools/list' }, {});
603
+ console.log("Available Tensorus Tools:", tools.map(t => t.name)); // Log only names for brevity
604
+
605
+ // Create a new dataset
606
+ const createResponse = await client.request({ method: 'tools/call' }, {
607
+ name: 'tensorus_create_dataset',
608
+ arguments: { dataset_name: 'my_mcp_dataset' }
609
+ });
610
+ console.log(JSON.parse(createResponse.content[0].text).message);
611
+
612
+ // Ingest a tensor
613
+ const ingestResponse = await client.request({ method: 'tools/call' }, {
614
+ name: 'tensorus_ingest_tensor',
615
+ arguments: {
616
+ dataset_name: 'my_mcp_dataset',
617
+ tensor_shape: [2, 2],
618
+ tensor_dtype: 'float32',
619
+ tensor_data: [[1.0, 2.0], [3.0, 4.0]],
620
+ metadata: { source: 'mcp_client_example' }
621
+ }
622
+ });
623
+ // Assuming the Python API returns { success, message, data: { record_id, ... } }
624
+ // And MCP server stringifies this whole object in the text content
625
+ const ingestData = JSON.parse(ingestResponse.content[0].text);
626
+ console.log("Ingest success:", ingestData.success, "Record ID:", ingestData.data.record_id);
627
+ }
628
+ ```
629
+
630
+ You can also interact with the server using the included Python helper:
631
+
632
+ ```python
633
+ from tensorus.mcp_client import TensorusMCPClient
634
+
635
+ async def example_py():
636
+ async with TensorusMCPClient("http://localhost:7860/mcp") as client:
637
+ datasets = await client.list_datasets()
638
+ print(datasets)
639
+ ```
640
+
641
+ ## Completed Features
642
+
643
+ * **Tensor Storage:** Efficiently stores and retrieves PyTorch tensors with associated metadata, including in-memory and optional file-based persistence. Supports dataset creation, tensor ingestion, querying, sampling, and metadata updates.
644
+ * **Natural Query Language (NQL):** Provides a basic regex-based natural language interface for querying tensor data, supporting retrieval and simple filtering.
645
+ * **Agent Framework:** Includes several operational agents:
646
+ * **Data Ingestion Agent:** Monitors local directories for CSV and image files, preprocesses them, and ingests them into TensorStorage.
647
+ * **RL Agent:** Implements a DQN agent that learns from experiences (stored in TensorStorage) in a dummy environment.
648
+ * **AutoML Agent:** Performs random search hyperparameter optimization for a dummy MLP model, storing trial results in TensorStorage.
649
+ * **API-Driven:** A comprehensive FastAPI backend offers RESTful endpoints for dataset management, NQL querying, tensor operations, and agent control (live for Ingestion Agent, simulated for RL/AutoML).
650
+ * **Streamlit UI:** A multi-page user interface for dashboard overview, agent control, NQL interaction, data exploration, and API interaction.
651
+ * **Tensor Operations:** A library of robust tensor operations (arithmetic, matrix ops, reductions, reshaping, etc.) accessible via the API.
652
+ * **Model Context Protocol (MCP) Server:** A Python server built with `fastmcp` exposes Tensorus capabilities (storage and operations) via the Model Context Protocol.
653
+ * **Extensible Design:** The project is structured with modular components, facilitating future extensions.
654
+
655
+ ## Future Implementation
656
+
657
+ * **Enhanced NQL:** Integrate a local or remote LLM for more robust natural language understanding.
658
+ * **Advanced Agents:** Develop more sophisticated agents for specific tasks (e.g., anomaly detection, forecasting).
659
+ * **Persistent Storage Backend:** Replace/augment current file-based persistence with more robust database or cloud storage solutions (e.g., PostgreSQL, S3, MinIO).
660
+ * **Scalability & Performance:**
661
+ * Implement tensor chunking for very large tensors.
662
+ * Optimize query performance with indexing.
663
+ * Asynchronous operations for agents and API calls.
664
+ * **Security:** Implement authentication and authorization mechanisms for the API and UI.
665
+ * **Real-World Integration:**
666
+ * Connect Ingestion Agent to more data sources (e.g., cloud storage, databases, APIs).
667
+ * Integrate RL Agent with real-world environments or more complex simulations.
668
+ * **Advanced AutoML:**
669
+ * Implement sophisticated search algorithms (e.g., Bayesian Optimization, Hyperband).
670
+ * Support for diverse model architectures and custom models.
671
+ * **Model Management:** Add capabilities for saving, loading, versioning, and deploying trained models (from RL/AutoML).
672
+ * **Streaming Data Support:** Enhance Ingestion Agent to handle real-time streaming data.
673
+ * **Resource Management:** Add tools and controls for monitoring and managing the resource consumption (CPU, memory) of agents.
674
+ * **Improved UI/UX:** Continuously refine the Streamlit UI for better usability and richer visualizations.
675
+ * **Comprehensive Testing:** Expand unit, integration, and end-to-end tests.
676
+
677
+ ## Contributing
678
+
679
+ Contributions are welcome! Please feel free to open issues or submit pull requests.
680
+
681
+ ## License
682
+
683
+ MIT License
app.py ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ """
3
+ Streamlit frontend application for the Tensorus platform.
4
+ New UI structure with top navigation and Nexus Dashboard.
5
+ """
6
+
7
+ import streamlit as st
8
+ import json
9
+ import time
10
+ import requests # Needed for ui_utils functions if integrated
11
+ import logging # Needed for ui_utils functions if integrated
12
+ import torch # Needed for integrated tensor utils
13
+ from typing import List, Dict, Any, Optional, Union, Tuple # Needed for integrated tensor utils
14
+ from pages.pages_shared_utils import get_api_status, get_agent_status, get_datasets # Updated imports
15
+
16
+ # Work around a Streamlit bug where inspecting `torch.classes` during module
17
+ # watching can raise a `RuntimeError`. Removing the module from `sys.modules`
18
+ # prevents Streamlit's watcher from trying to access it.
19
+ import sys
20
+ if "torch.classes" in sys.modules:
21
+ del sys.modules["torch.classes"]
22
+
23
+ # --- Page Configuration ---
24
+ st.set_page_config(
25
+ page_title="Tensorus Platform",
26
+ page_icon="🧊",
27
+ layout="wide",
28
+ initial_sidebar_state="collapsed" # Collapse sidebar as nav is now at top
29
+ )
30
+
31
+ # --- Configure Logging (Optional but good practice) ---
32
+ logging.basicConfig(level=logging.INFO)
33
+ logger = logging.getLogger(__name__)
34
+
35
+ # --- Integrated Tensor Utilities (Preserved) ---
36
+
37
+ def _validate_tensor_data(data: List[Any], shape: List[int]):
38
+ """
39
+ Validates if the nested list structure of 'data' matches the 'shape'.
40
+ Raises ValueError on mismatch. (Optional validation)
41
+ """
42
+ if not shape:
43
+ if not isinstance(data, (int, float)): raise ValueError("Scalar tensor data must be a single number.")
44
+ return True
45
+ if not isinstance(data, list): raise ValueError(f"Data for shape {shape} must be a list.")
46
+ expected_len = shape[0]
47
+ if len(data) != expected_len: raise ValueError(f"Dimension 0 mismatch: Expected {expected_len}, got {len(data)} for shape {shape}.")
48
+ if len(shape) > 1:
49
+ for item in data: _validate_tensor_data(item, shape[1:])
50
+ elif len(shape) == 1:
51
+ if not all(isinstance(x, (int, float)) for x in data): raise ValueError("Innermost list elements must be numbers.")
52
+ return True
53
+
54
+ def list_to_tensor(shape: List[int], dtype_str: str, data: Union[List[Any], int, float]) -> torch.Tensor:
55
+ """
56
+ Converts a Python list (potentially nested) or scalar into a PyTorch tensor
57
+ with the specified shape and dtype.
58
+ """
59
+ try:
60
+ dtype_map = {
61
+ 'float32': torch.float32, 'float': torch.float,
62
+ 'float64': torch.float64, 'double': torch.double,
63
+ 'int32': torch.int32, 'int': torch.int,
64
+ 'int64': torch.int64, 'long': torch.long,
65
+ 'bool': torch.bool
66
+ }
67
+ torch_dtype = dtype_map.get(dtype_str.lower())
68
+ if torch_dtype is None: raise ValueError(f"Unsupported dtype string: {dtype_str}")
69
+
70
+ tensor = torch.tensor(data, dtype=torch_dtype)
71
+
72
+ if list(tensor.shape) != shape:
73
+ logger.debug(f"Initial tensor shape {list(tensor.shape)} differs from target {shape}. Attempting reshape.")
74
+ try:
75
+ tensor = tensor.reshape(shape)
76
+ except RuntimeError as reshape_err:
77
+ raise ValueError(f"Created tensor shape {list(tensor.shape)} != requested {shape} and reshape failed: {reshape_err}") from reshape_err
78
+
79
+ return tensor
80
+ except (TypeError, ValueError) as e:
81
+ logger.error(f"Error converting list to tensor: {e}. Shape: {shape}, Dtype: {dtype_str}")
82
+ raise ValueError(f"Failed tensor conversion: {e}") from e
83
+ except Exception as e:
84
+ logger.exception(f"Unexpected error during list_to_tensor: {e}")
85
+ raise ValueError(f"Unexpected tensor conversion error: {e}") from e
86
+
87
+ def tensor_to_list(tensor: torch.Tensor) -> Tuple[List[int], str, List[Any]]:
88
+ """
89
+ Converts a PyTorch tensor back into its shape, dtype string, and nested list representation.
90
+ """
91
+ if not isinstance(tensor, torch.Tensor):
92
+ raise TypeError("Input must be a torch.Tensor")
93
+ shape = list(tensor.shape)
94
+ dtype_str = str(tensor.dtype).split('.')[-1]
95
+ data = tensor.tolist()
96
+ return shape, dtype_str, data
97
+
98
+ # --- Helper functions for dashboard (can be expanded) ---
99
+ def get_total_tensors_placeholder():
100
+ # For now, as this endpoint is hypothetical for this task
101
+ return "N/A"
102
+
103
+ @st.cache_data(ttl=300)
104
+ def get_active_datasets_placeholder():
105
+ datasets = get_datasets()
106
+ if datasets: # get_datasets returns [] on error or if no datasets
107
+ return str(len(datasets))
108
+ return "Error"
109
+
110
+ @st.cache_data(ttl=60)
111
+ def get_agents_online_placeholder():
112
+ agent_data = get_agent_status()
113
+ if agent_data:
114
+ try:
115
+ # Assuming agent_data is a dict like {'agent_id': {'status': 'running', ...}}
116
+ # or {'agent_id': {'running': True, ...}}
117
+ online_agents = sum(1 for agent in agent_data.values()
118
+ if agent.get('running') is True or str(agent.get('status', '')).lower() == 'running')
119
+ total_agents = len(agent_data)
120
+ return f"{online_agents}/{total_agents} Online"
121
+ except Exception as e:
122
+ logger.error(f"Error processing agent data for dashboard: {e}")
123
+ return "Error"
124
+ return "N/A" # If agent_data is None
125
+
126
+ # --- CSS Styles ---
127
+ # Renaming app.py's specific CSS loader to avoid confusion with the shared one.
128
+ def load_app_specific_css():
129
+ # This function now only loads styles specific to the Nexus Dashboard content in app.py
130
+ # General styles (body, .stApp, nav, common-card, etc.) are in pages_shared_utils.load_shared_css()
131
+ st.markdown("""
132
+ <style>
133
+ /* Nexus Dashboard Specific Styles */
134
+ .dashboard-title { /* Custom title for the dashboard */
135
+ color: #e0e0ff; /* Light purple/blue, matching shared h1 */
136
+ text-align: center;
137
+ margin-top: 1rem; /* Standardized margin */
138
+ margin-bottom: 2rem;
139
+ font-size: 2.8em; /* Slightly larger for main dashboard title */
140
+ font-weight: bold;
141
+ }
142
+
143
+ /* Metric Cards Container for Dashboard */
144
+ .metric-card-container {
145
+ display: flex;
146
+ flex-wrap: wrap;
147
+ justify-content: space-around;
148
+ gap: 20px;
149
+ padding: 0 1rem;
150
+ }
151
+
152
+ /* Individual Metric Card - inherits from .common-card (defined in shared_utils) */
153
+ /* This specific .metric-card class is for dashboard cards if they need further specialization */
154
+ .metric-card {
155
+ /* Inherits background, border, padding, shadow, transition from .common-card */
156
+ flex: 1 1 220px; /* Adjusted flex basis */
157
+ min-width: 200px;
158
+ max-width: 300px;
159
+ text-align: center;
160
+ }
161
+ /* .metric-card:hover is inherited from .common-card:hover */
162
+
163
+ .metric-card .icon { /* Specific styling for icons within dashboard metric cards */
164
+ font-size: 2.8em; /* Slightly larger icon for dashboard */
165
+ margin-bottom: 0.5rem; /* Tighter spacing */
166
+ /* color is inherited from .common-card .icon or can be overridden here */
167
+ }
168
+ .metric-card h3 { /* Metric card titles */
169
+ /* color, font-size, margin-bottom, font-weight inherited from .common-card h3 */
170
+ /* No specific overrides here unless needed for dashboard metric cards */
171
+ }
172
+ .metric-card p.metric-value { /* Specific class for the main value display */
173
+ font-size: 2em; /* Larger font for the metric value */
174
+ font-weight: bold;
175
+ color: #ffffff; /* White color for emphasis */
176
+ margin-top: 0.25rem; /* Adjust as needed */
177
+ margin-bottom: 0;
178
+ }
179
+
180
+ /* Specific status icon colors for API status card in dashboard */
181
+ .metric-card.api-status-connected .icon { color: #50C878; } /* Emerald Green */
182
+ .metric-card.api-status-disconnected .icon { color: #FF6961; } /* Pastel Red */
183
+
184
+ /* Activity Feed Styles for Dashboard */
185
+ .activity-feed-container { /* Container for the activity feed section */
186
+ margin-top: 2.5rem;
187
+ padding: 0 1.5rem;
188
+ }
189
+ /* .activity-feed-container h2 is covered by shared h2 styles */
190
+
191
+ .activity-item { /* Individual item in the feed */
192
+ background-color: #1e2a47; /* Slightly lighter than common-card, for variety */
193
+ padding: 0.85rem 1.25rem; /* Adjusted padding */
194
+ border-radius: 6px;
195
+ margin-bottom: 0.6rem; /* Slightly more space */
196
+ font-size: 0.95em;
197
+ border-left: 4px solid #3a6fbf; /* Accent Blue border */
198
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
199
+ }
200
+ .activity-item .timestamp { /* Timestamp within an activity item */
201
+ color: #8080af; /* Muted purple for timestamp */
202
+ font-weight: bold;
203
+ font-size: 0.85em; /* Smaller timestamp */
204
+ margin-right: 0.75em; /* More space after timestamp */
205
+ }
206
+ .activity-item strong { /* Agent name or key part of activity */
207
+ color: #b0b0df; /* Tertiary heading color for emphasis */
208
+ }
209
+ </style>
210
+ """, unsafe_allow_html=True)
211
+
212
+ # --- Page Functions ---
213
+
214
+ def nexus_dashboard_content():
215
+ # Uses .dashboard-title for its main heading
216
+ st.markdown('<h1 class="dashboard-title">Tensorus Nexus</h1>', unsafe_allow_html=True)
217
+
218
+ # System Health & Key Metrics
219
+ # Uses .metric-card-container for the overall layout
220
+ st.markdown('<div class="metric-card-container">', unsafe_allow_html=True)
221
+
222
+ # Card 1: Total Tensors
223
+ total_tensors_val = get_total_tensors_placeholder()
224
+ st.markdown(f"""
225
+ <div class="common-card metric-card">
226
+ <div class="icon">⚙️</div>
227
+ <h3>Total Tensors</h3>
228
+ <p class="metric-value">{total_tensors_val}</p>
229
+ </div>
230
+ """, unsafe_allow_html=True)
231
+
232
+ # Card 2: Active Datasets
233
+ active_datasets_val = get_active_datasets_placeholder()
234
+ st.markdown(f"""
235
+ <div class="common-card metric-card">
236
+ <div class="icon">📚</div>
237
+ <h3>Active Datasets</h3>
238
+ <p class="metric-value">{active_datasets_val}</p>
239
+ </div>
240
+ """, unsafe_allow_html=True)
241
+
242
+ # Card 3: Agents Online
243
+ agents_online_val = get_agents_online_placeholder()
244
+ st.markdown(f"""
245
+ <div class="common-card metric-card">
246
+ <div class="icon">🤖</div>
247
+ <h3>Agents Online</h3>
248
+ <p class="metric-value">{agents_online_val}</p>
249
+ </div>
250
+ """, unsafe_allow_html=True)
251
+
252
+ # Card 4: API Status
253
+ @st.cache_data(ttl=30)
254
+ def cached_get_api_status():
255
+ return get_api_status()
256
+
257
+ api_ok, _ = cached_get_api_status()
258
+ api_status_text_val = "Connected" if api_ok else "Disconnected"
259
+ # Add specific class for API status icon coloring based on shared status styles
260
+ api_status_icon_class = "api-status-connected" if api_ok else "api-status-disconnected"
261
+ api_icon_char = "✔️" if api_ok else "❌"
262
+ st.markdown(f"""
263
+ <div class="common-card metric-card {api_status_icon_class}">
264
+ <div class="icon">{api_icon_char}</div>
265
+ <h3>API Status</h3>
266
+ <p class="metric-value">{api_status_text_val}</p>
267
+ </div>
268
+ """, unsafe_allow_html=True)
269
+
270
+ st.markdown('</div>', unsafe_allow_html=True) # Close metric-card-container
271
+
272
+ # Agent Activity Feed
273
+ # Uses .activity-feed-container and h2 (which is styled by shared CSS)
274
+ st.markdown('<div class="activity-feed-container">', unsafe_allow_html=True)
275
+ st.markdown('<h2>Recent Agent Activity</h2>', unsafe_allow_html=True)
276
+
277
+ # Placeholder activity items
278
+ activity_items = [
279
+ {"timestamp": "2023-10-27 10:05:15", "agent": "IngestionAgent", "action": "added 'img_new.png' to 'raw_images'"},
280
+ {"timestamp": "2023-10-27 10:02:30", "agent": "RLAgent", "action": "completed training cycle, reward: 75.2"},
281
+ {"timestamp": "2023-10-27 09:55:48", "agent": "MonitoringAgent", "action": "detected high CPU usage on node 'compute-01'"},
282
+ {"timestamp": "2023-10-27 09:45:10", "agent": "IngestionAgent", "action": "processed batch of 100 sensor readings"},
283
+ ]
284
+
285
+ for item in activity_items:
286
+ st.markdown(f"""
287
+ <div class="activity-item">
288
+ <span class="timestamp">[{item['timestamp']}]</span>
289
+ <strong>{item['agent']}:</strong> {item['action']}
290
+ </div>
291
+ """, unsafe_allow_html=True)
292
+ st.markdown('</div>', unsafe_allow_html=True) # Close activity-feed-container
293
+
294
+
295
+ # --- Main Application ---
296
+ # Import the shared CSS loader
297
+ try:
298
+ from pages.pages_shared_utils import load_css as load_shared_css
299
+ except ImportError:
300
+ st.error("Failed to import shared CSS loader. Page styling will be incomplete.")
301
+ def load_shared_css(): pass # Dummy function
302
+
303
+ def main():
304
+ load_shared_css() # Load shared styles first
305
+ load_app_specific_css() # Then load app-specific styles (for dashboard)
306
+
307
+ # Initialize session state for current page if not set
308
+ if 'current_page' not in st.session_state:
309
+ st.session_state.current_page = "Nexus Dashboard"
310
+
311
+ # --- Top Navigation Bar ---
312
+ nav_items = {
313
+ "Nexus Dashboard": "Nexus Dashboard",
314
+ "Agents": "Agents",
315
+ "Explorer": "Explorer",
316
+ "Query Hub": "Query Hub",
317
+ "API Docs": "API Docs"
318
+ }
319
+
320
+ nav_html_parts = [f'<div class="topnav-container"><span class="logo">🧊 Tensorus</span><nav>']
321
+ for page_id, page_name in nav_items.items():
322
+ active_class = "active" if st.session_state.current_page == page_id else ""
323
+ # Use st.button an invisible character as key to make Streamlit rerun and update session state
324
+ # This is a common workaround for making nav links update state
325
+ # We'll use a more robust JavaScript approach if this is problematic, but st.query_params is better
326
+
327
+ # Using query_params for navigation state is more robust
328
+ # Check if query_params for page is set, if so, it overrides session_state
329
+ query_params = st.query_params.to_dict()
330
+ if "page" in query_params and query_params["page"] in nav_items:
331
+ st.session_state.current_page = query_params["page"]
332
+ # Clear the query param after use to avoid it sticking on manual refresh
333
+ # However, for deeplinking, we might want to keep it.
334
+ # For now, let's allow it to persist. To clear: st.query_params.clear()
335
+
336
+ # Construct the link with query_params
337
+ nav_html_parts.append(
338
+ f'<a href="?page={page_id}" class="{active_class}" target="_self">{page_name}</a>'
339
+ )
340
+
341
+ nav_html_parts.append('</nav></div>')
342
+ st.markdown("".join(nav_html_parts), unsafe_allow_html=True)
343
+
344
+ # Handle page selection clicks (alternative to query_params if that proves problematic)
345
+ # This part is tricky with pure st.markdown links.
346
+ # The query_params approach is generally preferred for web-like navigation.
347
+
348
+ # --- Content Area ---
349
+ # The main app.py now acts as a router to other pages or displays dashboard content directly.
350
+ if st.session_state.current_page == "Nexus Dashboard":
351
+ nexus_dashboard_content()
352
+ elif st.session_state.current_page == "Agents":
353
+ st.switch_page("pages/control_panel_v2.py")
354
+ elif st.session_state.current_page == "Explorer":
355
+ st.switch_page("pages/data_explorer_v2.py")
356
+ elif st.session_state.current_page == "Query Hub":
357
+ st.switch_page("pages/nql_chatbot_v2.py")
358
+ elif st.session_state.current_page == "API Docs":
359
+ st.switch_page("pages/api_playground_v2.py")
360
+ else:
361
+ # Default to Nexus Dashboard if current_page is unrecognized
362
+ st.session_state.current_page = "Nexus Dashboard"
363
+ nexus_dashboard_content()
364
+ # It's good practice to trigger a rerun if state was corrected
365
+ st.rerun()
366
+
367
+
368
+ if __name__ == "__main__":
369
+ # --- Initialize Old Session State Keys (to avoid errors if they are still used by preserved code) ---
370
+ # This should be phased out as those sections are rebuilt.
371
+ if 'agent_status' not in st.session_state: st.session_state.agent_status = None
372
+ if 'datasets' not in st.session_state: st.session_state.datasets = []
373
+ if 'selected_dataset' not in st.session_state: st.session_state.selected_dataset = None
374
+ if 'dataset_preview' not in st.session_state: st.session_state.dataset_preview = None
375
+ if 'explorer_result' not in st.session_state: st.session_state.explorer_result = None
376
+ if 'nql_response' not in st.session_state: st.session_state.nql_response = None
377
+
378
+ main()
docker-compose.yml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ app:
5
+ build: .
6
+ ports:
7
+ - "7860:7860"
8
+ volumes:
9
+ # Persist audit logs generated by the app container to the host
10
+ # The path inside the container is /app/tensorus_audit.log as defined in tensorus/audit.py (implicitly if WORKDIR is /app)
11
+ - ./tensorus_audit.log:/app/tensorus_audit.log
12
+ environment:
13
+ # Python settings
14
+ PYTHONUNBUFFERED: 1
15
+ # Tensorus App Settings
16
+ TENSORUS_STORAGE_BACKEND: postgres
17
+ TENSORUS_POSTGRES_HOST: db # Service name of the postgres container
18
+ TENSORUS_POSTGRES_PORT: 5432 # Default postgres port, container to container
19
+ TENSORUS_POSTGRES_USER: tensorus_user_compose
20
+ TENSORUS_POSTGRES_PASSWORD: tensorus_password_compose
21
+ TENSORUS_POSTGRES_DB: tensorus_db_compose
22
+ # TENSORUS_POSTGRES_DSN: "" # Can be set if preferred over individual params
23
+ TENSORUS_API_KEY_HEADER_NAME: "X-API-KEY"
24
+ TENSORUS_VALID_API_KEYS: "compose_key1,another_secure_key" # Comma-separated or JSON list of keys
25
+ TENSORUS_AUTH_JWT_ENABLED: "False"
26
+ # TENSORUS_AUTH_JWT_ISSUER: "your_issuer_here"
27
+ # TENSORUS_AUTH_JWT_AUDIENCE: "tensorus_api_audience"
28
+ # TENSORUS_AUTH_JWT_ALGORITHM: "RS256"
29
+ # TENSORUS_AUTH_JWT_JWKS_URI: "your_jwks_uri_here"
30
+ TENSORUS_AUTH_DEV_MODE_ALLOW_DUMMY_JWT: "False"
31
+ depends_on:
32
+ db:
33
+ condition: service_healthy # Wait for db to be healthy (Postgres specific healthcheck needed in db service)
34
+ # For simpler startup without healthcheck, just `depends_on: - db` is fine,
35
+ # but app might start before DB is ready. Entrypoint script in app can handle retries then.
36
+
37
+ db:
38
+ image: postgres:15-alpine
39
+ volumes:
40
+ - postgres_data:/var/lib/postgresql/data/ # Persist data
41
+ environment:
42
+ POSTGRES_USER: tensorus_user_compose # Must match app's TENSORUS_POSTGRES_USER
43
+ POSTGRES_PASSWORD: tensorus_password_compose # Must match app's TENSORUS_POSTGRES_PASSWORD
44
+ POSTGRES_DB: tensorus_db_compose # Must match app's TENSORUS_POSTGRES_DB
45
+ ports:
46
+ - "5433:5432" # Expose Postgres on host port 5433 to avoid conflict if local PG runs on 5432
47
+ healthcheck: # Basic healthcheck for Postgres
48
+ test: ["CMD-SHELL", "pg_isready -U tensorus_user_compose -d tensorus_db_compose"]
49
+ interval: 10s
50
+ timeout: 5s
51
+ retries: 5
52
+
53
+ volumes:
54
+ postgres_data: # Defines the named volume for data persistence
55
+ driver: local
docs/api_guide.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # API Guide
2
+
3
+ The Tensorus Metadata System provides a comprehensive RESTful API for managing and interacting with tensor metadata.
4
+
5
+ ## Interactive API Documentation (Swagger UI)
6
+
7
+ The API is self-documenting using OpenAPI. Once the application is running (e.g., locally at `http://localhost:7860`), you can access the interactive Swagger UI at:
8
+
9
+ * **`/docs`**: [http://localhost:7860/docs](http://localhost:7860/docs)
10
+
11
+ This interface allows you to explore all available endpoints, view their request and response schemas, and even try out API calls directly from your browser.
12
+
13
+ ## Alternative API Documentation (ReDoc)
14
+
15
+ An alternative ReDoc interface is also available at:
16
+
17
+ * **`/redoc`**: [http://localhost:7860/redoc](http://localhost:7860/redoc)
18
+
19
+ ### ReDoc and Content Security Policy
20
+
21
+ Tensorus sets the header `Content-Security-Policy: default-src 'self'` by default. This strict policy blocks the external fonts, stylesheets, and scripts that ReDoc loads from CDNs, so the ReDoc page will appear blank unless the policy is relaxed.
22
+
23
+ To permit these assets you can override the policy via the `TENSORUS_CONTENT_SECURITY_POLICY` environment variable:
24
+
25
+ ```bash
26
+ TENSORUS_CONTENT_SECURITY_POLICY="default-src 'self'; script-src 'self' https://cdn.jsdelivr.net; style-src 'self' https://fonts.googleapis.com 'unsafe-inline'; font-src 'self' https://fonts.gstatic.com; img-src 'self' https://fastapi.tiangolo.com"
27
+ ```
28
+
29
+ Set the variable to an empty string or `NONE` to remove the header entirely.
30
+
31
+ ## Main API Categories
32
+
33
+ The API is organized into several categories based on functionality:
34
+
35
+ * **Tensor Descriptors:** Core operations for creating, reading, updating, deleting, and listing tensor descriptors.
36
+ * **Semantic Metadata (Per Tensor):** Managing human-readable names, descriptions, etc., associated with specific tensors, nested under `/tensor_descriptors/{tensor_id}/semantic/`.
37
+ * **Extended Metadata (Per Tensor):** CRUD operations for detailed metadata types, nested under `/tensor_descriptors/{tensor_id}/`:
38
+ * Lineage Metadata (`/lineage`)
39
+ * Computational Metadata (`/computational`)
40
+ * Quality Metadata (`/quality`)
41
+ * Relational Metadata (`/relational`)
42
+ * Usage Metadata (`/usage`)
43
+ * **Versioning & Lineage:** Endpoints for creating tensor versions and managing lineage relationships at a higher level.
44
+ * **Search & Aggregation:** Advanced querying, text-based search across metadata, and metadata aggregation.
45
+ * **Import/Export:** Endpoints for exporting and importing tensor metadata in JSON format.
46
+ * **Management:** Health checks and system metrics.
47
+ * **Authentication:** Write operations (POST, PUT, PATCH, DELETE) are protected by API keys. See [Installation and Configuration](./installation.md) for details on setting API keys. The API key should be passed in the HTTP header specified by `TENSORUS_API_KEY_HEADER_NAME` (default: `X-API-KEY`).
48
+
49
+ Please refer to the interactive `/docs` for detailed information on each endpoint, including request parameters, request bodies, and response structures.
docs/index.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Welcome to the Tensorus Metadata System
2
+
3
+ The Tensorus Metadata System is a comprehensive solution for managing metadata
4
+ associated with tensor data in an Agentic Tensor Database. It enables advanced
5
+ capabilities such as semantic search, lineage tracking, and version control.
6
+
7
+ This documentation provides an overview of the system, how to install and
8
+ configure it, and details about its API and metadata schemas.
9
+
10
+ ## Key Features
11
+
12
+ * **Rich Metadata Schemas:** Describe tensors with detailed structural, semantic, lineage, computational, quality, relational, and usage metadata.
13
+ * **Flexible Storage:** Supports in-memory storage for quick testing and a PostgreSQL backend for persistent, scalable storage.
14
+ * **Powerful API:** A comprehensive RESTful API for CRUD operations, advanced querying, search, aggregation, versioning, and lineage tracking.
15
+ * **Data Export/Import:** Utilities to export and import metadata in a standard JSON format.
16
+ * **Security:** Basic API key authentication for write operations and audit logging.
17
+ * **Monitoring:** Health check and basic metrics endpoints.
18
+ * **Enterprise Ready (Conceptual):** Designed with enterprise features like JWT authentication in mind.
19
+ * **Analytics:** Example APIs for deriving insights from metadata (e.g., stale tensors, complex tensors, co-occurring tags).
20
+ * **Dockerized:** Easy to deploy using Docker and Docker Compose.
21
+
22
+ Navigate through the documentation using the sidebar to learn more.
docs/installation.md ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Installation and Configuration
2
+
3
+ ## Installation
4
+
5
+ ### Using Docker (Recommended for Local Development/Testing)
6
+
7
+ The easiest way to get Tensorus and its PostgreSQL backend running locally is by using Docker Compose:
8
+
9
+ 1. Ensure you have Docker and Docker Compose installed.
10
+ 2. Clone the repository (if applicable) or ensure you have the `docker-compose.yml` and application files.
11
+ 3. From the project root, run:
12
+ ```bash
13
+ docker-compose up --build
14
+ ```
15
+ 4. The API will typically be available at `http://localhost:7860`.
16
+
17
+ ### Manual Installation (From Source)
18
+
19
+ 1. Ensure you have Python 3.10+ installed.
20
+ 2. Clone the repository.
21
+ 3. Create and activate a virtual environment:
22
+ ```bash
23
+ python -m venv venv
24
+ source venv/bin/activate # On Windows: venv\Scripts\activate
25
+ ```
26
+ 4. Install dependencies:
27
+ ```bash
28
+ pip install -r requirements.txt
29
+ ```
30
+ Heavy machine-learning libraries used by the optional models are not
31
+ installed by default. Install them with the `[models]` extra when needed:
32
+ ```bash
33
+ pip install -e .[models]
34
+ ```
35
+ If you intend to run the test suite, also install the test requirements:
36
+ ```bash
37
+ pip install -r requirements-test.txt
38
+ ```
39
+ This ensures `fastapi>=0.110` is installed so the API is compatible with
40
+ Pydantic v2.
41
+ 5. *(Optional)* Install the example models package. The built-in models that
42
+ were previously part of this repository now live at
43
+ [https://github.com/tensorus/models](https://github.com/tensorus/models):
44
+ ```bash
45
+ pip install tensorus-models
46
+ ```
47
+ 6. Set up the necessary environment variables (see Configuration below).
48
+ 7. Run the application using Uvicorn:
49
+ ```bash
50
+ uvicorn tensorus.api:app --host 0.0.0.0 --port 7860
51
+ ```
52
+
53
+ This exposes all dataset and agent endpoints at `http://localhost:7860`.
54
+
55
+ ## Configuration
56
+
57
+ Tensorus is configured via environment variables. Key variables include:
58
+
59
+ * `TENSORUS_STORAGE_BACKEND`: Specifies the storage backend.
60
+ * `in_memory` (default): Uses in-memory storage (data is not persisted).
61
+ * `postgres`: Uses PostgreSQL.
62
+ * `TENSORUS_POSTGRES_HOST`: Hostname for PostgreSQL (e.g., `localhost` or `db` if using Docker Compose).
63
+ * `TENSORUS_POSTGRES_PORT`: Port for PostgreSQL (e.g., `5432`).
64
+ * `TENSORUS_POSTGRES_USER`: Username for PostgreSQL.
65
+ * `TENSORUS_POSTGRES_PASSWORD`: Password for PostgreSQL.
66
+ * `TENSORUS_POSTGRES_DB`: Database name for PostgreSQL.
67
+ * `TENSORUS_POSTGRES_DSN`: Alternative DSN connection string for PostgreSQL.
68
+ * `TENSORUS_VALID_API_KEYS`: List of valid API keys. Values can be a comma-separated string (e.g., `key1,key2,anotherkey`) or a JSON array. If no keys are required, set this to `[]`.
69
+ * `TENSORUS_API_KEY_HEADER_NAME`: HTTP header name for the API key (default: `X-API-KEY`).
70
+ * `TENSORUS_MINIMAL_IMPORT`: Set to any value to skip importing the optional
71
+ `tensorus-models` package for a lightweight installation.
72
+
73
+ ### JWT Authentication (Conceptual - For Future Use)
74
+ * `TENSORUS_AUTH_JWT_ENABLED`: `True` or `False` (default `False`).
75
+ * `TENSORUS_AUTH_JWT_ISSUER`: URL of the JWT issuer.
76
+ * `TENSORUS_AUTH_JWT_AUDIENCE`: Expected audience for JWTs.
77
+ * `TENSORUS_AUTH_JWT_ALGORITHM`: Algorithm (default `RS256`).
78
+ * `TENSORUS_AUTH_JWT_JWKS_URI`: URI to fetch JWKS.
79
+ * `TENSORUS_AUTH_DEV_MODE_ALLOW_DUMMY_JWT`: `True` to allow dummy JWTs for development if JWT auth is enabled (default `False`).
80
+
81
+ Refer to the `docker-compose.yml` for example environment variable settings when running with Docker. For manual setup, export these variables in your shell or use a `.env` file (if your setup supports it, though direct environment variables are primary).
82
+
83
+ ## Running Tests
84
+
85
+ Tensorus includes Python unit tests. After installing the dependencies you can run them with:
86
+
87
+ ```bash
88
+ pytest
89
+ ```
docs/metadata_schemas.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Metadata Schemas Overview
2
+
3
+ The Tensorus Metadata System employs a rich set of Pydantic schemas to define the structure and validation rules for metadata. These schemas ensure data consistency and provide a clear contract for API interactions.
4
+
5
+ ## Core Schema: TensorDescriptor
6
+
7
+ The `TensorDescriptor` is the fundamental metadata unit. It captures essential information about a tensor's structure, storage, and identity. Key fields include:
8
+
9
+ * `tensor_id`: Unique UUID for the tensor.
10
+ * `dimensionality`: Number of dimensions.
11
+ * `shape`: Size of each dimension.
12
+ * `data_type`: Data type of tensor elements (e.g., `float32`, `int64`).
13
+ * `storage_format`: How the tensor is physically stored (e.g., `raw`, `numpy_npz`).
14
+ * `creation_timestamp`, `last_modified_timestamp`.
15
+ * `owner`, `access_control` (which itself is a nested model detailing read/write permissions).
16
+ * `byte_size`, `checksum` (e.g., MD5, SHA256 of the tensor data).
17
+ * `compression_info` (details if tensor data is compressed).
18
+ * `tags`: A list of arbitrary string tags for categorization.
19
+ * `metadata`: A flexible dictionary for any other custom key-value metadata.
20
+
21
+ ## Semantic Metadata
22
+
23
+ Associated with a `TensorDescriptor` (one-to-many, identified by `name` per tensor), this schema describes the meaning and context of the tensor data. Key fields:
24
+
25
+ * `tensor_id`: Links to the parent `TensorDescriptor`.
26
+ * `name`: The specific name of this semantic annotation (e.g., "primary_class_label", "object_bounding_boxes", "feature_description_set").
27
+ * `description`: Detailed explanation of this semantic annotation.
28
+ * *(Note: The original broader concept of a single SemanticMetadata object per tensor with fields like `domain`, `purpose`, etc., has evolved. These broader concepts might be captured in `TensorDescriptor.tags`, `TensorDescriptor.metadata`, or specific named `SemanticMetadata` entries.)*
29
+
30
+ ## Extended Metadata Schemas
31
+
32
+ These provide more detailed and specialized information, typically one-to-one with a `TensorDescriptor`:
33
+
34
+ * **`LineageMetadata`**: Tracks origin (`source`), parent tensors (`parent_tensors`), a history of transformations (`transformation_history`), version string (`version`), version control details (`version_control`), and other provenance information (`provenance`).
35
+ * `LineageSource`: Details the origin (e.g., file, API, computation).
36
+ * `ParentTensorLink`: Links to parent tensors and describes the relationship.
37
+ * `TransformationStep`: Describes an operation in the transformation history.
38
+ * `VersionControlInfo`: Git-like versioning details for the source or tensor itself.
39
+
40
+ * **`ComputationalMetadata`**: Describes how the tensor was computed, including the `algorithm` used, `parameters`, reference to a `computational_graph_ref`, `execution_environment` details, `computation_time_seconds`, and `hardware_info`.
41
+
42
+ * **`QualityMetadata`**: Captures information about data quality. Includes:
43
+ * `statistics` (`QualityStatistics` model: min, max, mean, std_dev, median, variance, percentiles, histogram).
44
+ * `missing_values` (`MissingValuesInfo` model: count, percentage, imputation strategy).
45
+ * `outliers` (`OutlierInfo` model: count, percentage, detection method).
46
+ - `noise_level`, `confidence_score`, `validation_results` (custom checks), `drift_score`.
47
+
48
+ * **`RelationalMetadata`**: Describes relationships to other tensors (`related_tensors` via `RelatedTensorLink`), membership in `collections`, explicit `dependencies` on other tensors, and `dataset_context`.
49
+
50
+ * **`UsageMetadata`**: Tracks how the tensor is used:
51
+ * `access_history` (list of `UsageAccessRecord` detailing who/what accessed it, when, and how).
52
+ * `usage_frequency` (auto-calculated from history or can be set).
53
+ * `last_accessed_at` (auto-calculated from history or can be set).
54
+ * `application_references` (list of applications or models that use this tensor).
55
+ * `purpose` (dictionary describing purposes, e.g. for specific model training).
56
+
57
+ For the exact field definitions, types, optionality, default values, and validation rules for each schema, please refer to the source code in the `tensorus/metadata/schemas.py` module or consult the schemas provided in the interactive API documentation at `/docs`. The I/O schemas for export/import are defined in `tensorus/metadata/schemas_iodata.py`.
mkdocs.yml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ site_name: Tensorus Metadata System
2
+ site_description: Documentation for the Tensorus Metadata System.
3
+ nav:
4
+ - Home: index.md
5
+ - Installation: installation.md
6
+ - API Guide: api_guide.md
7
+ - Metadata Schemas: metadata_schemas.md
8
+ theme: readthedocs
9
+ # Optional: Add repo_url, edit_uri for "Edit on GitHub" links if relevant
10
+ # repo_url: https://github.com/your_username/tensorus
11
+ # edit_uri: edit/main/docs/
pages/1_Dashboard.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pages/1_Dashboard.py
"""Operations dashboard page: dataset metrics and agent activity fetched from the Tensorus API."""

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import time

# Streamlit executes page scripts as top-level modules with no package context,
# so the original relative import (`from .ui_utils import ...`) raises
# "attempted relative import with no known parent package". Prefer the
# package-qualified path (works when run from the repo root) and fall back to a
# sibling-module import.
try:
    from pages.ui_utils import get_dashboard_metrics, list_all_agents, get_agent_status
except ImportError:
    from ui_utils import get_dashboard_metrics, list_all_agents, get_agent_status

st.set_page_config(page_title="Tensorus Dashboard", layout="wide")

st.title("📊 Operations Dashboard")
st.caption("Overview of Tensorus datasets and agent activity from API.")

# --- Fetch Data ---
# API responses are cached in st.session_state; the button below forces a refetch.
metrics_data = None
agent_list = None  # Full agent list for the detailed status cards below

if st.button("🔄 Refresh Dashboard Data"):
    metrics_data = get_dashboard_metrics()
    agent_list = list_all_agents()
    st.session_state['dashboard_metrics'] = metrics_data
    st.session_state['dashboard_agents'] = agent_list
    st.rerun()  # Rerun so the page renders the freshly fetched data
else:
    # Lazily populate session state on first visit
    if 'dashboard_metrics' not in st.session_state:
        st.session_state['dashboard_metrics'] = get_dashboard_metrics()
    if 'dashboard_agents' not in st.session_state:
        st.session_state['dashboard_agents'] = list_all_agents()

    metrics_data = st.session_state['dashboard_metrics']
    agent_list = st.session_state['dashboard_agents']


# --- Display Metrics ---
st.subheader("System Metrics")
if metrics_data:
    col1, col2, col3 = st.columns(3)
    col1.metric("Total Datasets", metrics_data.get('dataset_count', 'N/A'))
    col2.metric("Total Records (Est.)", f"{metrics_data.get('total_records_est', 0):,}")
    # "starting" agents count toward the headline running number
    agent_summary = metrics_data.get('agent_status_summary', {})
    running_agents = agent_summary.get('running', 0) + agent_summary.get('starting', 0)
    col3.metric("Running Agents", running_agents)

    st.divider()

    # --- Performance Metrics Row ---
    st.subheader("Performance Indicators (Simulated)")
    pcol1, pcol2, pcol3, pcol4 = st.columns(4)
    pcol1.metric("Ingestion Rate (rec/s)", f"{metrics_data.get('data_ingestion_rate', 0.0):.1f}")
    pcol2.metric("Avg Query Latency (ms)", f"{metrics_data.get('avg_query_latency_ms', 0.0):.1f}")
    pcol3.metric("Latest RL Reward", f"{metrics_data.get('rl_latest_reward', 'N/A')}")
    pcol4.metric("Best AutoML Score", f"{metrics_data.get('automl_best_score', 'N/A')}")

else:
    st.warning("Could not fetch dashboard metrics from the API.")

st.divider()

# --- Agent Status Details ---
st.subheader("Agent Status")
if agent_list:
    num_agents = len(agent_list)
    cols = st.columns(max(1, num_agents))  # One column per agent (at least one)

    for i, agent_info in enumerate(agent_list):
        agent_id = agent_info.get('id')
        with cols[i % len(cols)]:  # Distribute agent cards across columns
            with st.container(border=True):
                st.markdown(f"**{agent_info.get('name', 'Unknown Agent')}** (`{agent_id}`)")
                # Per-agent get_agent_status() calls are deliberately skipped here
                # to keep the page fast; the summary status from the list suffices.
                status = agent_info.get('status', 'unknown')
                status_color = "green" if status in ["running", "starting"] else ("orange" if status in ["stopping"] else ("red" if status in ["error"] else "grey"))
                st.markdown(f"Status: :{status_color}[**{status.upper()}**]")

                # Display config from the list info
                with st.expander("Config"):
                    st.json(agent_info.get('config', {}), expanded=False)
else:
    st.warning("Could not fetch agent list from the API.")

st.divider()

# --- Performance Monitoring Chart (simulated history derived from current metrics) ---
st.subheader("Performance Monitoring (Placeholder Graph)")
if metrics_data:
    history_len = 20
    # Persist a rolling window in session state so the chart evolves smoothly
    # across reruns instead of regenerating random data each time.
    if 'sim_history' not in st.session_state:
        st.session_state['sim_history'] = pd.DataFrame({
            'Ingestion Rate': np.random.rand(history_len) * metrics_data.get('data_ingestion_rate', 10),
            'Query Latency': np.random.rand(history_len) * metrics_data.get('avg_query_latency_ms', 100),
            'RL Reward': np.random.randn(history_len) * 5 + (metrics_data.get('rl_latest_reward', 0) or 0)
        })

    # Shift the window left by one and append the latest observation
    latest_data = pd.DataFrame({
        'Ingestion Rate': [metrics_data.get('data_ingestion_rate', 0.0)],
        'Query Latency': [metrics_data.get('avg_query_latency_ms', 0.0)],
        'RL Reward': [metrics_data.get('rl_latest_reward', 0) or 0]  # Coerce None to 0
    })
    st.session_state['sim_history'] = pd.concat([st.session_state['sim_history'].iloc[1:], latest_data], ignore_index=True)

    # Use Plotly for better interactivity
    try:
        fig = px.line(st.session_state['sim_history'], title="Simulated Performance Metrics Over Time")
        fig.update_layout(legend_title_text='Metrics')
        st.plotly_chart(fig, use_container_width=True)
    except Exception as e:
        st.warning(f"Could not display performance chart: {e}")

else:
    st.info("Performance metrics unavailable.")

# BUG FIX: time.strftime() requires a struct_time as its second argument; the
# original passed the raw float from time.time() when metrics_data was falsy,
# raising TypeError. Resolve the epoch first, then convert with localtime().
_ts = metrics_data.get('timestamp', time.time()) if metrics_data else time.time()
st.caption(f"Dashboard data timestamp: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(_ts))}")
pages/2_Control_Panel.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pages/2_Control_Panel.py
"""Agent control-panel page: start/stop agents, edit their config, and view logs via the API."""

import streamlit as st
import time
import json

# Streamlit runs page scripts without a package context, so the original
# relative import (`from .ui_utils import ...`) fails with ImportError.
# Try the package-qualified path first, then a sibling-module fallback.
try:
    from pages.ui_utils import (
        list_all_agents,
        get_agent_status,
        get_agent_logs,
        start_agent,
        stop_agent,
        update_agent_config,
    )
except ImportError:
    from ui_utils import (
        list_all_agents,
        get_agent_status,
        get_agent_logs,
        start_agent,
        stop_agent,
        update_agent_config,
    )

st.set_page_config(page_title="Agent Control Panel", layout="wide")

st.title("🕹️ Multi-Agent Control Panel")
st.caption("Manage and monitor Tensorus agents via API.")

# Fetch agent list from API
agent_list = list_all_agents()

if not agent_list:
    st.error("Could not fetch agent list from API. Please ensure the backend is running and reachable.")
    st.stop()

# Map display name -> agent id for the selectbox. IDs are assumed unique;
# if names can collide, include the id in the label instead, e.g.
# {f"{agent['name']} ({agent['id']})": agent['id'] for agent in agent_list}
agent_options = {agent['name']: agent['id'] for agent in agent_list}

selected_agent_name = st.selectbox("Select Agent:", options=agent_options.keys())

if selected_agent_name:
    selected_agent_id = agent_options[selected_agent_name]
    st.divider()
    st.subheader(f"Control: {selected_agent_name} (`{selected_agent_id}`)")

    # Cache status/logs per agent in session state so they are only refetched
    # on demand (refresh button) or after a start/stop action.
    agent_state_key = f"agent_status_{selected_agent_id}"
    agent_logs_key = f"agent_logs_{selected_agent_id}"

    # FIX: the original appended "##<id>" to the button *label* to make it
    # unique (an imgui-style convention Streamlit renders literally); use the
    # `key` argument, which is Streamlit's supported uniqueness mechanism.
    if st.button("🔄 Refresh Status & Logs", key=f"refresh_{selected_agent_id}"):
        st.session_state[agent_state_key] = get_agent_status(selected_agent_id)
        st.session_state[agent_logs_key] = get_agent_logs(selected_agent_id)
        st.rerun()  # Rerun to display refreshed data

    # Fetch status if it is not cached yet
    if agent_state_key not in st.session_state:
        st.session_state[agent_state_key] = get_agent_status(selected_agent_id)

    status_info = st.session_state[agent_state_key]

    if status_info:
        status = status_info.get('status', 'unknown')
        status_color = "green" if status in ["running", "starting"] else ("orange" if status in ["stopping"] else ("red" if status in ["error"] else "grey"))
        st.markdown(f"Current Status: :{status_color}[**{status.upper()}**]")
        last_log_ts = status_info.get('last_log_timestamp')
        if last_log_ts:
            st.caption(f"Last Log Entry (approx.): {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_log_ts))}")
    else:
        st.error(f"Could not retrieve status for agent '{selected_agent_name}'.")

    # Control buttons — enabled only when the transition makes sense
    col1, col2, col3 = st.columns([1, 1, 5])
    is_running = status_info and status_info.get('status') == 'running'
    is_stopped = status_info and status_info.get('status') == 'stopped'

    with col1:
        start_disabled = not is_stopped  # Can only start a stopped agent
        if st.button("▶️ Start", key=f"start_{selected_agent_id}", disabled=start_disabled):
            if start_agent(selected_agent_id):  # API call returns success/fail
                # Short delay to let the backend state change (optimistic),
                # then drop cached state and rerun so fresh status is fetched.
                time.sleep(1.0)
                if agent_state_key in st.session_state: del st.session_state[agent_state_key]
                if agent_logs_key in st.session_state: del st.session_state[agent_logs_key]
                st.rerun()
    with col2:
        stop_disabled = not is_running  # Can only stop a running agent
        if st.button("⏹️ Stop", key=f"stop_{selected_agent_id}", disabled=stop_disabled):
            if stop_agent(selected_agent_id):  # API call returns success/fail
                time.sleep(1.0)
                if agent_state_key in st.session_state: del st.session_state[agent_state_key]
                if agent_logs_key in st.session_state: del st.session_state[agent_logs_key]
                st.rerun()

    st.divider()

    # Configuration & Logs
    tab1, tab2 = st.tabs(["Configuration", "Logs"])

    with tab1:
        if status_info and 'config' in status_info:
            current_config = status_info['config']
            st.write("Current configuration:")
            st.json(current_config)

            with st.expander("Edit configuration"):
                form = st.form(key=f"cfg_form_{selected_agent_id}")
                config_text = form.text_area(
                    "Configuration JSON",
                    value=json.dumps(current_config, indent=2),
                    height=200,
                )
                submitted = form.form_submit_button("Update")
                if submitted:
                    try:
                        new_cfg = json.loads(config_text)
                        if update_agent_config(selected_agent_id, new_cfg):
                            # Invalidate cached status so the new config is refetched
                            if agent_state_key in st.session_state:
                                del st.session_state[agent_state_key]
                            time.sleep(0.5)
                            st.rerun()
                    except json.JSONDecodeError as e:
                        form.error(f"Invalid JSON: {e}")
        else:
            st.warning("Configuration not available.")

    with tab2:
        st.write("Recent logs (fetched from API):")
        # Fetch logs lazily on first view of this tab
        if agent_logs_key not in st.session_state:
            st.session_state[agent_logs_key] = get_agent_logs(selected_agent_id)

        logs = st.session_state[agent_logs_key]
        if logs is not None:
            st.code("\n".join(logs), language="log")
        else:
            st.error("Could not retrieve logs.")

else:
    st.info("Select an agent from the dropdown above.")
pages/3_NQL_Chatbot.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pages/3_NQL_Chatbot.py
"""NQL chatbot page: natural-language queries over Tensorus datasets, rendered as a chat."""

import streamlit as st
import pandas as pd

# Streamlit runs page scripts without a package context, so the original
# relative import (`from .ui_utils import ...`) fails with ImportError.
try:
    from pages.ui_utils import execute_nql_query
except ImportError:
    from ui_utils import execute_nql_query

st.set_page_config(page_title="NQL Chatbot", layout="wide")

st.title("💬 Natural Query Language (NQL) Chatbot")
st.caption("Query Tensorus datasets using natural language.")
st.info("Backend uses Regex-based NQL Agent. LLM integration is future work.")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay chat history on every rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
        # Only render a results table when a non-empty DataFrame was stored
        if "results" in message and message["results"] is not None and not message["results"].empty:
            st.dataframe(message["results"], use_container_width=True)
        elif "error" in message:
            st.error(message["error"])


# React to user input
if prompt := st.chat_input("Enter your query (e.g., 'get all data from my_dataset')"):
    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Get assistant response from the NQL Agent API
    with st.spinner("Processing query..."):
        nql_response = execute_nql_query(prompt)

    response_content = ""
    results_df = None
    error_msg = None

    if nql_response:
        response_content = nql_response.get("message", "Error processing response.")
        if nql_response.get("success"):
            results_list = nql_response.get("results")
            if results_list:
                # Flatten each result (dict with 'metadata', 'shape', 'dtype', ...)
                # into a row suitable for tabular display.
                display_data = []
                for res in results_list:
                    row = {
                        "record_id": res["metadata"].get("record_id"),
                        "shape": str(res.get("shape")),  # Shape list -> string for display
                        "dtype": res.get("dtype"),
                        **res["metadata"]  # Flatten metadata into columns
                    }
                    # Drop potentially large tensor payload and metadata keys that
                    # would duplicate the top-level columns set above.
                    row.pop('tensor', None)
                    row.pop('shape', None)
                    row.pop('dtype', None)
                    row.pop('record_id', None)
                    display_data.append(row)

                if display_data:
                    results_df = pd.DataFrame(display_data)

            # Augment the message with the result count when available
            count = nql_response.get("count")
            if count is not None:
                response_content += f" Found {count} record(s)."

        else:
            # NQL agent indicated failure (parsing or execution)
            error_msg = response_content  # Use the agent's message as the error
    else:
        # API call itself failed (connection error, etc.)
        response_content = "Failed to get response from the NQL agent."
        error_msg = response_content

    # Display assistant response and record it in the history
    message_data = {"role": "assistant", "content": response_content}
    with st.chat_message("assistant"):
        st.markdown(response_content)
        if results_df is not None and not results_df.empty:
            st.dataframe(results_df, use_container_width=True)
            # NOTE: storing the DataFrame in history can grow session memory for
            # large result sets — kept for replay fidelity.
            message_data["results"] = results_df
        elif error_msg:
            st.error(error_msg)
            message_data["error"] = error_msg

    st.session_state.messages.append(message_data)
pages/4_Data_Explorer.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pages/4_Data_Explorer.py
"""Data explorer page: browse, filter, and visualize Tensorus dataset records."""

import streamlit as st
import pandas as pd
import plotly.express as px
import torch  # Used to reconstruct tensors for inspection/plotting

# Streamlit runs page scripts without a package context, so the original
# relative import (`from .ui_utils import ...`) fails with ImportError.
try:
    from pages.ui_utils import list_datasets, fetch_dataset_data
except ImportError:
    from ui_utils import list_datasets, fetch_dataset_data

st.set_page_config(page_title="Data Explorer", layout="wide")

st.title("🔍 Interactive Data Explorer")
st.caption("Browse, filter, and visualize Tensorus datasets.")

# --- Dataset Selection ---
datasets = list_datasets()
if not datasets:
    st.warning("No datasets found or API connection failed. Cannot explore data.")
    st.stop()  # Nothing to explore without datasets

selected_dataset = st.selectbox("Select Dataset:", datasets)

# --- Data Fetching & Filtering ---
if selected_dataset:
    st.subheader(f"Exploring: {selected_dataset}")

    PAGE_SIZE = 20
    # Per-dataset page index persisted in session state
    page_key = f"page_{selected_dataset}"
    if page_key not in st.session_state:
        st.session_state[page_key] = 0

    page = st.session_state[page_key]
    offset = page * PAGE_SIZE
    records = fetch_dataset_data(selected_dataset, offset=offset, limit=PAGE_SIZE)

    prev_disabled = page == 0
    # A short page implies we are on the last page
    next_disabled = records is None or len(records) < PAGE_SIZE

    col_prev, col_next = st.columns(2)
    with col_prev:
        if st.button("Previous", disabled=prev_disabled, key="prev_btn"):
            st.session_state[page_key] = max(0, page - 1)
            # FIX: st.experimental_rerun() is deprecated/removed in modern
            # Streamlit; use st.rerun() consistently with the other pages.
            st.rerun()
    with col_next:
        if st.button("Next", disabled=next_disabled, key="next_btn"):
            st.session_state[page_key] = page + 1
            st.rerun()

    if records is None:
        st.error("Failed to fetch data for the selected dataset.")
        st.stop()
    elif not records:
        st.info("Selected dataset is empty.")
        st.stop()

    start_idx = offset + 1
    end_idx = offset + len(records)
    st.info(f"Displaying records {start_idx} - {end_idx} (page {page + 1})")

    # Metadata table drives the filtering/display UI
    metadata_list = [r['metadata'] for r in records]
    df_meta = pd.DataFrame(metadata_list)

    # --- Metadata Filtering UI ---
    st.sidebar.header("Filter by Metadata")
    filter_cols = st.sidebar.multiselect("Select metadata columns to filter:", options=df_meta.columns.tolist())

    filtered_df = df_meta.copy()
    for col in filter_cols:
        unique_values = filtered_df[col].dropna().unique().tolist()
        if pd.api.types.is_numeric_dtype(filtered_df[col]):
            # Numeric column: range slider
            min_val, max_val = float(filtered_df[col].min()), float(filtered_df[col].max())
            if min_val < max_val:
                selected_range = st.sidebar.slider(f"Filter {col}:", min_val, max_val, (min_val, max_val))
                filtered_df = filtered_df[filtered_df[col].between(selected_range[0], selected_range[1])]
            else:
                st.sidebar.caption(f"{col}: Single numeric value ({min_val}), no range filter.")

        elif len(unique_values) > 0 and len(unique_values) <= 20:  # Keep dropdowns manageable
            # Low-cardinality column: multiselect
            selected_values = st.sidebar.multiselect(f"Filter {col}:", options=unique_values, default=unique_values)
            if selected_values:  # Only filter if some values are selected
                filtered_df = filtered_df[filtered_df[col].isin(selected_values)]
            else:  # User deselected everything -> show no rows (keep columns)
                filtered_df = filtered_df[filtered_df[col].isnull()]

        else:
            # High-cardinality column: case-insensitive substring search
            st.sidebar.text_input(f"Filter {col} (Text contains):", key=f"text_{col}")
            search_term = st.session_state.get(f"text_{col}", "").lower()
            if search_term:
                # Cast to str first so non-string cells don't break .str.contains
                filtered_df = filtered_df[filtered_df[col].astype(str).str.lower().str.contains(search_term, na=False)]


    st.divider()
    st.subheader("Filtered Data View")
    st.write(f"{len(filtered_df)} records matching filters.")
    st.dataframe(filtered_df, use_container_width=True)

    # --- Tensor Preview & Visualization ---
    st.divider()
    st.subheader("Tensor Preview")

    if not filtered_df.empty:
        # Let the user pick a record from the filtered set
        record_ids = filtered_df['record_id'].tolist()
        selected_record_id = st.selectbox("Select Record ID to Preview Tensor:", record_ids)

        if selected_record_id:
            # Find the full record (with tensor payload) matching the selected ID
            selected_record = next((r for r in records if r['metadata'].get('record_id') == selected_record_id), None)

            if selected_record:
                st.write("Metadata:")
                st.json(selected_record['metadata'])

                shape = selected_record.get("shape")
                dtype = selected_record.get("dtype")
                data_list = selected_record.get("data")

                st.write(f"Tensor Info: Shape={shape}, Dtype={dtype}")

                try:
                    # Recreate the tensor for preview/plotting. Beware of very
                    # large tensors in the UI — only small slices are shown.
                    if shape and dtype and data_list is not None:
                        tensor = torch.tensor(data_list, dtype=getattr(torch, dtype, torch.float32))  # Fall back to float32 for unknown dtypes
                        st.write("Tensor Data (first few elements):")
                        st.code(f"{tensor.flatten()[:10].numpy()}...")  # Flattened head only

                        # --- Simple visualizations by dimensionality ---
                        if tensor.ndim == 1 and tensor.numel() > 1:
                            st.line_chart(tensor.numpy())
                        elif tensor.ndim == 2 and tensor.shape[0] > 1 and tensor.shape[1] > 1:
                            # 2D: heatmap via Plotly
                            try:
                                fig = px.imshow(tensor.numpy(), title="Tensor Heatmap", aspect="auto")
                                st.plotly_chart(fig, use_container_width=True)
                            except Exception as plot_err:
                                st.warning(f"Could not generate heatmap: {plot_err}")
                        elif tensor.ndim == 3 and tensor.shape[0] in [1, 3]:  # Looks like (C, H, W) image data
                            try:
                                # (C, H, W) -> (H, W, C); squeeze drops a single channel dim
                                if tensor.shape[0] in [1, 3]:
                                    display_tensor = tensor.permute(1, 2, 0).squeeze()
                                    # Min-max normalize into [0, 1] for display (best effort)
                                    display_tensor = (display_tensor - display_tensor.min()) / (display_tensor.max() - display_tensor.min() + 1e-6)
                                    st.image(display_tensor.numpy(), caption="Tensor as Image (Attempted)", use_column_width=True)
                            except ImportError:
                                st.warning("Pillow needed for image display (`pip install Pillow`)")
                            except Exception as img_err:
                                st.warning(f"Could not display tensor as image: {img_err}")
                        else:
                            st.info("No specific visualization available for this tensor shape/dimension.")

                    else:
                        st.warning("Tensor data, shape, or dtype missing in the record.")

                except Exception as tensor_err:
                    st.error(f"Error processing tensor data for preview: {tensor_err}")
            else:
                st.warning("Selected record details not found (this shouldn't happen).")
        else:
            st.info("Select a record ID above to preview its tensor.")
    else:
        st.info("No records match the current filters.")
pages/5_API_Playground.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pages/5_API_Playground.py
"""API playground page: embeds the backend's Swagger UI and ReDoc documentation."""

import streamlit as st
import streamlit.components.v1 as components

# Streamlit runs page scripts without a package context, so the original
# relative import (`from .ui_utils import ...`) fails with ImportError.
try:
    from pages.ui_utils import TENSORUS_API_URL, get_api_status
except ImportError:
    from ui_utils import TENSORUS_API_URL, get_api_status

st.set_page_config(page_title="API Playground", layout="wide")

st.title("🚀 API Playground & Documentation Hub")
st.caption("Explore and interact with the Tensorus REST API.")

# The embedded docs are served by the backend itself, so bail out early if it
# is unreachable.
api_running = get_api_status()

if not api_running:
    st.error(
        f"The Tensorus API backend does not seem to be running at {TENSORUS_API_URL}. "
        "Please start the backend (`uvicorn api:app --reload`) to use the API Playground."
    )
    st.stop()
else:
    st.success(f"Connected to API backend at {TENSORUS_API_URL}")

st.markdown(
    f"""
    This section provides live, interactive documentation for the Tensorus API,
    powered by FastAPI's OpenAPI integration. You can explore endpoints,
    view schemas, and even try out API calls directly in your browser.

    * **Swagger UI:** A graphical interface for exploring and testing API endpoints.
    * **ReDoc:** Alternative documentation format, often preferred for reading.

    Select a view below:
    """
)

# Tabs for the two documentation flavors
tab1, tab2 = st.tabs(["Swagger UI", "ReDoc"])

# FastAPI serves these at fixed paths relative to the API base URL
swagger_url = f"{TENSORUS_API_URL}/docs"
redoc_url = f"{TENSORUS_API_URL}/redoc"

with tab1:
    st.subheader("Swagger UI")
    st.markdown(f"Explore the API interactively. [Open in new tab]({swagger_url})")
    # Embed Swagger UI in an iframe
    components.iframe(swagger_url, height=800, scrolling=True)

with tab2:
    st.subheader("ReDoc")
    st.markdown(f"View the API documentation. [Open in new tab]({redoc_url})")
    # Embed ReDoc in an iframe
    components.iframe(redoc_url, height=800, scrolling=True)

st.divider()
st.caption("Note: Ensure the Tensorus API backend is running to interact with the playground.")
pages/6_Financial_Forecast_Demo.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import plotly.graph_objects as go # Using graph_objects for more control
5
+ import time
6
+ import torch
7
+
8
+ # Assuming tensorus is in PYTHONPATH or installed
9
+ from tensorus.tensor_storage import TensorStorage
10
+ from tensorus.financial_data_generator import generate_financial_data
11
+ from tensorus.time_series_predictor import load_time_series_from_tensorus, train_arima_and_predict, store_predictions_to_tensorus
12
+
13
+ # Attempt to load shared CSS
14
+ try:
15
+ from pages.pages_shared_utils import load_css
16
+ LOAD_CSS_AVAILABLE = True
17
+ except ImportError:
18
+ LOAD_CSS_AVAILABLE = False
19
+ def load_css(): # Dummy function
20
+ st.markdown("<style>/* No shared CSS found */</style>", unsafe_allow_html=True)
21
+ # st.warning("Could not load shared CSS from pages.pages_shared_utils.") # Keep UI cleaner
22
+
23
+ # Page Configuration
24
+ st.set_page_config(page_title="Financial Forecast Demo", layout="wide")
25
+ if LOAD_CSS_AVAILABLE:
26
+ load_css()
27
+
28
+ # --- Constants ---
29
+ RAW_DATASET_NAME = "financial_raw_data"
30
+ PREDICTION_DATASET_NAME = "financial_predictions"
31
+ TIME_SERIES_NAME = "synthetic_stock_close"
32
+ # Use a demo-specific storage path to avoid conflicts with main app
33
+ TENSOR_STORAGE_PATH = "tensor_data_financial_demo"
34
+
35
+
36
+ def get_tensor_storage_instance():
37
+ """Initializes and returns a TensorStorage instance for the demo."""
38
+ return TensorStorage(storage_path=TENSOR_STORAGE_PATH)
39
+
40
+ def show_financial_demo_page():
41
+ """
42
+ Main function to display the Streamlit page for the financial demo.
43
+ """
44
+ st.title("📈 Financial Time Series Forecasting Demo")
45
+ st.markdown("""
46
+ This demo showcases time series forecasting using an ARIMA model on synthetically generated
47
+ financial data. Data is generated, stored in Tensorus, loaded for model training,
48
+ and predictions are then stored back and visualized.
49
+ """)
50
+
51
+ # Initialize session state variables
52
+ if 'raw_financial_df' not in st.session_state:
53
+ st.session_state.raw_financial_df = None
54
+ if 'loaded_historical_series' not in st.session_state:
55
+ st.session_state.loaded_historical_series = None
56
+ if 'predictions_series' not in st.session_state:
57
+ st.session_state.predictions_series = None
58
+ if 'arima_p' not in st.session_state: # For number_input persistence
59
+ st.session_state.arima_p = 5
60
+ if 'arima_d' not in st.session_state:
61
+ st.session_state.arima_d = 1
62
+ if 'arima_q' not in st.session_state:
63
+ st.session_state.arima_q = 0
64
+ if 'n_predictions' not in st.session_state:
65
+ st.session_state.n_predictions = 30
66
+
67
+
68
+ # --- Section 1: Data Generation & Ingestion ---
69
+ st.header("1. Data Generation & Ingestion")
70
+ if st.button("Generate & Ingest Sample Financial Data", key="generate_data_button"):
71
+ with st.spinner("Generating and ingesting data..."):
72
+ try:
73
+ storage = get_tensor_storage_instance()
74
+
75
+ # Generate sample financial data
76
+ df = generate_financial_data(days=365, initial_price=150, trend_slope=0.1, seasonality_amplitude=20)
77
+ st.session_state.raw_financial_df = df # Store for immediate plotting
78
+
79
+ # Ensure dataset exists
80
+ if RAW_DATASET_NAME not in storage.list_datasets():
81
+ storage.create_dataset(RAW_DATASET_NAME)
82
+ st.info(f"Dataset '{RAW_DATASET_NAME}' created.")
83
+ else:
84
+ st.info(f"Dataset '{RAW_DATASET_NAME}' already exists. Using existing.")
85
+
86
+ # Prepare data for TensorStorage
87
+ series_to_store = torch.tensor(df['Close'].values, dtype=torch.float32)
88
+ dates_for_metadata = df['Date'].dt.strftime('%Y-%m-%d').tolist()
89
+ metadata = {
90
+ "name": TIME_SERIES_NAME,
91
+ "dates": dates_for_metadata,
92
+ "source": "financial_demo_generator",
93
+ "description": f"Daily closing prices for {TIME_SERIES_NAME}"
94
+ }
95
+
96
+ record_id = storage.insert(RAW_DATASET_NAME, series_to_store, metadata)
97
+ if record_id:
98
+ st.success(f"Data for '{TIME_SERIES_NAME}' ingested into '{RAW_DATASET_NAME}' with Record ID: {record_id}")
99
+ # Also load it into loaded_historical_series for consistency in the workflow
100
+ st.session_state.loaded_historical_series = pd.Series(df['Close'].values, index=pd.to_datetime(df['Date']), name=TIME_SERIES_NAME)
101
+ st.session_state.predictions_series = None # Clear previous predictions
102
+ else:
103
+ st.error("Failed to ingest data into Tensorus.")
104
+ except Exception as e:
105
+ st.error(f"Error during data generation/ingestion: {e}")
106
+
107
+ # --- Section 2: Historical Data Visualization ---
108
+ st.header("2. Historical Data Visualization")
109
+ if st.button("Load and View Historical Data from Tensorus", key="load_historical_button"):
110
+ with st.spinner(f"Loading '{TIME_SERIES_NAME}' from Tensorus..."):
111
+ try:
112
+ storage = get_tensor_storage_instance()
113
+ loaded_series = load_time_series_from_tensorus(
114
+ storage,
115
+ RAW_DATASET_NAME,
116
+ series_metadata_field="name",
117
+ series_name=TIME_SERIES_NAME,
118
+ date_metadata_field="dates"
119
+ )
120
+ if loaded_series is not None:
121
+ st.session_state.loaded_historical_series = loaded_series
122
+ st.session_state.predictions_series = None # Clear previous predictions
123
+ st.success(f"Successfully loaded '{TIME_SERIES_NAME}' from Tensorus.")
124
+ else:
125
+ st.warning(f"Could not find or load '{TIME_SERIES_NAME}' from Tensorus dataset '{RAW_DATASET_NAME}'. Generate data first if it's not there.")
126
+ except Exception as e:
127
+ st.error(f"Error loading historical data: {e}")
128
+
129
+ if st.session_state.loaded_historical_series is not None:
130
+ st.subheader(f"Historical Data: {st.session_state.loaded_historical_series.name}")
131
+ fig_hist = go.Figure()
132
+ fig_hist.add_trace(go.Scatter(
133
+ x=st.session_state.loaded_historical_series.index,
134
+ y=st.session_state.loaded_historical_series.values,
135
+ mode='lines',
136
+ name='Historical Close'
137
+ ))
138
+ fig_hist.update_layout(title="Historical Stock Prices", xaxis_title="Date", yaxis_title="Price")
139
+ st.plotly_chart(fig_hist, use_container_width=True)
140
+ else:
141
+ st.info("Generate or load data to view historical prices.")
142
+
143
+
144
+ # --- Section 3: ARIMA Model Prediction ---
145
+ st.header("3. ARIMA Model Prediction")
146
+
147
+ col1, col2, col3, col4 = st.columns(4)
148
+ with col1:
149
+ st.number_input("ARIMA Order (p)", min_value=0, max_value=10, key="arima_p", help="Autoregressive order.")
150
+ with col2:
151
+ st.number_input("ARIMA Order (d)", min_value=0, max_value=3, key="arima_d", help="Differencing order.")
152
+ with col3:
153
+ st.number_input("ARIMA Order (q)", min_value=0, max_value=10, key="arima_q", help="Moving average order.")
154
+ with col4:
155
+ st.number_input("Number of Future Predictions", min_value=1, max_value=180, key="n_predictions", help="Number of future days to predict.")
156
+
157
+ if st.button("Run ARIMA Prediction", key="run_arima_button"):
158
+ if st.session_state.loaded_historical_series is None:
159
+ st.error("No historical data loaded. Please generate or load data first (Sections 1 or 2).")
160
+ else:
161
+ with st.spinner("Running ARIMA prediction... This might take a moment."):
162
+ try:
163
+ p, d, q = st.session_state.arima_p, st.session_state.arima_d, st.session_state.arima_q
164
+ n_preds = st.session_state.n_predictions
165
+
166
+ predictions = train_arima_and_predict(
167
+ series=st.session_state.loaded_historical_series,
168
+ arima_order=(p, d, q),
169
+ n_predictions=n_preds
170
+ )
171
+
172
+ if predictions is not None:
173
+ st.session_state.predictions_series = predictions
174
+ st.success(f"ARIMA prediction complete for {n_preds} steps.")
175
+
176
+ # Store predictions
177
+ storage = get_tensor_storage_instance()
178
+ model_details_dict = {"type": "ARIMA", "order": (p,d,q), "parameters_estimated": True} # Example details
179
+
180
+ pred_record_id = store_predictions_to_tensorus(
181
+ storage,
182
+ predictions,
183
+ PREDICTION_DATASET_NAME,
184
+ original_series_name=st.session_state.loaded_historical_series.name,
185
+ model_details=model_details_dict
186
+ )
187
+ if pred_record_id:
188
+ st.info(f"Predictions stored in Tensorus dataset '{PREDICTION_DATASET_NAME}' with Record ID: {pred_record_id}")
189
+ else:
190
+ st.warning("Failed to store predictions in Tensorus.")
191
+ else:
192
+ st.error("Failed to generate ARIMA predictions. Check model parameters, data, or logs.")
193
+ except Exception as e:
194
+ st.error(f"Error during ARIMA prediction: {e}")
195
+
196
+ # --- Section 4: Prediction Results & Visualization ---
197
+ st.header("4. Prediction Results")
198
+ if st.session_state.predictions_series is not None and st.session_state.loaded_historical_series is not None:
199
+ st.subheader("Historical Data and Predictions")
200
+
201
+ fig_combined = go.Figure()
202
+ fig_combined.add_trace(go.Scatter(
203
+ x=st.session_state.loaded_historical_series.index,
204
+ y=st.session_state.loaded_historical_series.values,
205
+ mode='lines',
206
+ name='Historical Close',
207
+ line=dict(color='blue')
208
+ ))
209
+ fig_combined.add_trace(go.Scatter(
210
+ x=st.session_state.predictions_series.index,
211
+ y=st.session_state.predictions_series.values,
212
+ mode='lines',
213
+ name='Predicted Close',
214
+ line=dict(color='red', dash='dash')
215
+ ))
216
+ fig_combined.update_layout(
217
+ title=f"{st.session_state.loaded_historical_series.name}: Historical vs. Predicted",
218
+ xaxis_title="Date",
219
+ yaxis_title="Price"
220
+ )
221
+ st.plotly_chart(fig_combined, use_container_width=True)
222
+
223
+ st.subheader("Predicted Values (Next {} Days)".format(len(st.session_state.predictions_series)))
224
+ # Display predictions as a DataFrame, ensure index is named 'Date' if it's a DatetimeIndex
225
+ pred_df_display = st.session_state.predictions_series.to_frame()
226
+ if isinstance(pred_df_display.index, pd.DatetimeIndex):
227
+ pred_df_display.index.name = "Date"
228
+ st.dataframe(pred_df_display)
229
+
230
+ elif st.session_state.loaded_historical_series is not None:
231
+ st.info("Run ARIMA Prediction in Section 3 to see forecasted results.")
232
+ else:
233
+ st.info("Generate or load data and run predictions to see results here.")
234
+
235
# Allow running this page directly as a script (outside the multi-page Streamlit app).
if __name__ == "__main__":
    show_financial_demo_page()
pages/api_playground_v2.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pages/api_playground_v2.py
"""Streamlit page embedding the Tensorus API's interactive documentation.

Verifies backend availability via the shared page utilities, then renders
the FastAPI-generated Swagger UI and ReDoc pages inside styled iframes.
NOTE(review): assumes the backend serves OpenAPI docs at /docs and /redoc
under ``API_BASE_URL`` — confirm against the FastAPI app configuration.
"""

import streamlit as st
import streamlit.components.v1 as components

# Import from the shared utils for pages
try:
    from pages.pages_shared_utils import (
        load_css as load_shared_css,
        API_BASE_URL,  # Use this instead of TENSORUS_API_URL
        get_api_status
    )
except ImportError:
    # Fallback stubs keep the symbols defined for static analysis; st.stop()
    # below halts the script so the stubs are never actually exercised.
    st.error("Critical Error: Could not import `pages_shared_utils`. Page cannot function.")
    def load_shared_css(): pass
    API_BASE_URL = "http://127.0.0.1:7860"  # Fallback
    def get_api_status():
        st.error("`get_api_status` unavailable.")
        return False, {"error": "Setup issue"}  # Simulate API down
    st.stop()

st.set_page_config(page_title="API Playground (V2)", layout="wide")
load_shared_css()  # Load common CSS from shared utilities

# Custom CSS for API Playground page
# These styles are specific to this page and enhance the shared theme.
st.markdown("""
    <style>
    /* API Playground specific styles */
    /* Using classes for titles and captions allows for more specific styling if needed,
       while still inheriting base styles from shared CSS (h1, .stCaption etc.) */
    .main-title {
        color: #e0e0ff !important; /* Ensure it uses the desired dashboard title color */
        text-align: center;
        margin-bottom: 0.5rem;
        font-weight: bold;
    }
    .main-caption {
        color: #c0c0ff !important; /* Consistent caption color */
        text-align: center;
        margin-bottom: 2rem;
    }

    /* Status message styling - these could be potentially moved to shared_utils if used elsewhere */
    /* For now, keeping them page-specific as they are styled slightly differently from .status-indicator */
    .status-message-shared { /* Base for status messages on this page */
        padding: 0.75rem;
        border-radius: 5px;
        margin-bottom: 1rem;
        text-align: center; /* Center align text in status messages */
    }
    .status-success { /* Success status message, inherits from .status-message-shared */
        color: #4CAF50; /* Green text */
        background-color: rgba(76, 175, 80, 0.1); /* Light green background */
        border: 1px solid rgba(76, 175, 80, 0.3); /* Light green border */
    }
    .status-error { /* Error status message, inherits from .status-message-shared */
        color: #F44336; /* Red text */
        background-color: rgba(244, 67, 54, 0.1); /* Light red background */
        border: 1px solid rgba(244, 67, 54, 0.3); /* Light red border */
    }
    .status-error code, .status-success code { /* Style <code> tags within status messages */
        background-color: rgba(0,0,0,0.1);
        padding: 0.1em 0.3em;
        border-radius: 3px;
    }

    /* Styling for the Streamlit Tabs component */
    .tab-container .stTabs [data-baseweb="tab-list"] {
        background-color: transparent; /* Remove default Streamlit tab bar background if any */
        gap: 5px; /* Increased gap between tab headers */
        border-bottom: 1px solid #2a3f5c; /* Match other dividers */
    }
    .tab-container .stTabs [data-baseweb="tab"] {
        background-color: #18223f; /* Match common-card background for inactive tabs */
        color: #c0c0ff; /* Standard text color for inactive tabs */
        border-radius: 5px 5px 0 0; /* Rounded top corners */
        padding: 0.75rem 1.5rem;
        border: 1px solid #2a3f5c; /* Border for inactive tabs */
        border-bottom: none; /* Remove bottom border as it's handled by tab-list */
        margin-bottom: -1px; /* Overlap with tab-list border */
    }
    .tab-container .stTabs [data-baseweb="tab"][aria-selected="true"] {
        background-color: #2a2f4c; /* Slightly lighter for active tab, similar to nav hover */
        color: #ffffff; /* White text for active tab */
        font-weight: bold;
        border-color: #3a6fbf; /* Accent color for active tab border */
    }

    /* Container for the iframes displaying Swagger/ReDoc */
    .iframe-container {
        border: 1px solid #2a3f5c; /* Consistent border */
        border-radius: 0 5px 5px 5px; /* Rounded corners, except top-left to align with tabs */
        overflow: hidden; /* Ensures iframe respects border radius */
        margin-top: -1px; /* Align with tab-list border */
        height: 800px; /* Default height */
    }
    </style>
    """, unsafe_allow_html=True)

# Page Title and Caption, using custom classes for styling
st.markdown('<h1 class="main-title">🚀 API Playground & Documentation Hub</h1>', unsafe_allow_html=True)
st.markdown('<p class="main-caption">Explore and interact with the Tensorus REST API directly.</p>', unsafe_allow_html=True)


# Check if API is running using the shared utility function.
# Returns (reachable: bool, info: dict) — see pages_shared_utils.get_api_status.
api_ok, api_info = get_api_status()

# Display API status message.
if not api_ok:
    # If API is not reachable, display an error message and stop page execution.
    st.markdown(
        f"""
        <div class="status-message-shared status-error">
            <strong>API Connection Error:</strong> The Tensorus API backend does not seem to be running or reachable at <code>{API_BASE_URL}</code>.
            <br>Please ensure the backend (<code>uvicorn api:app --reload</code>) is active to use the API Playground.
        </div>
        """, unsafe_allow_html=True
    )
    st.stop()  # Halt further rendering of the page.
else:
    # If API is reachable, display a success message with API version.
    api_version = api_info.get("version", "N/A")  # Get API version from status info.
    st.markdown(
        f"""
        <div class="status-message-shared status-success">
            Successfully connected to Tensorus API v{api_version} at <code>{API_BASE_URL}</code>.
        </div>
        """, unsafe_allow_html=True
    )

# Introductory text for the API Playground.
st.markdown(
    """
    This section provides live, interactive documentation for the Tensorus API,
    powered by FastAPI's OpenAPI integration. You can explore endpoints,
    view schemas, and even try out API calls directly in your browser.
    """
)

# Use tabs to embed Swagger UI and ReDoc for API documentation.
# Wrapping tabs in a div to apply specific tab styling if needed.
st.markdown('<div class="tab-container">', unsafe_allow_html=True)
tab1, tab2 = st.tabs(["Swagger UI", "ReDoc"])
st.markdown('</div>', unsafe_allow_html=True)


# Construct the URLs for Swagger and ReDoc based on the API_BASE_URL from shared utils.
swagger_url = f"{API_BASE_URL}/docs"
redoc_url = f"{API_BASE_URL}/redoc"

# Swagger UI Tab
with tab1:
    st.subheader("Swagger UI")  # Styled by shared CSS
    st.markdown(f"Explore the API interactively. [Open Swagger UI in new tab]({swagger_url})")
    # Embed Swagger UI using an iframe within a styled container.
    st.markdown('<div class="iframe-container">', unsafe_allow_html=True)
    components.iframe(swagger_url, height=800, scrolling=True)
    st.markdown('</div>', unsafe_allow_html=True)

# ReDoc Tab
with tab2:
    st.subheader("ReDoc")  # Styled by shared CSS
    st.markdown(f"View the API documentation. [Open ReDoc in new tab]({redoc_url})")
    # Embed ReDoc using an iframe within a styled container.
    st.markdown('<div class="iframe-container">', unsafe_allow_html=True)
    components.iframe(redoc_url, height=800, scrolling=True)
    st.markdown('</div>', unsafe_allow_html=True)

st.divider()  # Visual separator.
st.caption("Note: The API backend must be running and accessible to fully utilize the interactive documentation.")
pages/control_panel_v2.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pages/control_panel_v2.py
"""Streamlit "Agent Control Tower" page (V2).

Shows a roster of the Tensorus agents known to the frontend
(``AGENT_DETAILS_MAP``) with live status fetched through the shared page
utilities, start/stop controls per agent, and a conceptual Graphviz
diagram of agent/data-store interactions.
"""

import streamlit as st
import time

# Import from the newly created shared utils for pages
try:
    from pages.pages_shared_utils import (
        get_agent_status,
        start_agent,
        stop_agent,
        load_css as load_shared_css
    )
except ImportError:
    # Fallback stubs keep the names defined; st.stop() halts the script so
    # they are never actually called during normal rendering.
    st.error("Critical Error: Could not import `pages_shared_utils`. Page cannot function.")
    def get_agent_status(): st.error("`get_agent_status` unavailable."); return None
    def start_agent(agent_id): st.error(f"Start action for {agent_id} unavailable."); return False
    def stop_agent(agent_id): st.error(f"Stop action for {agent_id} unavailable."); return False
    def load_shared_css(): pass
    st.stop()


st.set_page_config(page_title="Agent Control Tower (V2)", layout="wide")
load_shared_css()  # Load shared styles first. This is crucial for base styling.

# Custom CSS for Agent Cards on this page.
# These styles complement or override the .common-card style from shared_utils.
# Note: .common-card styles (background, border, padding, shadow) are inherited.
st.markdown("""
    <style>
    /* Agent Card specific styling */
    .agent-card { /* This class is applied along with .common-card */
        /* Example: if agent cards needed a slightly different background or padding than common-card */
        /* background-color: #1c2849; */ /* Slightly different background if needed */
    }
    .agent-card-header { /* Header section within an agent card */
        display: flex;
        align-items: center;
        margin-bottom: 1rem;
        padding-bottom: 0.75rem;
        border-bottom: 1px solid #2a3f5c; /* Consistent border color */
    }
    .agent-card-header .icon { /* Icon in the header */
        font-size: 2em;
        margin-right: 0.75rem;
        color: #9090ff; /* Specific icon color for agent cards */
    }
    .agent-card-header h3 { /* Agent name in the header */
        /* Inherits .common-card h3 styles primarily, ensuring consistency */
        color: #d0d0ff !important; /* Override if a brighter color is needed than common-card h3 */
        font-size: 1.4em !important;
        margin: 0 !important; /* Remove any default margins */
    }
    .agent-card .status-text {
        font-weight: bold !important; /* Ensure status text is bold */
        display: inline-block; /* Allows padding and consistent look */
        padding: 0.2em 0.5em;
        border-radius: 4px;
        font-size: 0.9em;
    }
    /* Specific status colors for agent cards, using shared .status-indicator naming convention for consistency */
    .agent-card .status-running { color: #ffffff; background-color: #4CAF50;}
    .agent-card .status-stopped { color: #e0e0e0; background-color: #525252;}
    .agent-card .status-error { color: #ffffff; background-color: #F44336;}
    .agent-card .status-unknown { color: #333333; background-color: #BDBDBD;}

    .agent-card p.description { /* Agent description text */
        font-size: 0.95em;
        color: #c0c0e0;
        margin-bottom: 0.75rem;
    }
    .agent-card .metrics { /* Styling for the metrics block within an agent card */
        font-size: 0.9em;
        color: #b0b0d0;
        margin-top: 0.75rem;
        padding: 0.6rem; /* Increased padding */
        background-color: rgba(0,0,0,0.15); /* Slightly darker background for metrics section */
        border-radius: 4px;
        border-left: 3px solid #3a6fbf; /* Accent blue border */
    }
    .agent-card .metrics p { margin-bottom: 0.3rem; } /* Spacing for lines within metrics block */

    .agent-card .actions { /* Container for action buttons */
        margin-top: 1.25rem;
        display: flex;
        gap: 0.75rem; /* Space between buttons */
        justify-content: flex-start; /* Align buttons to the start */
    }
    /* Buttons within .actions will inherit general .stButton styling from shared_css */
    </style>
    """, unsafe_allow_html=True)

st.title("🕹️ Agent Control Tower")
st.caption("Manage and monitor your Tensorus intelligent agents.")

# AGENT_DETAILS_MAP provides static information (name, icon, description) for each agent type.
# This map is used to render the UI elements for agents known to the frontend.
# Metrics template helps in displaying placeholder metrics consistently.
AGENT_DETAILS_MAP = {
    "ingestion": {"name": "Ingestion Agent", "icon": "📥", "description": "Monitors file systems and ingests new data tensors into the platform.", "metrics_template": "Files Processed: {files_processed}"},
    "rl_trainer": {"name": "RL Training Agent", "icon": "🧠", "description": "Trains reinforcement learning models using available experiences and data.", "metrics_template": "Training Cycles: {episodes_trained}, Avg. Reward: {avg_reward}"},
    "automl_search": {"name": "AutoML Search Agent", "icon": "✨", "description": "Performs automated machine learning model searches and hyperparameter tuning.", "metrics_template": "Search Trials: {trials_completed}, Best Model Score: {best_score}"},
    "nql_query": {"name": "NQL Query Agent", "icon": "🗣️", "description": "Processes Natural Query Language (NQL) requests against tensor data.", "metrics_template": "Queries Handled: {queries_processed}"}
}

# Create two tabs: one for the roster of agents, another for visualizing interactions.
tab1, tab2 = st.tabs(["📊 Agent Roster", "🔗 Agent Interaction Visualizer"])

# --- Agent Roster Tab ---
with tab1:
    st.header("Live Agent Roster")  # Standard header, styled by shared CSS

    # Button to manually refresh agent statuses from the backend.
    if st.button("🔄 Refresh Agent Statuses"):
        # Update session state cache with fresh data from API.
        # Using a page-specific cache key to avoid conflicts.
        st.session_state.agent_statuses_cache_control_panel_v2 = get_agent_status()
        # Streamlit automatically reruns the script on button press.

    # Cache for agent statuses to avoid redundant API calls on every interaction.
    # Initializes if key is not present or if cache is None (e.g., after an error).
    cache_key = 'agent_statuses_cache_control_panel_v2'
    if cache_key not in st.session_state or st.session_state[cache_key] is None:
        st.session_state[cache_key] = get_agent_status()  # Initial fetch if not in cache

    agents_api_data = st.session_state[cache_key]  # Retrieve cached data.

    # Check if agent data was successfully fetched.
    if not agents_api_data:
        st.error("Could not fetch agent statuses. Ensure the backend API is running and `pages_shared_utils.py` is correctly configured.")
    else:
        num_columns = 2  # Define number of columns for agent card layout.
        cols = st.columns(num_columns)

        # Iterate through a predefined list of agent IDs expected to be available.
        # This ensures consistent ordering and display of known agents.
        agent_ids_to_display = list(AGENT_DETAILS_MAP.keys())

        for i, agent_id in enumerate(agent_ids_to_display):
            # Retrieve static details (icon, name, description) from the predefined map.
            # Provides a fallback if an agent_id from the API isn't in the map (e.g., new agent not yet in UI map).
            agent_static_details = AGENT_DETAILS_MAP.get(agent_id, {
                "name": agent_id.replace("_", " ").title() + " Agent (Unknown)",  # Default name formatting
                "icon": "❓",  # Default icon for unknown agent types
                "description": "Details not defined in AGENT_DETAILS_MAP.",
                "metrics_template": "Status: {status}"  # Basic fallback metric
            })
            # Retrieve live runtime information (status, specific metrics) from the API data.
            agent_runtime_info = agents_api_data.get(agent_id, {})  # Default to empty dict if agent not in API response

            with cols[i % num_columns]:  # Distribute agents into the defined columns.
                # Each agent is displayed in a styled card.
                # The card uses 'common-card' class from shared CSS and page-specific 'agent-card'.
                st.markdown(f'<div class="common-card agent-card" id="agent-card-{agent_id}">', unsafe_allow_html=True)

                # Agent card header: Icon and Name.
                st.markdown(f"""
                    <div class="agent-card-header">
                        <span class="icon">{agent_static_details['icon']}</span>
                        <h3>{agent_static_details['name']}</h3>
                    </div>
                """, unsafe_allow_html=True)

                # Agent status: Fetched live, styled based on status string.
                # Prioritize 'status' field, fallback to 'running' boolean if 'status' is missing.
                current_status = agent_runtime_info.get('status', agent_runtime_info.get('running', 'unknown'))
                if isinstance(current_status, bool):  # Convert boolean status to string representation
                    current_status = "running" if current_status else "stopped"
                status_class = f"status-{current_status.lower()}"  # CSS class for styling the status text
                st.markdown(f'<p>Status: <span class="status-text {status_class}">{current_status.upper()}</span></p>', unsafe_allow_html=True)

                # Agent description from the static map.
                st.markdown(f"<p class='description'>{agent_static_details['description']}</p>", unsafe_allow_html=True)

                # Placeholder for agent-specific metrics.
                # Actual metrics would be populated from `agent_runtime_info` if the backend provides them.
                # Fallback to "N/A" if a metric key is not present in runtime_info.
                metrics_values = {
                    "files_processed": agent_runtime_info.get("files_processed", "N/A"),
                    "episodes_trained": agent_runtime_info.get("episodes_trained", "N/A"),
                    "avg_reward": agent_runtime_info.get("avg_reward", "N/A"),
                    "trials_completed": agent_runtime_info.get("trials_completed", "N/A"),
                    "best_score": agent_runtime_info.get("best_score", "N/A"),
                    "queries_processed": agent_runtime_info.get("queries_processed", "N/A"),
                    "status": current_status  # Fallback for generic status display in metrics
                }
                metrics_str = agent_static_details['metrics_template'].format(**metrics_values)
                st.markdown(f'<div class="metrics"><p>{metrics_str}</p></div>', unsafe_allow_html=True)

                # Action buttons container.
                st.markdown('<div class="actions">', unsafe_allow_html=True)
                actions_cols = st.columns([1,1,1])  # Columns for button layout within the actions div.

                with actions_cols[0]:  # Details button (currently a placeholder).
                    st.button("Details", key=f"details_{agent_id}_v2", help="View detailed agent information (coming soon).", disabled=True, use_container_width=True)

                with actions_cols[1]:  # Start/Stop button, conditional on agent status.
                    if current_status == "running":
                        if st.button("Stop", key=f"stop_{agent_id}_v2", type="secondary", use_container_width=True):
                            # NOTE(review): assumes stop_agent returns a dict with a
                            # "message" key — confirm against pages_shared_utils.
                            result = stop_agent(agent_id)  # API call from shared_utils
                            st.toast(result.get("message", f"Stop request sent for {agent_id}."))
                            time.sleep(1.0)  # Brief pause to allow backend to process the request.
                            st.session_state[cache_key] = get_agent_status()  # Refresh status cache.
                            st.rerun()  # Rerun page to reflect updated status.
                    else:  # Agent is stopped, errored, or in an unknown state.
                        if st.button("Start", key=f"start_{agent_id}_v2", type="primary", use_container_width=True):
                            result = start_agent(agent_id)  # API call from shared_utils
                            st.toast(result.get("message", f"Start request sent for {agent_id}."))
                            time.sleep(1.0)
                            st.session_state[cache_key] = get_agent_status()  # Refresh.
                            st.rerun()

                with actions_cols[2]:  # Configure button (currently a placeholder).
                    st.button("Configure", key=f"config_{agent_id}_v2", help="Configure agent settings (coming soon).", disabled=True, use_container_width=True)

                st.markdown('</div>', unsafe_allow_html=True)  # Close actions div.
                st.markdown('</div>', unsafe_allow_html=True)  # Close agent-card div.

# --- Agent Interaction Visualizer Tab ---
with tab2:  # Agent Interaction Visualizer Tab
    st.header("Agent Interaction Visualizer")  # Standard header
    st.subheader("Conceptual Agent-Data Flow")

    # DOT language definition for the Graphviz chart.
    # This describes the nodes (agents, data stores, sources) and edges (data flow) of the system.
    graphviz_code = """
    digraph AgentDataFlow {
        // General graph attributes
        rankdir=LR; /* Layout direction: Left to Right */
        bgcolor="#0a0f2c"; /* Background color to match the app theme */
        node [shape=record, style="filled,rounded", fillcolor="#18223f", /* Default node style */
              fontname="Arial", fontsize=11, fontcolor="#e0e0e0", /* Default font attributes */
              color="#3a6fbf", penwidth=1.5]; /* Default border color and width */
        edge [fontname="Arial", fontsize=10, fontcolor="#c0c0ff", /* Default edge style */
              color="#7080ff", penwidth=1.2]; /* Default edge color and width */

        // Cluster for Data Sources
        subgraph cluster_data_sources {
            label="External Sources"; /* Cluster title */
            style="rounded";
            color="#4A5C85"; /* Cluster border color */
            bgcolor="#101828"; /* Cluster background (slightly different from main) */
            fontcolor="#D0D0FF"; /* Cluster title color */

            fs [label="FileSystem | (e.g., S3, Local Disk)", shape=folder, fillcolor="#3E5F8A"]; // File system node
            user_queries [label="User Queries | (via UI/API)", shape=ellipse, fillcolor="#5DADE2"]; // User queries node
        }

        // Cluster for Intelligent Agents
        subgraph cluster_agents_group {
            label="Intelligent Agents";
            style="rounded"; color="#4A5C85"; bgcolor="#101828"; fontcolor="#D0D0FF";

            ingestion_agent [label="{IngestionAgent | 📥 | Ingests raw data}", shape=Mrecord, fillcolor="#E74C3C"]; // Red
            nql_agent [label="{NQLAgent | 🗣️ | Processes natural language queries}", shape=Mrecord, fillcolor="#9B59B6"]; // Purple
            rl_agent [label="{RLAgent | 🧠 | Trains RL models, generates experiences}", shape=Mrecord, fillcolor="#2ECC71"]; // Green
            automl_agent [label="{AutoMLAgent | ✨ | Conducts AutoML searches}", shape=Mrecord, fillcolor="#F1C40F"]; // Yellow
        }

        // Cluster for Data Stores
        subgraph cluster_data_stores {
            label="Tensorus Data Stores";
            style="rounded"; color="#4A5C85"; bgcolor="#101828"; fontcolor="#D0D0FF";

            ingested_data_api [label="Ingested Data Store | (Primary Tensor Collection)", shape=cylinder, fillcolor="#7F8C8D", height=1.5]; // Grey
            rl_states [label="RL States | (Tensor Collection)", shape=cylinder, fillcolor="#95A5A6"]; // Lighter Grey
            rl_experiences [label="RL Experiences | (Metadata/Tensor Collection)", shape=cylinder, fillcolor="#95A5A6"];
            automl_results [label="AutoML Results | (Tensor/Metadata Collection)", shape=cylinder, fillcolor="#95A5A6"];
        }

        // Edges defining data flow between nodes
        fs -> ingestion_agent [label=" new files/streams"];
        ingestion_agent -> ingested_data_api [label=" stores tensors & metadata"];

        user_queries -> nql_agent [label=" NQL query"];
        nql_agent -> ingested_data_api [label=" reads data"];
        nql_agent -> rl_experiences [label=" (can also query specific stores like experiences)", style=dashed]; /* Dashed for optional/secondary path */

        ingested_data_api -> rl_agent [label=" reads training data/states"];
        rl_agent -> rl_states [label=" stores/updates policy states"];
        rl_agent -> rl_experiences [label=" stores new experiences"];
        rl_experiences -> rl_agent [label=" reads past experiences for training"]; // Loop back for learning

        ingested_data_api -> automl_agent [label=" reads datasets for AutoML"];
        automl_agent -> automl_results [label=" stores trial results & models"];
    }
    """
    try:
        # Render the Graphviz chart.
        st.graphviz_chart(graphviz_code)
        st.caption("This is a conceptual representation of data flows. Actual interactions can be more complex and configurable.")
    except Exception as e:
        # Handle potential errors if Graphviz is not installed or there's an issue with the DOT code.
        st.error(f"Could not render Graphviz chart: {e}")
        st.markdown("Please ensure Graphviz is installed and accessible in your environment's PATH (e.g., `sudo apt-get install graphviz` on Debian/Ubuntu).")

# Initialize session state key if not already present for this page.
# This helps prevent errors if the page is loaded before the cache is populated.
# NOTE(review): by this point the roster tab has already populated the key,
# so this guard appears to be a no-op on normal runs — confirm before removing.
if 'agent_statuses_cache_control_panel_v2' not in st.session_state:
    st.session_state.agent_statuses_cache_control_panel_v2 = None
pages/data_explorer_v2.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pages/data_explorer_v2.py
"""Tensor Explorer (V2): browse, filter, and visualize Tensorus datasets."""

import streamlit as st
import pandas as pd
import plotly.express as px
import torch

try:
    from pages.pages_shared_utils import (
        load_css as load_shared_css,
        get_datasets,
        get_dataset_preview
    )
except ImportError:
    st.error("Critical Error: Could not import `pages_shared_utils`. Page cannot function.")
    def load_shared_css(): pass
    def get_datasets(): st.error("`get_datasets` unavailable."); return []
    def get_dataset_preview(dataset_name, limit=5): st.error("`get_dataset_preview` unavailable."); return None
    st.stop()


def _flatten_record_metadata(records):
    """Flatten preview records into rows suitable for a filterable DataFrame.

    Each row is a copy of the record's ``metadata`` dict (the source record is
    never mutated) augmented with ``tensor_id``, a stringified ``shape``,
    ``dtype``, and a ``created_by`` value defaulted to ``'Unknown'`` so the
    agent-source filter can always rely on that column being present.

    Args:
        records: List of record dicts as returned by the preview endpoint;
            each may carry ``id``, ``shape``, ``dtype`` and ``metadata``.

    Returns:
        list[dict]: One flat row per input record.
    """
    rows = []
    for record in records:
        row = record.get('metadata', {}).copy()
        row['tensor_id'] = record.get('id', 'N/A')
        row['shape'] = str(record.get('shape', 'N/A'))
        row['dtype'] = record.get('dtype', 'N/A')
        row['created_by'] = row.get('created_by', 'Unknown')
        rows.append(row)
    return rows


def _visualize_tensor(tensor):
    """Render a simple visualization chosen by the tensor's rank.

    1-D -> line chart; 2-D -> Plotly heatmap; 3-D with a leading channel dim
    of 1 or 3 -> best-effort image display; anything else gets an info note.
    """
    if tensor.ndim == 1 and tensor.numel() > 1:  # 1D tensor: line chart.
        st.line_chart(tensor.cpu().numpy())
    elif tensor.ndim == 2 and tensor.shape[0] > 1 and tensor.shape[1] > 1:  # 2D tensor: heatmap.
        try:
            fig = px.imshow(tensor.cpu().numpy(), title="Tensor Heatmap", aspect="auto")
            st.plotly_chart(fig, use_container_width=True)
        except Exception as plot_err:
            st.warning(f"Could not generate heatmap: {plot_err}")
    elif tensor.ndim == 3 and tensor.shape[0] in [1, 3]:  # 3D tensor (potential image).
        try:
            display_tensor = tensor.cpu()
            if display_tensor.shape[0] == 1:  # Grayscale image (C, H, W) -> (H, W)
                display_tensor = display_tensor.squeeze(0)
            else:  # RGB image (C, H, W) -> (H, W, C)
                display_tensor = display_tensor.permute(1, 2, 0)

            # Normalize to [0, 1] if values fall outside a typical image range.
            if display_tensor.max() > 1.0 or display_tensor.min() < 0.0:
                display_tensor = (display_tensor - display_tensor.min()) / (display_tensor.max() - display_tensor.min() + 1e-6)

            # FIX: `use_column_width` is deprecated in Streamlit; the supported
            # parameter is `use_container_width`.
            st.image(display_tensor.numpy(), caption="Tensor as Image (Attempted)", use_container_width=True)
        except Exception as img_err:
            st.warning(f"Could not display tensor as image: {img_err}")
    else:
        st.info("No specific visualization available for this tensor's shape/dimension.")


# Configure page settings and apply the shared Nexus theme.
st.set_page_config(page_title="Tensor Explorer V2", layout="wide")
load_shared_css()

st.title("🔍 Tensor Explorer (V2)")
st.caption("Browse, filter, and visualize Tensorus datasets with agent context.")

# --- Dataset Selection ---
# Fetch the list of available datasets from the backend using shared utility.
available_datasets = get_datasets()
if not available_datasets:
    st.warning("No datasets found. Ensure the backend API is running and accessible.")
    st.stop()  # Nothing to explore without datasets.

selected_dataset_name = st.selectbox("Select Dataset:", available_datasets)

# --- Data Fetching & Initial Processing ---
if selected_dataset_name:
    st.subheader(f"Exploring: {selected_dataset_name}")

    MAX_RECORDS_DISPLAY = 100  # Cap the initial preview fetch.
    dataset_info = get_dataset_preview(selected_dataset_name, limit=MAX_RECORDS_DISPLAY)

    # Validate fetched data before touching its contents.
    if dataset_info is None or "preview" not in dataset_info:
        st.error(f"Failed to fetch data preview for '{selected_dataset_name}'. The backend might be down or the dataset is invalid.")
        st.stop()

    records_preview = dataset_info.get("preview", [])  # Records: tensor data + metadata.
    total_records_in_dataset = dataset_info.get("record_count", len(records_preview))

    if not records_preview:
        st.info(f"Dataset '{selected_dataset_name}' is empty or no preview data available.")
        st.stop()

    st.info(f"Displaying {len(records_preview)} of {total_records_in_dataset} records from '{selected_dataset_name}'.")

    # Flatten metadata into columns so it can be filtered/displayed directly.
    df_display_initial = pd.DataFrame(_flatten_record_metadata(records_preview))

    # --- Sidebar Filtering UI ---
    st.sidebar.header("Filter Options")

    # Filter by Source Agent ('created_by' field).
    st.sidebar.subheader("Filter by Source Agent")
    # Known agent sources, augmented with any sources actually present in the data.
    agent_sources_default = ["IngestionAgent", "RLAgent", "AutoMLAgent", "Unknown"]
    if 'created_by' in df_display_initial.columns:
        available_agents_in_data = df_display_initial['created_by'].unique().tolist()
        filter_options_agents = sorted(set(agent_sources_default + available_agents_in_data))
    else:
        filter_options_agents = agent_sources_default  # Fallback if column is missing.

    selected_agent_filters = st.sidebar.multiselect(
        "Show data created by:",
        options=filter_options_agents,
        default=[]  # Empty default = show all data initially.
    )

    # Filter by other metadata fields (everything except the canonical columns).
    st.sidebar.subheader("Filter by Other Metadata Fields")
    potential_filter_cols = [col for col in df_display_initial.columns if col not in ['tensor_id', 'created_by', 'shape', 'dtype']]

    filter_cols_metadata = st.sidebar.multiselect(
        "Select metadata fields to filter:",
        options=potential_filter_cols
    )

    # Apply filters progressively to a copy of the initial DataFrame.
    filtered_df_final = df_display_initial.copy()

    if selected_agent_filters:
        if 'created_by' in filtered_df_final.columns:
            filtered_df_final = filtered_df_final[filtered_df_final['created_by'].isin(selected_agent_filters)]
        else:
            # Should not happen since _flatten_record_metadata always adds the column.
            st.sidebar.warning("'created_by' field not found for agent filtering.")
            filtered_df_final = filtered_df_final.iloc[0:0]  # Active filter + missing field -> no rows.

    for col in filter_cols_metadata:
        unique_values = filtered_df_final[col].dropna().unique().tolist()
        if pd.api.types.is_numeric_dtype(filtered_df_final[col]) and filtered_df_final[col].nunique() > 1:
            # Numeric column with a real range: offer a range slider.
            min_val, max_val = float(filtered_df_final[col].min()), float(filtered_df_final[col].max())
            selected_range = st.sidebar.slider(f"Filter {col}:", min_val, max_val, (min_val, max_val), key=f"slider_{col}")
            filtered_df_final = filtered_df_final[filtered_df_final[col].between(selected_range[0], selected_range[1])]
        elif 0 < len(unique_values) <= 25:
            # Low-cardinality column: multiselect; only filter once the user picks values.
            selected_values = st.sidebar.multiselect(f"Filter {col}:", options=unique_values, default=[], key=f"multi_{col}")
            if selected_values:
                filtered_df_final = filtered_df_final[filtered_df_final[col].isin(selected_values)]
        else:
            # High-cardinality or free-form column: case-insensitive substring search.
            search_term = st.sidebar.text_input(f"Search in {col} (contains):", key=f"text_{col}").lower()
            if search_term:
                filtered_df_final = filtered_df_final[filtered_df_final[col].astype(str).str.lower().str.contains(search_term, na=False)]

    # --- Display Filtered Data ---
    st.divider()
    st.subheader("Filtered Data View")
    st.write(f"{len(filtered_df_final)} records matching filters.")

    # Canonical columns first, then the rest of the metadata columns.
    cols_to_display_order = ['tensor_id', 'created_by', 'shape', 'dtype']
    remaining_cols = [col for col in df_display_initial.columns if col not in cols_to_display_order]
    final_display_columns = [col for col in cols_to_display_order if col in filtered_df_final.columns] + \
                            [col for col in remaining_cols if col in filtered_df_final.columns]

    st.dataframe(filtered_df_final[final_display_columns], use_container_width=True, hide_index=True)

    # --- Tensor Preview & Visualization ---
    st.divider()
    st.subheader("Tensor Preview")

    if not filtered_df_final.empty:
        available_tensor_ids = filtered_df_final['tensor_id'].tolist()
        selected_tensor_id = st.selectbox("Select Tensor ID to Preview:", available_tensor_ids, key="tensor_preview_select")

        if selected_tensor_id:
            # Look up the full record (including raw tensor data) in the fetched preview.
            selected_full_record = next((r for r in records_preview if r.get('id') == selected_tensor_id), None)

            if selected_full_record:
                st.write("**Full Metadata:**")
                st.json(selected_full_record.get('metadata', {}))

                shape = selected_full_record.get("shape")
                dtype_str = selected_full_record.get("dtype")
                data_list = selected_full_record.get("data")  # Raw nested-list tensor payload.

                st.write(f"**Tensor Info:** Shape=`{shape}`, Dtype=`{dtype_str}`")
                source_agent = selected_full_record.get('metadata', {}).get('created_by', 'Unknown')
                st.write(f"**Source Agent:** `{source_agent}`")

                try:
                    if shape and dtype_str and data_list is not None:
                        # Resolve the dtype string to a real torch.dtype. The isinstance
                        # guard prevents a malicious/garbage dtype string from resolving
                        # to an arbitrary torch attribute (e.g. a function).
                        torch_dtype = getattr(torch, dtype_str, None)
                        if not isinstance(torch_dtype, torch.dtype):
                            st.error(f"Unsupported dtype: {dtype_str}. Cannot reconstruct tensor.")
                        else:
                            tensor = torch.tensor(data_list, dtype=torch_dtype)
                            st.write("**Tensor Data (first 10 elements flattened):**")
                            st.code(f"{tensor.flatten()[:10].cpu().numpy()}...")
                            _visualize_tensor(tensor)
                    else:
                        st.warning("Tensor data, shape, or dtype missing in the selected record.")
                except Exception as tensor_err:
                    st.error(f"Error processing tensor data for preview: {tensor_err}")
            else:
                st.warning("Selected tensor ID details not found in the fetched preview data.")
        else:
            st.info("Select a Tensor ID from the filtered table above to preview its details.")
    else:
        st.info("No records match the current filters to allow preview.")
else:
    st.info("Select a dataset to start exploring.")
pages/nql_chatbot_v2.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pages/nql_chatbot_v2.py
"""NQL Chatbot (V2): query Tensorus datasets in natural language."""

import streamlit as st
import pandas as pd

# Import from the shared utils for pages
try:
    from pages.pages_shared_utils import (
        load_css as load_shared_css,
        post_nql_query
    )
except ImportError:
    st.error("Critical Error: Could not import `pages_shared_utils`. Page cannot function.")
    def load_shared_css(): pass
    def post_nql_query(query: str):
        st.error("`post_nql_query` unavailable.")
        return {"query": query, "response_text": "Error: NQL processing unavailable.", "error": "Setup issue.", "results": None}
    st.stop()


def _records_to_dataframe(results_data):
    """Flatten NQL result records into a display-ready DataFrame.

    Each record may carry ``id``, ``shape``, ``dtype`` and a ``metadata`` dict;
    metadata keys become columns. The canonical ``tensor_id``, ``shape`` and
    ``dtype`` columns are written after the metadata is merged, so a stray
    metadata key can never clobber them (the original code protected only
    ``shape``/``dtype`` and let metadata overwrite ``tensor_id``).

    Args:
        results_data: List of record dicts from the NQL API response.

    Returns:
        pandas.DataFrame or None: A DataFrame with one row per record, or
        None when ``results_data`` is empty.
    """
    rows = []
    for record in results_data:
        row = {"tensor_id": record.get("id")}  # Keep tensor_id as the first column.
        meta = record.get("metadata")
        if isinstance(meta, dict):
            # Merge metadata without letting it shadow the canonical id column.
            row.update({k: v for k, v in meta.items() if k != "tensor_id"})
        row["shape"] = str(record.get("shape"))  # Stringified for display.
        row["dtype"] = record.get("dtype")
        rows.append(row)
    return pd.DataFrame(rows) if rows else None


st.set_page_config(page_title="NQL Chatbot (V2)", layout="wide")
load_shared_css()  # Load common CSS from shared utilities

# Custom CSS for the chat interface (message bubbles, avatars, input bar).
st.markdown("""
    <style>
    /* Chatbot specific styles */
    .stChatMessage { /* Base style for chat messages */
        border-radius: 10px; /* Rounded corners */
        padding: 0.85rem 1.15rem; /* Comfortable padding */
        margin-bottom: 0.75rem; /* Space between messages */
        box-shadow: 0 2px 5px rgba(0,0,0,0.15); /* Subtle shadow */
        border: 1px solid #2a3f5c; /* Consistent border with other elements */
    }
    /* User messages styling */
    .stChatMessage[data-testid="stChatMessageContent"]:has(.user-avatar) {
        background-color: #2a2f4c; /* Darker blue, similar to nav hover, for user messages */
        border-left: 5px solid #3a6fbf; /* Accent blue border, consistent with active nav link */
    }
    /* Assistant messages styling */
    .stChatMessage[data-testid="stChatMessageContent"]:has(.assistant-avatar) {
        background-color: #18223f; /* Slightly lighter blue-gray, similar to common-card */
        border-left: 5px solid #7070ff; /* Muted purple accent for assistant messages */
    }
    .user-avatar, .assistant-avatar { /* Styling for user/assistant avatars */
        font-size: 1.5rem; /* Avatar size */
        margin-right: 0.5rem; /* Space between avatar and message content */
    }

    /* Styling for the chat input area at the bottom */
    /* The .chat-input-container class is a convention, Streamlit might not add this by default.
       Targeting based on Streamlit's internal structure is more robust. */
    div[data-testid="stChatInput"] { /* Main container for chat input */
        background-color: #0a0f2c; /* Match page background */
        border-top: 1px solid #2a3f5c; /* Separator line */
        padding: 0.75rem;
    }
    div[data-testid="stChatInput"] textarea { /* The actual text input field */
        border: 1px solid #3a3f5c !important;
        background-color: #1a1f3c !important; /* Dark input background */
        color: #e0e0e0 !important; /* Light text color */
        border-radius: 5px !important;
    }
    div[data-testid="stChatInput"] button { /* The send button */
        border: none !important;
        background-color: #3a6fbf !important; /* Accent blue, consistent with primary buttons */
        color: white !important;
        border-radius: 5px !important;
    }
    div[data-testid="stChatInput"] button:hover {
        background-color: #4a7fdc !important; /* Lighter blue on hover */
    }

    /* Styling for DataFrames displayed within chat messages */
    .stDataFrame { /* General DataFrame styling from shared_utils will apply */
        margin-top: 0.5rem; /* Space above DataFrame in chat */
    }
    /* .results-dataframe and .results-dataframe .col-header are less reliable than generic .stDataFrame */
    </style>
    """, unsafe_allow_html=True)


st.title("💬 Natural Query Language (NQL) Chatbot")
st.caption("Query Tensorus datasets using natural language. Powered by Tensorus NQL Agent.")

# Initialize chat history in session state. A page-unique key avoids clashes
# with other chat interfaces in the app.
if "nql_messages_v2" not in st.session_state:
    st.session_state.nql_messages_v2 = []

# Replay the persisted conversation on every rerun so it survives interaction.
for message in st.session_state.nql_messages_v2:
    avatar = "👤" if message["role"] == "user" else "🤖"
    with st.chat_message(message["role"], avatar=avatar):
        st.markdown(message["content"])
        # A stored message carries either a results DataFrame or an error, not both.
        if "results_df" in message and message["results_df"] is not None and not message["results_df"].empty:
            st.dataframe(message["results_df"], use_container_width=True, hide_index=True)
        elif "error" in message and message["error"]:
            st.error(message["error"])


# React to new user input from the chat box.
if prompt := st.chat_input("Enter your query (e.g., 'show 5 tensors from my_dataset')"):
    # Record and echo the user's message.
    st.session_state.nql_messages_v2.append({"role": "user", "content": prompt})
    with st.chat_message("user", avatar="👤"):
        st.markdown(prompt)

    # Query the backend while showing a spinner.
    with st.spinner("Tensorus NQL Agent is thinking..."):
        nql_api_response = post_nql_query(prompt)

    # Pull the pieces out of the API response.
    response_text = nql_api_response.get("response_text", "Sorry, I encountered an issue processing your request.")
    results_data = nql_api_response.get("results")  # Expected: list of record dicts.
    error_message = nql_api_response.get("error")

    assistant_message_data = {"role": "assistant", "content": response_text}

    with st.chat_message("assistant", avatar="🤖"):
        st.markdown(response_text)

        if results_data:  # The API returned records.
            try:
                results_df = _records_to_dataframe(results_data)
                if results_df is not None:
                    st.dataframe(results_df, use_container_width=True, hide_index=True)
                    assistant_message_data["results_df"] = results_df  # Persist for replay.
                elif not error_message:  # No rows and no error: informational query.
                    st.caption("Query processed, no specific records returned.")
            except Exception as e:
                st.error(f"Error formatting results for display: {e}")
                assistant_message_data["error"] = f"Error formatting results: {e}"

        elif error_message:  # Explicit backend error with no results.
            st.error(error_message)
            assistant_message_data["error"] = error_message

        # Otherwise response_text alone is the answer.

    # Persist the assistant turn (with any DataFrame/error) for replay.
    st.session_state.nql_messages_v2.append(assistant_message_data)

else:  # Page load / empty input: greet if the conversation hasn't started.
    if not st.session_state.nql_messages_v2:
        st.info("Ask me anything about your data! For example: 'list datasets' or 'show tensors from dataset XYZ limit 5'.")
pages/pages_shared_utils.py ADDED
@@ -0,0 +1,547 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pages/pages_shared_utils.py
"""
Shared utility functions for Streamlit pages.

Copied/adapted from app.py to avoid complex import issues. Provides the
shared Nexus-theme CSS loader plus thin HTTP helpers around the backend
REST API used by the `pages/*_v2.py` scripts.
"""
import streamlit as st
import requests
import logging
import os # Added import
from typing import Optional, List, Dict, Any # Added for new functions

# Module-level logger shared by all helpers in this file.
logger = logging.getLogger(__name__)

# Base URL of the backend API. Overridable via the API_BASE_URL environment
# variable; defaults to localhost on port 7860.
API_BASE_URL = os.getenv("API_BASE_URL", "http://127.0.0.1:7860") # Changed API_BASE_URL
15
+
16
def load_css():
    """Inject the shared "Nexus" theme stylesheet into the current page.

    Intended to be called once per page (the `*_v2.py` pages call it right
    after ``st.set_page_config``). Styles the app background, headings, the
    custom top-nav bar, ``.common-card`` containers, ``.status-*`` pills,
    and Streamlit inputs/buttons/dataframes via ``st.markdown`` with
    ``unsafe_allow_html=True``.
    """
    st.markdown("""
    <style>
    /* --- Shared Base Styles for Tensorus Platform (Nexus Theme) --- */

    /* General Page Styles */
    body {
        font-family: 'Arial', 'Helvetica Neue', 'Helvetica', sans-serif;
        line-height: 1.6;
    }
    .stApp { /* Main Streamlit app container */
        background-color: #0a0f2c; /* Primary Background: Dark blue/purple */
        color: #e0e0e0; /* Default Text Color: Light grey */
    }

    /* Headings & Titles */
    h1, .stTitle { /* Main page titles */
        color: #d0d0ff !important; /* Primary Heading Color: Light purple/blue */
        font-weight: bold !important;
    }
    h2, .stSubheader { /* Section headers */
        color: #c0c0ef !important; /* Secondary Heading Color */
        font-weight: bold !important;
        border-bottom: 1px solid #3a3f5c; /* Accent Border for separation */
        padding-bottom: 0.3rem;
        margin-top: 1.5rem;
        margin-bottom: 1rem;
    }
    h3 { /* General h3, often used in st.markdown */
        color: #b0b0df !important; /* Tertiary Heading Color */
        font-weight: bold !important;
    }
    .stMarkdown p, .stText, .stListItem { /* General text elements */
        color: #c0c0dd; /* Softer light text */
        font-size: 1rem;
    }
    .stCaption, caption { /* Streamlit captions and HTML captions */
        font-size: 0.85rem !important;
        color: #a0a0c0 !important; /* Muted color for captions */
    }

    /* Custom Top Navigation Bar */
    .topnav-container {
        background-color: #1a1f3c; /* Nav Background: Slightly darker than page */
        padding: 0.5rem 1rem;
        border-bottom: 1px solid #3a3f5c; /* Accent Border */
        display: flex;
        justify-content: flex-start;
        align-items: center;
        position: sticky; top: 0; z-index: 1000; /* Ensure it's on top */
        width: 100%;
        box-sizing: border-box;
    }
    .topnav-container .logo {
        font-size: 1.5em;
        font-weight: bold;
        color: #d0d0ff; /* Primary Heading Color for logo */
        margin-right: 2rem;
    }
    .topnav-container nav a {
        color: #c0c0ff; /* Lighter Text for Nav Links */
        padding: 0.75rem 1rem;
        text-decoration: none;
        font-weight: 500;
        margin-right: 0.5rem;
        border-radius: 4px;
        transition: background-color 0.2s ease, color 0.2s ease;
    }
    .topnav-container nav a:hover {
        background-color: #2a2f4c; /* Nav Link Hover Background */
        color: #ffffff; /* Nav Link Hover Text */
    }
    .topnav-container nav a.active {
        background-color: #3a6fbf; /* Active Nav Link Background (Accent Blue) */
        color: #ffffff; /* Active Nav Link Text */
        font-weight: bold;
    }

    /* Common Card Style (base for metric cards, agent cards, etc.) */
    .common-card {
        background-color: #18223f; /* Card Background: Darker than nav, but lighter than page */
        border: 1px solid #2a3f5c; /* Card Border: Accent Border color */
        border-radius: 10px;
        padding: 1.5rem;
        margin-bottom: 1rem; /* Space below cards */
        box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
        transition: transform 0.2s ease-in-out, box-shadow 0.2s ease-in-out;
        color: #e0e0e0; /* Default text color within cards */
    }
    .common-card:hover {
        transform: translateY(-5px);
        box-shadow: 0 8px 16px rgba(0,0,0,0.3);
    }
    .common-card h3 { /* Titles within cards */
        color: #b0b0df !important; /* Card Title Color: Slightly lighter than main headings */
        font-size: 1.2em !important; /* Slightly larger for card titles */
        margin-top: 0 !important; /* Remove default top margin for h3 in card */
        margin-bottom: 0.75rem !important;
        font-weight: bold !important;
        border-bottom: none !important; /* Override general h2 border for card h3 */
    }
    .common-card p { /* Paragraphs within cards */
        font-size: 0.95em !important; /* Slightly smaller for card content */
        color: #c0c0dd !important;
        margin-bottom: 0.5rem !important;
    }
    .common-card .icon { /* For icons within cards, like dashboard metric cards */
        font-size: 2.5em;
        margin-bottom: 0.75rem;
        color: #7070ff; /* Icon Color: Muted accent */
    }


    /* Status Indicators (can be used with <span> or <p> or custom divs) */
    .status-indicator {
        padding: 0.4rem 0.8rem !important; /* Slightly more padding */
        border-radius: 15px !important; /* Pill shape */
        font-weight: bold !important;
        font-size: 0.85em !important;
        display: inline-block !important;
        text-align: center !important;
    }
    .status-success, .status-running { color: #ffffff !important; background-color: #4CAF50 !important; } /* Green */
    .status-error { color: #ffffff !important; background-color: #F44336 !important; } /* Red */
    .status-warning { color: #000000 !important; background-color: #FFC107 !important; } /* Amber */
    .status-info { color: #ffffff !important; background-color: #2196F3 !important; } /* Blue */
    .status-stopped { color: #e0e0e0 !important; background-color: #525252 !important; } /* Darker Grey for stopped */
    .status-unknown { color: #333333 !important; background-color: #BDBDBD !important; } /* Lighter grey for unknown */


    /* Standardized Streamlit Input Styling */
    .stTextInput > div > div > input,
    .stTextArea > div > div > textarea,
    .stSelectbox > div > div,
    .stNumberInput > div > div > input {
        border: 1px solid #3a3f5c !important; /* Accent Border */
        background-color: #1a1f3c !important; /* Nav Background color for inputs */
        color: #e0e0e0 !important; /* Default Text Color */
        border-radius: 5px !important;
    }
    .stMultiSelect > div > div > div { /* Multiselect options container */
        border: 1px solid #3a3f5c !important;
        background-color: #1a1f3c !important;
    }
    .stMultiSelect span[data-baseweb="tag"] { /* Selected items in multiselect */
        background-color: #3a6fbf !important; /* Active Nav Link Background */
    }


    /* Standardized Streamlit Button Styling */
    .stButton > button {
        border: 1px solid #3a6fbf !important; /* Accent Blue for border */
        background-color: #3a6fbf !important; /* Accent Blue for background */
        color: white !important;
        border-radius: 5px !important;
        padding: 0.5rem 1rem !important;
        transition: background-color 0.2s ease, border-color 0.2s ease;
    }
    .stButton > button:hover {
        background-color: #4a7fdc !important; /* Lighter Accent Blue for hover */
        border-color: #4a7fdc !important;
    }
    .stButton > button:disabled {
        background-color: #2a2f4c !important;
        border-color: #2a2f4c !important;
        color: #777777 !important;
    }
    /* For secondary buttons, Streamlit uses a 'kind' attribute in HTML we can't directly target via pure CSS.
       Instead, use st.button(..., type="secondary") and rely on Streamlit's handling,
       or use st.markdown for fully custom buttons if default secondary is not enough.
       The below attempts to style based on common Streamlit secondary button appearance.
       Note: This specific selector for secondary buttons might be unstable if Streamlit changes its internal class names.
    */
    .stButton button.st-emotion-cache-LPTKCI { /* Example selector for a secondary button, MAY BE UNSTABLE */
        background-color: #2a2f4c !important;
        border: 1px solid #2a2f4c !important;
        color: #c0c0ff !important;
    }
    .stButton button.st-emotion-cache-LPTKCI:hover {
        background-color: #3a3f5c !important;
        border-color: #3a3f5c !important;
        color: #ffffff !important;
    }
    .stButton button.st-emotion-cache-LPTKCI:disabled { /* Disabled secondary button */
        background-color: #1e2a47 !important;
        border-color: #1e2a47 !important;
        color: #555555 !important;
    }


    /* Dataframe styling */
    .stDataFrame { /* Main container for dataframes */
        border: 1px solid #2a3f5c !important; /* Accent Border */
        border-radius: 5px !important;
        background-color: #1a1f3c !important; /* Nav Background for dataframe background */
    }
    .stDataFrame th { /* Headers */
        background-color: #2a2f4c !important; /* Nav Link Hover Background for headers */
        color: #d0d0ff !important; /* Primary Heading Color for header text */
        font-weight: bold;
    }
    .stDataFrame td { /* Cells */
        color: #c0c0dd !important; /* Softer light text for cell data */
        border-bottom-color: #2a3f5c !important; /* Accent border for cell lines */
        border-top-color: #2a3f5c !important;
    }

    </style>
    """, unsafe_allow_html=True)
226
+
227
def get_api_status() -> tuple[bool, dict]:
    """Probe the backend API root endpoint for liveness.

    Uses the module-level ``API_BASE_URL`` constant.

    Returns:
        tuple[bool, dict]: ``(True, info)`` where ``info`` is the decoded JSON
        body when the API answers with a 2xx status; otherwise ``(False, err)``
        where ``err`` carries a single ``"error"`` message describing the failure.
    """
    root_url = f"{API_BASE_URL}/"
    try:
        resp = requests.get(root_url, timeout=3)  # modest timeout: this is a health probe
        resp.raise_for_status()
        return True, resp.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"API connection error in get_api_status (shared_utils): {e}")
        return False, {"error": f"API connection failed: {str(e)}"}
    except Exception as e:
        logger.exception(f"Unexpected error in get_api_status (shared_utils): {e}")
        return False, {"error": f"An unexpected error occurred: {str(e)}"}
250
+
251
def get_agent_status() -> Optional[dict]:
    """Retrieve status information for every registered agent.

    Queries ``{API_BASE_URL}/agents/status``. On success the result maps each
    agent id to a dict of its status and configuration.

    Returns:
        Optional[dict]: The agent-status mapping, or ``None`` when the request
        fails or an unexpected error occurs.
    """
    status_url = f"{API_BASE_URL}/agents/status"
    try:
        resp = requests.get(status_url, timeout=5)
        resp.raise_for_status()
        return resp.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"API error fetching agent status (shared_utils): {e}")
        return None
    except Exception as e:
        logger.exception(f"Unexpected error in get_agent_status (shared_utils): {e}")
        return None
273
+
274
def _agent_lifecycle_request(agent_id: str, action: str, verb: str) -> dict:
    """POST ``/agents/{agent_id}/{action}`` and normalise failures to a dict.

    Shared implementation for :func:`start_agent` and :func:`stop_agent`,
    which previously duplicated this body verbatim.

    Args:
        agent_id: Target agent identifier.
        action: Endpoint suffix, ``"start"`` or ``"stop"``.
        verb: Present participle ("starting"/"stopping") used in log text.

    Returns:
        dict: Decoded API response; on failure a dict with ``success: False``
        and a descriptive ``message``.
    """
    try:
        # Slightly generous timeout: lifecycle changes can take a moment.
        response = requests.post(f"{API_BASE_URL}/agents/{agent_id}/{action}", timeout=7)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"API error {verb} agent {agent_id} (shared_utils): {e}")
        return {"success": False, "message": f"Failed to {action} agent {agent_id}: {str(e)}"}
    except Exception as e:
        logger.exception(f"Unexpected error in {action}_agent (shared_utils) for {agent_id}: {e}")
        return {"success": False, "message": f"An unexpected error occurred: {str(e)}"}

def start_agent(agent_id: str) -> dict:
    """Request that the backend start the agent ``agent_id``.

    Args:
        agent_id: The unique identifier of the agent to start.

    Returns:
        dict: API response, typically with 'success' and 'message' keys; on
        connection or unexpected errors, ``success`` is False.
    """
    return _agent_lifecycle_request(agent_id, "start", "starting")

def stop_agent(agent_id: str) -> dict:
    """Request that the backend stop the agent ``agent_id``.

    Args:
        agent_id: The unique identifier of the agent to stop.

    Returns:
        dict: API response, typically with 'success' and 'message' keys; on
        connection or unexpected errors, ``success`` is False.
    """
    return _agent_lifecycle_request(agent_id, "stop", "stopping")
325
+
326
def get_datasets() -> list[str]:
    """Return the names of all datasets known to the backend.

    Queries the ``/explorer/datasets`` endpoint. Any failure (connection
    error, bad status, missing key) degrades to an empty list so callers
    can render gracefully without the backend.

    Returns:
        list[str]: Dataset names, possibly empty.
    """
    try:
        resp = requests.get(f"{API_BASE_URL}/explorer/datasets", timeout=5)
        resp.raise_for_status()
        payload = resp.json()
        return payload.get("datasets", [])
    except requests.exceptions.RequestException as e:
        logger.error(f"API error fetching datasets (shared_utils): {e}")
        return []
    except Exception as e:
        logger.exception(f"Unexpected error in get_datasets (shared_utils): {e}")
        return []
349
+
350
def get_dataset_preview(dataset_name: str, limit: int = 10) -> Optional[dict]:
    """Fetch up to ``limit`` preview records for ``dataset_name``.

    Targets ``/explorer/dataset/{dataset_name}/preview``. The query string is
    passed via ``params=`` so ``requests`` URL-encodes it, instead of being
    concatenated by hand into the URL.

    Args:
        dataset_name: The name of the dataset to preview.
        limit: Maximum number of records to fetch. Defaults to 10.

    Returns:
        Optional[dict]: Response payload (e.g. 'dataset', 'record_count',
        'preview' list of records — each typically with 'id', 'shape',
        'dtype', 'metadata' and raw 'data'), or ``None`` on any failure.
    """
    try:
        response = requests.get(
            f"{API_BASE_URL}/explorer/dataset/{dataset_name}/preview",
            params={"limit": limit},
            timeout=10,
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"API error fetching dataset preview for {dataset_name} (shared_utils): {e}")
        return None
    except Exception as e:
        logger.exception(f"Unexpected error in get_dataset_preview (shared_utils) for {dataset_name}: {e}")
        return None
378
+
379
+
380
+
381
+
382
def get_tensor_metadata(dataset_name: str, tensor_id: str) -> Optional[dict]:
    """Fetch metadata for a single tensor via the Explorer API.

    Returns the ``"metadata"`` entry of the response when present, the whole
    payload otherwise, or ``None`` on any failure.
    """
    metadata_url = (
        f"{API_BASE_URL}/explorer/dataset/{dataset_name}/tensor/{tensor_id}/metadata"
    )
    try:
        resp = requests.get(metadata_url, timeout=5)
        resp.raise_for_status()
        payload = resp.json()
        return payload.get("metadata", payload)
    except requests.exceptions.RequestException as e:
        logger.error(
            f"API error fetching tensor metadata for {dataset_name}/{tensor_id} (shared_utils): {e}"
        )
        return None
    except Exception as e:
        logger.exception(
            f"Unexpected error in get_tensor_metadata (shared_utils) for {dataset_name}/{tensor_id}: {e}"
        )
        return None
404
+
405
def list_all_agents() -> list[dict[str, str]]:
    """Summarise all agents as ``{"id", "name", "status"}`` dictionaries.

    Convenience wrapper around :func:`get_agent_status` for callers that
    prefer a list format. When an agent's name is not provided by the
    backend, a readable default is derived from its id (underscores become
    spaces, words are capitalised); a missing status defaults to "unknown".

    Returns:
        list[dict[str, str]]: One summary dict per agent; empty if agent
        statuses cannot be fetched.
    """
    statuses = get_agent_status()
    if not statuses:
        return []
    summaries: list[dict[str, str]] = []
    for agent_id, info in statuses.items():
        summaries.append(
            {
                "id": agent_id,
                "name": info.get("name", agent_id.replace("_", " ").title()),
                "status": info.get("status", "unknown"),
            }
        )
    return summaries
432
+
433
def post_nql_query(query: str) -> dict:
    """Submit a Natural Query Language string to the backend.

    POSTs ``{"query": ...}`` to the ``/chat/query`` endpoint.

    Args:
        query: The NQL query string provided by the user.

    Returns:
        dict: On success, typically contains 'query', 'response_text',
        'results' (list of tensor records, each possibly with 'id', 'shape',
        'dtype', 'metadata', 'data') and 'count'. On failure, contains the
        original 'query', an error 'response_text', an 'error' string and
        ``results=None`` so callers can surface the problem in the UI.
    """
    try:
        resp = requests.post(
            f"{API_BASE_URL}/chat/query",  # Ensure API_BASE_URL is defined in this file
            json={"query": query},
            timeout=15,
        )
        resp.raise_for_status()
        return resp.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Error posting NQL query from pages_shared_utils: {e}")
        # Let the caller handle UI error display
        return {"query": query, "response_text": "Error connecting to backend or processing query.", "error": str(e), "results": None}
    except Exception as e:
        logger.exception(f"Unexpected error in post_nql_query (pages_shared_utils): {e}")
        return {"query": query, "response_text": "An unexpected error occurred.", "error": str(e), "results": None}
473
+
474
+ # --- Functions to be added from app.py ---
475
+
476
def configure_agent(agent_id: str, config: dict) -> dict:
    """Push a configuration dictionary to one agent.

    POSTs ``{"config": ...}`` to the ``/agents/{agent_id}/configure`` endpoint.

    Args:
        agent_id: The unique identifier of the agent to configure.
        config: Configuration mapping forwarded verbatim to the backend.

    Returns:
        dict: API response, typically with 'success' and 'message' keys; on
        connection or unexpected errors, ``{"success": False, "message": ...}``.
    """
    endpoint = f"{API_BASE_URL}/agents/{agent_id}/configure"
    try:
        resp = requests.post(endpoint, json={"config": config}, timeout=7)
        resp.raise_for_status()
        return resp.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"API error configuring agent {agent_id} (shared_utils): {e}")
        return {"success": False, "message": f"Failed to configure agent {agent_id}: {str(e)}"}
    except Exception as e:
        logger.exception(f"Unexpected error in configure_agent (shared_utils) for {agent_id}: {e}")
        return {"success": False, "message": f"An unexpected error occurred: {str(e)}"}
506
+
507
def operate_explorer(dataset: str, operation: str, index: int, params: dict) -> dict:
    """Run a data-explorer operation against one tensor of a dataset.

    POSTs to the ``/explorer/operate`` endpoint with the dataset name,
    operation, tensor index and free-form operation parameters.

    Args:
        dataset: Name of the dataset containing the tensor.
        operation: The operation to perform (e.g. 'view', 'transform').
        index: Index of the tensor within the dataset.
        params: Additional parameters required by the operation.

    Returns:
        dict: API response with 'success', 'metadata' and 'result_data' keys;
        on failure, ``success`` is False and ``metadata`` carries an 'error'
        message while ``result_data`` is None.
    """
    request_body = {
        "dataset": dataset,
        "operation": operation,
        "tensor_index": index,
        "params": params,
    }
    try:
        resp = requests.post(
            f"{API_BASE_URL}/explorer/operate",
            json=request_body,
            timeout=15,  # operations may be long-running
        )
        resp.raise_for_status()
        return resp.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"API error in operate_explorer for {dataset} (shared_utils): {e}")
        return {"success": False, "metadata": {"error": str(e)}, "result_data": None}
    except Exception as e:
        logger.exception(f"Unexpected error in operate_explorer (shared_utils) for {dataset}: {e}")
        return {"success": False, "metadata": {"error": str(e)}, "result_data": None}
pages/ui_utils.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ui_utils.py (Modifications for Step 3)
2
+ """Utility functions for the Tensorus Streamlit UI, now using API calls."""
3
+
4
+ import requests
5
+ import streamlit as st
6
+ import logging
7
+ from typing import List, Dict, Any, Optional
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ # --- Configuration ---
12
+ TENSORUS_API_URL = "http://127.0.0.1:7860" # Ensure FastAPI runs here
13
+
14
+ # --- API Interaction Functions ---
15
+
16
def get_api_status() -> bool:
    """Return True when the Tensorus API root answers with HTTP 200.

    Connection failures AND timeouts are both treated as "API down".
    (Previously only ConnectionError was caught explicitly; a Timeout fell
    through to the generic handler and logged an error on every poll.)
    """
    try:
        response = requests.get(f"{TENSORUS_API_URL}/", timeout=2)
        return response.status_code == 200
    except requests.exceptions.RequestException:
        # An unreachable or slow backend is an expected condition for the UI.
        return False
    except Exception as e:
        logger.error(f"Error checking API status: {e}")
        return False
26
+
27
def list_datasets() -> Optional[List[str]]:
    """Fetch the list of dataset names from the API.

    Returns:
        Optional[List[str]]: Dataset names on success, ``None`` on any
        failure (the error is surfaced in the Streamlit UI).
    """
    try:
        # Explicit timeout so a stalled backend cannot hang the Streamlit page.
        response = requests.get(f"{TENSORUS_API_URL}/datasets", timeout=10)
        response.raise_for_status()
        data = response.json()
        if data.get("success"):
            return data.get("data", [])
        st.error(f"API Error listing datasets: {data.get('message')}")
        return None
    except requests.exceptions.RequestException as e:
        st.error(f"Connection Error listing datasets: {e}")
        return None
    except Exception as e:
        st.error(f"Unexpected error listing datasets: {e}")
        logger.exception("Unexpected error in list_datasets")
        return None
45
+
46
def fetch_dataset_data(dataset_name: str, offset: int = 0, limit: int = 50) -> Optional[List[Dict[str, Any]]]:
    """Fetch one page of records from a dataset via the API.

    Args:
        dataset_name: Dataset to read.
        offset: Zero-based record offset of the page.
        limit: Maximum number of records to return.

    Returns:
        Optional[List[Dict[str, Any]]]: Records on success, ``None`` on failure.
    """
    try:
        params = {"offset": offset, "limit": limit}
        # Explicit timeout so a stalled backend cannot hang the Streamlit page.
        response = requests.get(
            f"{TENSORUS_API_URL}/datasets/{dataset_name}/records", params=params, timeout=15
        )
        response.raise_for_status()
        data = response.json()
        if data.get("success"):
            return data.get("data", [])
        st.error(f"API Error fetching '{dataset_name}': {data.get('message')}")
        return None
    except requests.exceptions.RequestException as e:
        st.error(f"Connection Error fetching '{dataset_name}': {e}")
        return None
    except Exception as e:
        st.error(f"Unexpected error fetching '{dataset_name}': {e}")
        logger.exception(f"Unexpected error in fetch_dataset_data for {dataset_name}")
        return None
65
+
66
def execute_nql_query(query: str) -> Optional[Dict[str, Any]]:
    """Send an NQL query to the API and return the full NQLResponse dict.

    NQL processing errors (HTTP 400) are mapped to a failure dict rather
    than raised; other HTTP errors and connection problems are reported via
    the Streamlit UI and also returned as failure dicts.
    """
    try:
        payload = {"query": query}
        # Generous timeout: query processing can be slow; bounded so the UI never hangs.
        response = requests.post(f"{TENSORUS_API_URL}/query", json=payload, timeout=30)
        # Handle specific NQL errors (400) vs other errors.
        if response.status_code == 400:
            try:
                error_detail = response.json().get("detail", "Unknown NQL processing error")
            except ValueError:
                # 400 body was not JSON; don't let the decode error mask the failure.
                error_detail = "Unknown NQL processing error"
            return {"success": False, "message": error_detail, "results": None, "count": None}
        response.raise_for_status()  # Raise for 5xx etc.
        return response.json()  # Return the full NQLResponse structure
    except requests.exceptions.RequestException as e:
        st.error(f"Connection Error executing NQL query: {e}")
        return {"success": False, "message": f"Connection Error: {e}", "results": None, "count": None}
    except Exception as e:
        st.error(f"Unexpected error executing NQL query: {e}")
        logger.exception("Unexpected error in execute_nql_query")
        return {"success": False, "message": f"Unexpected Error: {e}", "results": None, "count": None}
84
+
85
+ # --- NEW/UPDATED Agent and Metrics Functions ---
86
+
87
def list_all_agents() -> Optional[List[Dict[str, Any]]]:
    """Fetch the list of all registered agents from the API.

    Returns:
        Optional[List[Dict[str, Any]]]: The AgentInfo dicts (the response
        body is directly the list), or ``None`` on failure.
    """
    try:
        # Explicit timeout so a stalled backend cannot hang the Streamlit page.
        response = requests.get(f"{TENSORUS_API_URL}/agents", timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Connection Error listing agents: {e}")
        return None
    except Exception as e:
        st.error(f"Unexpected error listing agents: {e}")
        logger.exception("Unexpected error in list_all_agents")
        return None
101
+
102
def get_agent_status(agent_id: str) -> Optional[Dict[str, Any]]:
    """Fetch the status of a specific agent from the API.

    Args:
        agent_id: Agent identifier.

    Returns:
        Optional[Dict[str, Any]]: The AgentStatus model dict, or ``None``
        when the agent is unknown (404) or the request fails.
    """
    try:
        # Explicit timeout so a stalled backend cannot hang the Streamlit page.
        response = requests.get(f"{TENSORUS_API_URL}/agents/{agent_id}/status", timeout=10)
        if response.status_code == 404:
            st.error(f"Agent '{agent_id}' not found via API.")
            return None
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Connection Error getting status for agent '{agent_id}': {e}")
        return None
    except Exception as e:
        st.error(f"Unexpected error getting status for agent '{agent_id}': {e}")
        logger.exception(f"Unexpected error in get_agent_status for {agent_id}")
        return None
119
+
120
def get_agent_logs(agent_id: str, lines: int = 20) -> Optional[List[str]]:
    """Fetch recent log lines for a specific agent from the API.

    Args:
        agent_id: Agent identifier.
        lines: Maximum number of log lines to request.

    Returns:
        Optional[List[str]]: Log lines (from the AgentLogResponse 'logs'
        field), or ``None`` on 404 / failure.
    """
    try:
        # Explicit timeout so a stalled backend cannot hang the Streamlit page.
        response = requests.get(
            f"{TENSORUS_API_URL}/agents/{agent_id}/logs", params={"lines": lines}, timeout=10
        )
        if response.status_code == 404:
            st.error(f"Agent '{agent_id}' not found via API for logs.")
            return None
        response.raise_for_status()
        data = response.json()
        return data.get("logs", [])
    except requests.exceptions.RequestException as e:
        st.error(f"Connection Error getting logs for agent '{agent_id}': {e}")
        return None
    except Exception as e:
        st.error(f"Unexpected error getting logs for agent '{agent_id}': {e}")
        logger.exception(f"Unexpected error in get_agent_logs for {agent_id}")
        return None
138
+
139
def _send_agent_signal(agent_id: str, action: str, verb: str, default_ok: str, default_warn: str) -> bool:
    """POST ``/agents/{agent_id}/{action}`` and report the outcome in the UI.

    Shared implementation for :func:`start_agent` and :func:`stop_agent`,
    which previously duplicated this body. Adds a request timeout (the
    originals had none and could hang the page) and narrows the bare
    ``except:`` around the error-detail JSON parse to ``except Exception``.

    Args:
        agent_id: Target agent.
        action: Endpoint suffix, "start" or "stop".
        verb: "starting"/"stopping" for error messages.
        default_ok: Fallback success message when the API omits one.
        default_warn: Fallback warning when the API reports a logical failure
            (e.g. agent already in the requested state).

    Returns:
        bool: True only when the API acknowledged the signal with success.
    """
    try:
        response = requests.post(f"{TENSORUS_API_URL}/agents/{agent_id}/{action}", timeout=10)
        if response.status_code == 404:
            st.error(f"Agent '{agent_id}' not found via API.")
            return False
        # 202 Accepted is success; other 2xx may be okay too. 4xx indicates failure.
        if 200 <= response.status_code < 300:
            api_response = response.json()
            if api_response.get("success"):
                st.success(f"API: {api_response.get('message', default_ok)}")
                return True
            # API indicated logical failure (e.g., already in requested state).
            st.warning(f"API: {api_response.get('message', default_warn)}")
            return False
        error_detail = "Unknown error"
        try:
            error_detail = response.json().get("detail", error_detail)
        except Exception:  # response body may not be JSON
            pass
        st.error(f"API Error {verb} agent '{agent_id}': {error_detail} (Status: {response.status_code})")
        return False
    except requests.exceptions.RequestException as e:
        st.error(f"Connection Error {verb} agent '{agent_id}': {e}")
        return False
    except Exception as e:
        st.error(f"Unexpected error {verb} agent '{agent_id}': {e}")
        logger.exception(f"Unexpected error in {action}_agent for {agent_id}")
        return False

def start_agent(agent_id: str) -> bool:
    """Send a start signal to an agent via the API; True on acknowledged success."""
    return _send_agent_signal(
        agent_id, "start", "starting", "Start signal sent.", "Agent might already be running."
    )

def stop_agent(agent_id: str) -> bool:
    """Send a stop signal to an agent via the API; True on acknowledged success."""
    return _send_agent_signal(
        agent_id, "stop", "stopping", "Stop signal sent.", "Agent might already be stopped."
    )
200
+
201
def get_dashboard_metrics() -> Optional[Dict[str, Any]]:
    """Fetch dashboard metrics from the API.

    Returns:
        Optional[Dict[str, Any]]: The DashboardMetrics model dict, or
        ``None`` on failure.
    """
    try:
        # Explicit timeout so a stalled backend cannot hang the Streamlit page.
        response = requests.get(f"{TENSORUS_API_URL}/metrics/dashboard", timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Connection Error fetching dashboard metrics: {e}")
        return None
    except Exception as e:
        st.error(f"Unexpected error fetching dashboard metrics: {e}")
        logger.exception("Unexpected error in get_dashboard_metrics")
        return None
215
+
216
def get_agent_config(agent_id: str) -> Optional[Dict[str, Any]]:
    """Fetch an agent's configuration from the API.

    Args:
        agent_id: Agent identifier.

    Returns:
        Optional[Dict[str, Any]]: Configuration dict, or ``None`` when the
        agent is unknown (404) or the request fails.
    """
    try:
        # Explicit timeout so a stalled backend cannot hang the Streamlit page.
        response = requests.get(f"{TENSORUS_API_URL}/agents/{agent_id}/config", timeout=10)
        if response.status_code == 404:
            st.error(f"Agent '{agent_id}' not found via API for configuration.")
            return None
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Connection Error fetching config for agent '{agent_id}': {e}")
        return None
    except Exception as e:
        st.error(f"Unexpected error fetching config for agent '{agent_id}': {e}")
        logger.exception(f"Unexpected error in get_agent_config for {agent_id}")
        return None
232
+
233
def update_agent_config(agent_id: str, config: Dict[str, Any]) -> bool:
    """Send updated configuration for an agent to the API.

    Args:
        agent_id: Agent identifier.
        config: Configuration mapping forwarded as ``{"config": ...}``.

    Returns:
        bool: True only when the API confirmed the update.
    """
    try:
        # Explicit timeout so a stalled backend cannot hang the Streamlit page.
        response = requests.post(
            f"{TENSORUS_API_URL}/agents/{agent_id}/configure",
            json={"config": config},
            timeout=10,
        )
        if response.status_code == 404:
            st.error(f"Agent '{agent_id}' not found via API for configuration.")
            return False
        if 200 <= response.status_code < 300:
            api_response = response.json()
            if api_response.get("success"):
                st.success(api_response.get("message", "Configuration updated."))
                return True
            st.error(api_response.get("message", "Failed to update configuration."))
            return False
        error_detail = "Unknown error"
        try:
            error_detail = response.json().get("detail", error_detail)
        except Exception:  # response body may not be JSON
            pass
        st.error(
            f"API Error updating config for '{agent_id}': {error_detail} (Status: {response.status_code})"
        )
        return False
    except requests.exceptions.RequestException as e:
        st.error(f"Connection Error updating config for agent '{agent_id}': {e}")
        return False
    except Exception as e:
        st.error(f"Unexpected error updating config for agent '{agent_id}': {e}")
        logger.exception(f"Unexpected error in update_agent_config for {agent_id}")
        return False
pyproject.toml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "tensorus"
3
+ version = "0.0.4"
4
+ authors = [
5
+ { name = "Tensorus Team", email = "ai@tensorus.com" }
6
+ ]
7
+ description = "An agentic tensor database and agent framework for managing, querying, and automating tensor data workflows."
8
+ readme = "README.md"
9
+ requires-python = ">=3.10"
10
+ license = { file = "LICENSE" }
11
+ keywords = [
12
+ "tensor", "database", "agent", "ai", "pytorch", "fastapi", "streamlit", "automl", "reinforcement-learning", "data-ingestion"
13
+ ]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "Intended Audience :: Science/Research",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Operating System :: OS Independent",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Topic :: Database",
23
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
24
+ "Topic :: Software Development :: Libraries"
25
+ ]
26
+ [project.urls]
27
+ Homepage = "https://tensorus.com"
28
+ Repository = "https://github.com/tensorus/tensorus"
29
+
30
+ [project.optional-dependencies]
31
+ models = ["tensorus-models>=0.0.3"]
32
+
33
+ [build-system]
34
+ requires = ["setuptools>=61.0", "wheel"]
35
+ build-backend = "setuptools.build_meta"
36
+
37
+ [tool.setuptools.packages.find]
38
+ where = ["."]
39
+ include = ["tensorus*"]
40
+ namespaces = false
requirements-test.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements-test.txt
2
+ # Python dependencies needed to run the test suite
3
+
4
+ pytest
5
+ pytest-asyncio
6
+ httpx>=0.28.1
7
+ scipy
8
+ semopy>=2.3
9
+ pydantic-settings>=2.0
10
+ # FastAPI version supporting the Pydantic v2 API
11
+ fastapi>=0.110
12
+ numpy>=1.21.0
13
+ torch
14
+ tensorly
15
+ transformers
16
+ psycopg2-binary
17
+ python-jose[cryptography]
18
+ fastmcp>=0.2.0
requirements.txt ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+ # Updated: 2024-07-26 # (Date updated)
3
+
4
+ # --- Core Tensor and Numerics ---
5
+ torch>=1.13.0
6
+ torchvision>=0.14.0
7
+ segmentation-models-pytorch
8
+ transformers
9
+ numpy>=1.21.0
10
+ tensorly
11
+
12
+ # --- Agent Specific Dependencies ---
13
+ # For Ingestion Agent image processing example
14
+ Pillow>=9.0.0
15
+
16
+ # --- API Layer Dependencies ---
17
+ # FastAPI with Pydantic v2 support
18
+ fastapi>=0.110.0
19
+ pydantic>=2.0.0
20
+ pydantic-settings>=2.0
21
+ # ASGI Server (standard includes extras like watchfiles for reload)
22
+ uvicorn[standard]>=0.20.0
23
+ fastmcp>=0.2.0
24
+ # For PostgreSQL connectivity (used by PostgresMetadataStorage)
25
+ psycopg2-binary>=2.9.0
26
+ # Optional: Needed if using FastAPI file uploads via forms
27
+ # python-multipart>=0.0.5
28
+
29
+ # --- Streamlit UI Dependencies ---
30
+ streamlit>=1.25.0
31
+ # For calling the FastAPI backend from the Streamlit UI
32
+ requests>=2.28.0
33
+ # For JWT validation
34
+ python-jose[cryptography]
35
+ # For plotting in the Streamlit UI (Dashboard, Data Explorer)
36
+ plotly>=5.10.0
37
+
38
+ # --- Testing Dependencies ---
39
+ pytest>=7.0.0
40
+ httpx>=0.28.1 # For FastAPI TestClient
41
+
42
+ # --- Data Analysis & Modeling ---
43
+ # Optional: For plotting example in rl_agent.py
44
+ matplotlib>=3.5.0
45
+ # For Time Series Analysis (ARIMA model)
46
+ scikit-learn>=1.3.0
47
+ umap-learn
48
+ pandas>=1.5.0
49
+ arch>=5.7
50
+ lifelines>=0.28
51
+ semopy>=2.3
52
+ gensim
53
+ joblib
54
+ opencv-python
setup.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ # Setup script for Tensorus development environment
3
+ set -e
4
+
5
+ # Install Python dependencies
6
+ pip install --index-url https://download.pytorch.org/whl/cpu torch torchvision
7
+ pip install -r requirements.txt
8
+ pip install -r requirements-test.txt
9
+
10
+ if [[ "$INSTALL_MODELS" == "1" ]]; then
11
+ pip install -e .[models]
12
+ fi
tensor_data/count_ds.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53a9272a45a56453ca8c3fbb9a463b86856367419eecc744bdada7a17a63205c
3
+ size 2157
tensor_data/test_dataset_ingest_5dde494e-54cd-4fbc-bc87-e5949cc3809e.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a404c8b32dbc7c670076b5c02d470a5d0a60dc7da3e41450868ad97ccc3a6c62
3
+ size 2432
tensor_data/test_dataset_ingest_743ec0f5-c46c-42b3-92fd-5b8e504b147c.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:089e55853478bbbf88e4952df508300589746abb0b2834f78e05d74a906d8079
3
+ size 2432
tensor_data/test_dataset_ingest_d17cec44-8f9f-4856-a343-3ef700784edb.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bca92e9ab4dfc7aec08d872fc264afca1acdf7683e46586ec269331f647acf1
3
+ size 1747
tensor_data/test_dataset_ingest_f3e79d98-6fb4-496d-9763-f0ce3aa1c801.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcf925fa73ab1fb4ecf6c4bf8436432d202161f15c18dfcbd6571340e1c525ea
3
+ size 2432
tensor_data/test_dataset_integration_69bf9dbf-0ef4-4dd7-ba2d-6ba3bf31b799.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a88b22db97bf4b62a2025175c422772f95165030479757e6ff0ed5442e0da569
3
+ size 1841
tensorus/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tensorus core package."""
2
+
3
+ import os
4
+
5
+ # The repository previously exposed a large collection of models under
6
+ # ``tensorus.models``. These models have been moved to a separate package.
7
+ # Tensorus now only attempts to import them if available.
8
+
9
+ if not os.environ.get("TENSORUS_MINIMAL_IMPORT"):
10
+ try:
11
+ import importlib
12
+
13
+ models = importlib.import_module("tensorus.models")
14
+ __all__ = ["models"]
15
+ except ModuleNotFoundError:
16
+ __all__ = []
17
+ else:
18
+ __all__ = []
tensorus/api.py ADDED
The diff for this file is too large to render. See raw diff
 
tensorus/api/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tensorus API Package.
3
+
4
+ This package contains the FastAPI application and endpoints for interacting
5
+ with Tensor Descriptors and Semantic Metadata.
6
+
7
+ To run the API (assuming Uvicorn is installed and you are in the project
8
+ root):
9
+ `uvicorn tensorus.api:app --reload --port 7860`
10
+ """
11
+
12
+ # Re-export the FastAPI application defined in ``tensorus/api.py`` so users
13
+ # can simply ``import tensorus.api`` to access ``app``. We load the module by
14
+ # path to avoid import order issues with the ``tensorus.api`` package itself.
15
+ import importlib.util
16
+ from pathlib import Path
17
+
18
+ _spec = importlib.util.spec_from_file_location(
19
+ "tensorus.api_app", Path(__file__).resolve().parent.parent / "api.py"
20
+ )
21
+ _api = importlib.util.module_from_spec(_spec)
22
+ _spec.loader.exec_module(_api) # type: ignore[call-arg]
23
+ app = _api.app
24
+
25
+ __all__ = ["app"]
tensorus/api/dependencies.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from tensorus.metadata import storage_instance as globally_configured_storage_instance
2
+ from tensorus.metadata.storage_abc import MetadataStorage
3
+
4
+ # Note: The `storage_instance` imported here is already configured (InMemory or Postgres)
5
+ # based on the logic in `tensorus/metadata/__init__.py` which reads `tensorus.config.settings`.
6
+
7
def get_storage_instance() -> MetadataStorage:
    """FastAPI dependency returning the process-wide metadata storage backend.

    The instance is created once, when ``tensorus.metadata`` is first imported
    (backend selected there from ``tensorus.config.settings``); every request
    that depends on this function receives the same shared object.
    """
    return globally_configured_storage_instance
12
+
13
+ # If we needed to re-initialize or pass settings directly to the storage for each request
14
+ # (e.g. for request-scoped sessions or dynamic configuration per request, which is not the case here),
15
+ # this function would be more complex. For now, it just returns the global instance.
16
+ #
17
+ # Example of re-initializing if storage_instance could change or needs request context:
18
+ # from tensorus.metadata import get_configured_storage_instance, ConfigurationError
19
+ # from tensorus.config import settings
20
+ #
21
+ # def get_storage_instance_dynamic() -> MetadataStorage:
22
+ # try:
23
+ # # This would re-evaluate settings and re-create the instance per request if needed,
24
+ # # or could access request-specific config.
25
+ # # For our current setup, the global instance is fine.
26
+ # return get_configured_storage_instance()
27
+ # except ConfigurationError as e:
28
+ # # This would ideally be caught by a global exception handler in FastAPI
29
+ # # to return a 500 error if configuration is bad during a request.
30
+ # # However, configuration should typically be validated at startup.
31
+ # raise RuntimeError(f"Storage configuration error: {e}")
32
+
33
+ # The current `storage_instance` is initialized once when `tensorus.metadata` is first imported.
34
+ # This is generally fine for many applications unless the configuration needs to change without restarting.
35
+ # The FastAPI `Depends` system will call `get_storage_instance` for each request that uses it,
36
+ # but this function will always return the same globally initialized `storage_instance`.
tensorus/api/endpoints.py ADDED
@@ -0,0 +1,601 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Optional, Any, Annotated, Literal
2
+ from uuid import UUID
3
+ from datetime import datetime
4
+
5
+ from fastapi import APIRouter, HTTPException, Body, Query, Path
6
+ from pydantic import BaseModel, ValidationError
7
+
8
+ from tensorus.metadata.schemas import (
9
+ TensorDescriptor, SemanticMetadata, DataType,
10
+ LineageSourceType, LineageMetadata, ParentTensorLink,
11
+ ComputationalMetadata, QualityMetadata, RelationalMetadata, UsageMetadata,
12
+ )
13
+ from tensorus.metadata.storage_abc import MetadataStorage
14
+ from tensorus.api.dependencies import get_storage_instance
15
+ from tensorus.storage.connectors import mock_tensor_connector_instance
16
+ from tensorus.metadata.schemas_iodata import TensorusExportData
17
+
18
+ from pydantic import BaseModel as PydanticBaseModel
19
+ from typing import TypeVar, Generic
20
+
21
+ from fastapi import Depends, Security, status # Added status for HTTPException
22
+ from fastapi.responses import JSONResponse
23
+ from .security import verify_api_key, api_key_header_auth
24
+ from tensorus.audit import log_audit_event
25
+
26
+ import copy
27
+ import uuid
28
+
29
+ # Router for TensorDescriptor
30
+ router_tensor_descriptor = APIRouter(
31
+ prefix="/tensor_descriptors",
32
+ tags=["Tensor Descriptors"],
33
+ responses={404: {"description": "Not found"}},
34
+ )
35
+
36
+ # Router for SemanticMetadata
37
+ router_semantic_metadata = APIRouter(
38
+ prefix="/tensor_descriptors/{tensor_id}/semantic", # Corrected prefix for consistency
39
+ tags=["Semantic Metadata (Per Tensor)"],
40
+ responses={404: {"description": "Not found"}},
41
+ )
42
+
43
+ # --- TensorDescriptor Endpoints ---
44
@router_tensor_descriptor.post("/", response_model=TensorDescriptor, status_code=status.HTTP_201_CREATED)
async def create_tensor_descriptor(
    descriptor_data: Dict[str, Any],
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key)
):
    """Create a new TensorDescriptor.

    Missing physical fields (``shape``, ``data_type``, ``byte_size``,
    ``dimensionality``) are backfilled from the mock tensor connector when it
    already knows the tensor; ``dimensionality`` is derived from ``shape`` as a
    last resort. A placeholder payload is stored in the connector when none
    exists yet.

    Raises:
        HTTPException 422: malformed ``tensor_id`` or schema validation error.
        HTTPException 400: storage-level rejection (ValueError).
    """
    tensor_id_str = descriptor_data.get("tensor_id")
    try:
        # BUG FIX: previously this UUID parse ran outside any try block, so a
        # malformed tensor_id string escaped as an unhandled ValueError (HTTP
        # 500). Treat it as a client validation error instead.
        temp_id_for_lookup = UUID(tensor_id_str) if tensor_id_str else uuid.uuid4()
    except (ValueError, AttributeError, TypeError):
        log_audit_event(action="CREATE_TENSOR_DESCRIPTOR_FAILED_VALIDATION", user=api_key,
                        details={"error": "invalid tensor_id", "input_tensor_id": tensor_id_str})
        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                            detail=f"tensor_id {tensor_id_str!r} is not a valid UUID.")

    fields_to_fetch = ["shape", "data_type", "byte_size", "dimensionality"]
    missing_fields = [field for field in fields_to_fetch
                      if field not in descriptor_data or descriptor_data[field] is None]

    if missing_fields:
        # Best-effort backfill from the (mock) tensor storage layer.
        storage_details = mock_tensor_connector_instance.get_tensor_details(temp_id_for_lookup)
        if storage_details:
            for field in missing_fields:
                if field in storage_details and (field not in descriptor_data or descriptor_data[field] is None):
                    descriptor_data[field] = storage_details[field]
    # Derive dimensionality from the (now possibly backfilled) shape.
    if "shape" in descriptor_data and descriptor_data["shape"] is not None \
            and ("dimensionality" not in descriptor_data or descriptor_data["dimensionality"] is None):
        descriptor_data["dimensionality"] = len(descriptor_data["shape"])
    try:
        final_descriptor = TensorDescriptor(**descriptor_data)
        storage.add_tensor_descriptor(final_descriptor)
        # Ensure the connector holds *something* for this tensor so later
        # retrievals don't 404 on freshly described tensors.
        if mock_tensor_connector_instance.retrieve_tensor(final_descriptor.tensor_id) is None:
            mock_tensor_data_payload = {
                "shape": final_descriptor.shape, "data_type": final_descriptor.data_type.value,
                "byte_size": final_descriptor.byte_size, "info": "Placeholder data by create_tensor_descriptor"
            }
            mock_tensor_connector_instance.store_tensor(final_descriptor.tensor_id, mock_tensor_data_payload)
        log_audit_event(action="CREATE_TENSOR_DESCRIPTOR", user=api_key, tensor_id=str(final_descriptor.tensor_id),
                        details={"owner": final_descriptor.owner, "data_type": final_descriptor.data_type.value})
        return final_descriptor
    except ValidationError as e:
        log_audit_event(action="CREATE_TENSOR_DESCRIPTOR_FAILED_VALIDATION", user=api_key, details={"error": str(e), "input_tensor_id": tensor_id_str})
        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=e.errors())
    except ValueError as e:
        log_audit_event(action="CREATE_TENSOR_DESCRIPTOR_FAILED_STORAGE", user=api_key, details={"error": str(e), "input_tensor_id": tensor_id_str})
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
83
+
84
@router_tensor_descriptor.get("/", response_model=List[TensorDescriptor], summary="List Tensor Descriptors with Advanced Filtering")
async def list_tensor_descriptors(
    owner: Optional[str] = Query(None), data_type: Optional[DataType] = Query(None),
    tags_contain: Optional[List[str]] = Query(None), lineage_version: Optional[str] = Query(None, alias="lineage.version"),
    lineage_source_type: Optional[LineageSourceType] = Query(None, alias="lineage.source.type"),
    comp_algorithm: Optional[str] = Query(None, alias="computational.algorithm"),
    comp_gpu_model: Optional[str] = Query(None, alias="computational.hardware_info.gpu_model"),
    quality_confidence_gt: Optional[float] = Query(None, alias="quality.confidence_score_gt"),
    quality_noise_lt: Optional[float] = Query(None, alias="quality.noise_level_lt"),
    rel_collection: Optional[str] = Query(None, alias="relational.collection"),
    rel_has_related_tensor_id: Optional[UUID] = Query(None, alias="relational.has_related_tensor_id"),
    usage_last_accessed_before: Optional[datetime] = Query(None, alias="usage.last_accessed_before"),
    usage_used_by_app: Optional[str] = Query(None, alias="usage.used_by_app"),
    storage: MetadataStorage = Depends(get_storage_instance)
):
    """List TensorDescriptors, applying extended-metadata filters in memory.

    Common filters (owner, data_type, tags, lineage version) are pushed down
    to the storage backend; the remaining filters are evaluated per descriptor.

    BUG FIX: filters previously used truthiness tests (e.g. ``if
    quality_confidence_gt``), which silently ignored legitimate falsy values
    such as ``0.0`` or an empty string. All presence checks now use
    ``is not None``.
    """
    all_descriptors = storage.list_tensor_descriptors(
        owner=owner, data_type=data_type, tags_contain=tags_contain, lineage_version=lineage_version
    )  # Pass some common filters
    filtered_descriptors = []
    for desc in all_descriptors:  # Apply remaining filters in memory
        if lineage_source_type is not None:
            lm = storage.get_lineage_metadata(desc.tensor_id)
            if not lm or not lm.source or lm.source.type != lineage_source_type:
                continue
        if comp_algorithm is not None or comp_gpu_model is not None:
            cm = storage.get_computational_metadata(desc.tensor_id)
            if not cm:
                continue
            if comp_algorithm is not None and cm.algorithm != comp_algorithm:
                continue
            if comp_gpu_model is not None and (not cm.hardware_info or cm.hardware_info.get("gpu_model") != comp_gpu_model):
                continue
        if quality_confidence_gt is not None or quality_noise_lt is not None:
            qm = storage.get_quality_metadata(desc.tensor_id)
            if not qm:
                continue
            if quality_confidence_gt is not None and (qm.confidence_score is None or qm.confidence_score <= quality_confidence_gt):
                continue
            if quality_noise_lt is not None and (qm.noise_level is None or qm.noise_level >= quality_noise_lt):
                continue
        if rel_collection is not None or rel_has_related_tensor_id is not None:
            rm = storage.get_relational_metadata(desc.tensor_id)
            if not rm:
                continue
            if rel_collection is not None and rel_collection not in rm.collections:
                continue
            if rel_has_related_tensor_id is not None and not any(rtl.related_tensor_id == rel_has_related_tensor_id for rtl in rm.related_tensors):
                continue
        if usage_last_accessed_before is not None or usage_used_by_app is not None:
            um = storage.get_usage_metadata(desc.tensor_id)
            if not um:
                continue
            if usage_last_accessed_before is not None and (not um.last_accessed_at or um.last_accessed_at >= usage_last_accessed_before):
                continue
            if usage_used_by_app is not None and usage_used_by_app not in um.application_references:
                continue
        filtered_descriptors.append(desc)
    return filtered_descriptors
127
+
128
@router_tensor_descriptor.get("/{tensor_id}", response_model=TensorDescriptor)
async def get_tensor_descriptor(tensor_id: UUID = Path(...), storage: MetadataStorage = Depends(get_storage_instance)):
    """Fetch a single TensorDescriptor by UUID, or respond 404."""
    found = storage.get_tensor_descriptor(tensor_id)
    if not found:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"TensorDescriptor ID {tensor_id} not found.",
        )
    return found
133
+
134
class TensorDescriptorUpdate(BaseModel):
    """Partial-update payload for a TensorDescriptor; unset fields are ignored."""
    dimensionality: Optional[int] = None
    shape: Optional[List[int]] = None
    data_type: Optional[DataType] = None
    storage_format: Optional[str] = None
    owner: Optional[str] = None
    access_control: Optional[Dict[str, List[str]]] = None
    byte_size: Optional[int] = None
    compression_info: Optional[Dict[str, Any]] = None
    tags: Optional[List[str]] = None
    metadata: Optional[Dict[str, Any]] = None
139
+
140
@router_tensor_descriptor.put("/{tensor_id}", response_model=TensorDescriptor)
async def update_tensor_descriptor(
    tensor_id: UUID = Path(...), updates: TensorDescriptorUpdate = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance), api_key: str = Depends(verify_api_key)
):
    """Apply a partial update to an existing TensorDescriptor (audited)."""
    payload = updates.model_dump(exclude_unset=True)
    if not payload:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No update data provided.")
    existing = storage.get_tensor_descriptor(tensor_id)
    if not existing:
        log_audit_event("UPDATE_TENSOR_DESCRIPTOR_FAILED_NOT_FOUND", api_key, str(tensor_id))
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"TensorDescriptor ID {tensor_id} not found.")
    try:
        result = storage.update_tensor_descriptor(tensor_id, **payload)
    except (ValidationError, ValueError) as exc:
        log_audit_event("UPDATE_TENSOR_DESCRIPTOR_FAILED_VALIDATION", api_key, str(tensor_id), {"error": str(exc)})
        # ValidationError -> 422, plain ValueError -> 400.
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY if isinstance(exc, ValidationError) else status.HTTP_400_BAD_REQUEST,
            detail=str(exc),
        )
    log_audit_event("UPDATE_TENSOR_DESCRIPTOR", api_key, str(tensor_id), {"updated_fields": list(payload.keys())})
    return result
158
+
159
@router_tensor_descriptor.delete("/{tensor_id}", status_code=status.HTTP_200_OK)
async def delete_tensor_descriptor(
    tensor_id: UUID = Path(...), storage: MetadataStorage = Depends(get_storage_instance), api_key: str = Depends(verify_api_key)
):
    """Delete a TensorDescriptor and its connector-side data (audited)."""
    if storage.get_tensor_descriptor(tensor_id) is None:
        log_audit_event("DELETE_TENSOR_DESCRIPTOR_FAILED_NOT_FOUND", api_key, str(tensor_id))
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"TensorDescriptor ID {tensor_id} not found.",
        )
    storage.delete_tensor_descriptor(tensor_id)
    # Also remove the raw tensor payload held by the (mock) connector.
    mock_tensor_connector_instance.delete_tensor(tensor_id)
    log_audit_event("DELETE_TENSOR_DESCRIPTOR", api_key, str(tensor_id))
    return {"message": f"TensorDescriptor {tensor_id} and associated data deleted."}
170
+
171
+ # --- SemanticMetadata Endpoints ---
172
def _check_td_exists_for_semantic(tensor_id: UUID, storage: MetadataStorage, api_key: Optional[str] = None, action_prefix: str = ""):
    """Raise 404 (auditing when caller identity is known) if the parent TensorDescriptor is missing."""
    if storage.get_tensor_descriptor(tensor_id):
        return
    if api_key and action_prefix:
        log_audit_event(f"{action_prefix}_SEMANTIC_METADATA_FAILED_TD_NOT_FOUND", user=api_key, tensor_id=str(tensor_id))
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Parent TensorDescriptor ID {tensor_id} not found.")
177
+
178
@router_semantic_metadata.post("/", response_model=SemanticMetadata, status_code=status.HTTP_201_CREATED)
async def create_semantic_metadata_for_tensor(
    tensor_id: UUID = Path(...), metadata_in: SemanticMetadata = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance), api_key: str = Depends(verify_api_key)
):
    """Attach a SemanticMetadata entry to an existing tensor (audited)."""
    _check_td_exists_for_semantic(tensor_id, storage, api_key, "CREATE")
    # The path parameter is authoritative; reject inconsistent bodies.
    if metadata_in.tensor_id != tensor_id:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="tensor_id in path and body must match.")
    try:
        storage.add_semantic_metadata(metadata_in)
    except (ValidationError, ValueError) as exc:
        log_audit_event("CREATE_SEMANTIC_METADATA_FAILED", api_key, str(tensor_id), {"name": metadata_in.name, "error": str(exc)})
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST if isinstance(exc, ValueError) else status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=str(exc),
        )
    log_audit_event("CREATE_SEMANTIC_METADATA", api_key, str(tensor_id), {"name": metadata_in.name})
    return metadata_in
193
+
194
@router_semantic_metadata.get("/", response_model=List[SemanticMetadata])
async def get_all_semantic_metadata_for_tensor(tensor_id: UUID = Path(...), storage: MetadataStorage = Depends(get_storage_instance)):
    """Return every SemanticMetadata entry attached to the given tensor (404 if the tensor is unknown)."""
    _check_td_exists_for_semantic(tensor_id, storage)
    return storage.get_semantic_metadata(tensor_id)
198
+
199
class SemanticMetadataUpdate(BaseModel):
    """Partial-update payload for a named SemanticMetadata entry."""
    name: Optional[str] = None
    description: Optional[str] = None
200
+
201
@router_semantic_metadata.put("/{current_name}", response_model=SemanticMetadata)
async def update_named_semantic_metadata_for_tensor(
    tensor_id: UUID = Path(...), current_name: str = Path(...), updates: SemanticMetadataUpdate = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance), api_key: str = Depends(verify_api_key)
):
    """Rename and/or re-describe the SemanticMetadata entry `current_name` (audited)."""
    _check_td_exists_for_semantic(tensor_id, storage, api_key, "UPDATE")
    changed_fields = updates.model_dump(exclude_unset=True)
    if not changed_fields:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No update data provided.")
    if not storage.get_semantic_metadata_by_name(tensor_id, current_name):
        log_audit_event("UPDATE_SEMANTIC_METADATA_FAILED_NOT_FOUND", api_key, str(tensor_id), {"name": current_name})
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"SemanticMetadata '{current_name}' not found for tensor {tensor_id}.",
        )
    try:
        result = storage.update_semantic_metadata(tensor_id, current_name, new_description=updates.description, new_name=updates.name)
    except (ValidationError, ValueError) as exc:
        log_audit_event("UPDATE_SEMANTIC_METADATA_FAILED", api_key, str(tensor_id), {"name": current_name, "error": str(exc)})
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST if isinstance(exc, ValueError) else status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=str(exc),
        )
    log_audit_event(
        "UPDATE_SEMANTIC_METADATA",
        api_key,
        str(tensor_id),
        {"original_name": current_name, "updated_fields": changed_fields},
    )
    return result
224
+
225
@router_semantic_metadata.delete("/{name}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_named_semantic_metadata_for_tensor(
    tensor_id: UUID = Path(...), name: str = Path(...),
    storage: MetadataStorage = Depends(get_storage_instance), api_key: str = Depends(verify_api_key)
):
    """Delete the SemanticMetadata entry named `name` from a tensor (audited, 204 on success)."""
    _check_td_exists_for_semantic(tensor_id, storage, api_key, "DELETE")
    if not storage.get_semantic_metadata_by_name(tensor_id, name):
        log_audit_event("DELETE_SEMANTIC_METADATA_FAILED_NOT_FOUND", api_key, str(tensor_id), {"name": name})
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"SemanticMetadata '{name}' not found for tensor {tensor_id}.",
        )
    storage.delete_semantic_metadata(tensor_id, name)
    log_audit_event("DELETE_SEMANTIC_METADATA", api_key, str(tensor_id), {"name": name})
    return None
237
+
238
+ # --- Search and Aggregation Routers (GET - No Auth/Audit) ---
239
+ router_search_aggregate = APIRouter(tags=["Search & Aggregate"])
240
@router_search_aggregate.get("/search/tensors/", response_model=List[TensorDescriptor])
async def search_tensors(
    text_query: str = Query(..., min_length=1), fields_to_search: Optional[List[str]] = Query(None),
    storage: MetadataStorage = Depends(get_storage_instance)
):
    """Full-text search over tensor metadata; defaults to a broad field set when none is given."""
    search_fields = fields_to_search or [
        "tensor_id", "owner", "tags", "metadata", "semantic.name", "semantic.description",
        "lineage.source.identifier", "lineage.version", "computational.algorithm",
    ]
    try:
        return storage.search_tensor_descriptors(text_query, search_fields)
    except ValueError as exc:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc))
248
+
249
@router_search_aggregate.get("/aggregate/tensors/", response_model=Dict[str, Any])
async def aggregate_tensors(
    group_by_field: str = Query(...), agg_function: str = Query(...), agg_field: Optional[str] = Query(None),
    storage: MetadataStorage = Depends(get_storage_instance)
):
    """Group-and-aggregate tensor descriptors; numeric aggregations require `agg_field`."""
    needs_field = agg_function in ["avg", "sum", "min", "max"]
    if needs_field and not agg_field:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"'{agg_function}' requires 'agg_field'.")
    try:
        return storage.aggregate_tensor_descriptors(group_by_field, agg_function, agg_field)
    except ValueError as exc:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc))
    except NotImplementedError as exc:
        raise HTTPException(status_code=status.HTTP_501_NOT_IMPLEMENTED, detail=str(exc))
259
+
260
+ # --- Versioning and Lineage Router ---
261
+ router_version_lineage = APIRouter(tags=["Versioning & Lineage"])
262
class NewTensorVersionRequest(BaseModel):
    """Request body for creating a new version of an existing tensor.

    Only ``new_version_string`` is required; other fields override the values
    copied from the parent descriptor.
    """
    new_version_string: str
    dimensionality: Optional[int] = None
    shape: Optional[List[int]] = None
    data_type: Optional[DataType] = None
    storage_format: Optional[str] = None
    owner: Optional[str] = None
    access_control: Optional[Dict[str, List[str]]] = None
    byte_size: Optional[int] = None
    checksum: Optional[str] = None
    compression_info: Optional[Dict[str, Any]] = None
    tags: Optional[List[str]] = None
    metadata: Optional[Dict[str, Any]] = None
    lineage_source_identifier: Optional[str] = None
    lineage_source_type: Optional[LineageSourceType] = None
268
+
269
@router_version_lineage.post("/tensors/{tensor_id}/versions", response_model=TensorDescriptor, status_code=status.HTTP_201_CREATED)
async def create_tensor_version(
    tensor_id: UUID = Path(...), version_request: NewTensorVersionRequest = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance), api_key: str = Depends(verify_api_key)
):
    """Create a new version of an existing tensor.

    Copies the parent descriptor, applies any overrides from the request
    (unknown override fields land in the descriptor's ``metadata`` dict), and
    records a ``new_version_of`` lineage link back to the parent.
    """
    # BUG FIX: ``LineageSource`` was referenced below but never imported (the
    # module-level import only brings LineageSourceType), so providing a
    # lineage source raised NameError. Import it here explicitly.
    from tensorus.metadata.schemas import LineageSource

    parent_td = storage.get_tensor_descriptor(tensor_id)
    if not parent_td:
        log_audit_event("CREATE_TENSOR_VERSION_FAILED_PARENT_NOT_FOUND", api_key, str(tensor_id), {"new_version": version_request.new_version_string})
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Parent TensorDescriptor ID {tensor_id} not found.")
    new_version_id = uuid.uuid4()
    # Start from the parent's fields, minus identity and timestamps.
    new_td_data = parent_td.model_dump(
        exclude={"tensor_id", "creation_timestamp", "last_modified_timestamp"}
    )
    for field, value in version_request.model_dump(exclude_unset=True).items():
        if field in TensorDescriptor.model_fields and value is not None:
            new_td_data[field] = value
        elif field not in ['new_version_string', 'lineage_source_identifier', 'lineage_source_type']:
            # Unknown descriptor fields are preserved in free-form metadata.
            if new_td_data.get("metadata") is None:
                new_td_data["metadata"] = {}
            new_td_data["metadata"][field] = value
    new_td_data.update({
        "tensor_id": new_version_id, "creation_timestamp": datetime.utcnow(),
        "last_modified_timestamp": datetime.utcnow(),
        "owner": new_td_data.get('owner', parent_td.owner),
        "byte_size": new_td_data.get('byte_size', parent_td.byte_size)
    })
    try:
        new_td = TensorDescriptor(**new_td_data)
        storage.add_tensor_descriptor(new_td)
    except (ValidationError, ValueError) as e:
        log_audit_event("CREATE_TENSOR_VERSION_FAILED_VALIDATION", api_key, str(new_version_id), {"parent_id": str(tensor_id), "error": str(e)})
        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY if isinstance(e, ValidationError) else status.HTTP_400_BAD_REQUEST, detail=str(e))

    lineage_details = {"tensor_id": new_version_id, "parent_tensors": [ParentTensorLink(tensor_id=tensor_id, relationship="new_version_of")], "version": version_request.new_version_string}
    if version_request.lineage_source_identifier and version_request.lineage_source_type:
        lineage_details["source"] = LineageSource(type=version_request.lineage_source_type, identifier=version_request.lineage_source_identifier)
    storage.add_lineage_metadata(LineageMetadata(**lineage_details))
    log_audit_event("CREATE_TENSOR_VERSION", api_key, str(new_version_id), {"parent_id": str(tensor_id), "version": version_request.new_version_string})
    return new_td
306
+
307
@router_version_lineage.get("/tensors/{tensor_id}/versions", response_model=List[TensorDescriptor])
async def list_tensor_versions(tensor_id: UUID = Path(...), storage: MetadataStorage = Depends(get_storage_instance)):
    """Return the tensor itself followed by children linked via `new_version_of` lineage."""
    root = storage.get_tensor_descriptor(tensor_id)
    if not root:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"TensorDescriptor ID {tensor_id} not found.")
    versions = [root]
    for cid in storage.get_child_tensor_ids(tensor_id):
        lineage = storage.get_lineage_metadata(cid)
        if not lineage:
            continue
        is_version = any(
            link.tensor_id == tensor_id and link.relationship == "new_version_of"
            for link in lineage.parent_tensors
        )
        if not is_version:
            continue
        child = storage.get_tensor_descriptor(cid)
        if child:
            versions.append(child)
    return versions
320
+
321
class LineageRelationshipRequest(BaseModel):
    """Request body declaring a parent->child lineage edge between two tensors."""
    source_tensor_id: UUID
    target_tensor_id: UUID
    relationship_type: str
    details: Optional[Dict[str, Any]] = None
322
+
323
@router_version_lineage.post("/lineage/relationships/", status_code=status.HTTP_201_CREATED)
async def create_lineage_relationship(
    req: LineageRelationshipRequest, storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key)
):
    """Record a lineage edge from source to target tensor (idempotent, audited)."""
    audit_details = req.model_dump()
    # Both endpoints of the edge must exist.
    if not storage.get_tensor_descriptor(req.source_tensor_id):
        log_audit_event("CREATE_LINEAGE_REL_FAILED_SRC_NOT_FOUND", api_key, details=audit_details)
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Source TD {req.source_tensor_id} not found.")
    if not storage.get_tensor_descriptor(req.target_tensor_id):
        log_audit_event("CREATE_LINEAGE_REL_FAILED_TGT_NOT_FOUND", api_key, details=audit_details)
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Target TD {req.target_tensor_id} not found.")

    target_lineage = storage.get_lineage_metadata(req.target_tensor_id) or LineageMetadata(tensor_id=req.target_tensor_id)
    already_linked = any(
        link.tensor_id == req.source_tensor_id and link.relationship == req.relationship_type
        for link in target_lineage.parent_tensors
    )
    if already_linked:
        return {"message": "Relationship already exists.", "lineage": target_lineage}

    target_lineage.parent_tensors.append(ParentTensorLink(tensor_id=req.source_tensor_id, relationship=req.relationship_type))
    try:
        storage.add_lineage_metadata(target_lineage)
    except (ValidationError, ValueError) as exc:
        log_audit_event("CREATE_LINEAGE_REL_FAILED_VALIDATION", api_key, str(req.target_tensor_id), {**audit_details, "error": str(exc)})
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc))
    log_audit_event("CREATE_LINEAGE_RELATIONSHIP", api_key, str(req.target_tensor_id), details=audit_details)
    return {"message": "Lineage relationship created/updated.", "lineage": storage.get_lineage_metadata(req.target_tensor_id)}
348
+
349
@router_version_lineage.get("/tensors/{tensor_id}/lineage/parents", response_model=List[TensorDescriptor])
async def get_parent_tensors(tensor_id: UUID = Path(...), storage: MetadataStorage = Depends(get_storage_instance)):
    """Resolve a tensor's lineage parents to their descriptors, skipping dangling links."""
    if not storage.get_tensor_descriptor(tensor_id):
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"TD {tensor_id} not found.")
    parents = []
    for pid in storage.get_parent_tensor_ids(tensor_id):
        descriptor = storage.get_tensor_descriptor(pid)
        if descriptor is not None:
            parents.append(descriptor)
    return parents
354
+
355
@router_version_lineage.get("/tensors/{tensor_id}/lineage/children", response_model=List[TensorDescriptor])
async def get_child_tensors(tensor_id: UUID = Path(...), storage: MetadataStorage = Depends(get_storage_instance)):
    """Resolve a tensor's lineage children to their descriptors, skipping dangling links."""
    if not storage.get_tensor_descriptor(tensor_id):
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"TD {tensor_id} not found.")
    children = []
    for cid in storage.get_child_tensor_ids(tensor_id):
        descriptor = storage.get_tensor_descriptor(cid)
        if descriptor is not None:
            children.append(descriptor)
    return children
360
+
361
+ # --- Router for Extended Metadata (CRUD per type) ---
362
+ router_extended_metadata = APIRouter(prefix="/tensor_descriptors/{tensor_id}", tags=["Extended Metadata (Per Tensor)"])
363
+
364
def _get_td_or_404_for_extended_meta(tensor_id: UUID, storage: MetadataStorage, api_key: Optional[str] = None, action_prefix: str = ""):
    """Return the TensorDescriptor or raise 404 (auditing when caller identity is known)."""
    td = storage.get_tensor_descriptor(tensor_id)
    if td:
        return td
    if api_key and action_prefix:
        log_audit_event(f"{action_prefix}_METADATA_FAILED_TD_NOT_FOUND", user=api_key, tensor_id=str(tensor_id))
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Parent TensorDescriptor ID {tensor_id} not found.")
370
+
371
async def _upsert_extended_metadata(tensor_id: UUID, metadata_name_cap: str, metadata_in: Any, storage: MetadataStorage, api_key: str):
    """Shared upsert for one extended-metadata type, dispatching to storage.add_<type>_metadata (audited)."""
    _get_td_or_404_for_extended_meta(tensor_id, storage, api_key, f"UPSERT_{metadata_name_cap}")
    if metadata_in.tensor_id != tensor_id:
        log_audit_event(f"UPSERT_{metadata_name_cap}_FAILED_ID_MISMATCH", api_key, str(tensor_id), {"body_id": str(metadata_in.tensor_id)})
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Path tensor_id and body tensor_id must match.")
    try:
        # Dispatch by naming convention, e.g. "Lineage" -> storage.add_lineage_metadata.
        getattr(storage, f"add_{metadata_name_cap.lower()}_metadata")(metadata_in)
    except (ValidationError, ValueError) as exc:
        log_audit_event(f"UPSERT_{metadata_name_cap}_FAILED", api_key, str(tensor_id), {"error": str(exc)})
        # NOTE: pydantic's ValidationError subclasses ValueError, so the
        # isinstance order here is load-bearing — keep ValueError -> 400 first.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST if isinstance(exc, ValueError) else status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=str(exc),
        )
    log_audit_event(f"UPSERT_{metadata_name_cap}_METADATA", api_key, str(tensor_id))
    return metadata_in
384
+
385
async def _get_extended_metadata_ep(tensor_id: UUID, metadata_name_cap: str, storage: MetadataStorage):
    """Shared fetch for one extended-metadata type; 404 when the tensor or its metadata is missing."""
    _get_td_or_404_for_extended_meta(tensor_id, storage)
    fetch = getattr(storage, f"get_{metadata_name_cap.lower()}_metadata")
    record = fetch(tensor_id)
    if not record:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"{metadata_name_cap}Metadata not found for tensor {tensor_id}.",
        )
    return record
391
+
392
async def _patch_extended_metadata(tensor_id: UUID, metadata_name_cap: str, updates: Dict[str, Any], storage: MetadataStorage, api_key: str):
    """Shared partial update for one extended-metadata type (audited)."""
    _get_td_or_404_for_extended_meta(tensor_id, storage, api_key, f"PATCH_{metadata_name_cap}")
    fetch = getattr(storage, f"get_{metadata_name_cap.lower()}_metadata")
    if not fetch(tensor_id):
        log_audit_event(f"PATCH_{metadata_name_cap}_FAILED_NOT_FOUND", api_key, str(tensor_id))
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"{metadata_name_cap}Metadata not found for update.")
    try:
        apply_update = getattr(storage, f"update_{metadata_name_cap.lower()}_metadata")
        patched = apply_update(tensor_id, **updates)
    except (ValidationError, ValueError) as exc:
        log_audit_event(f"PATCH_{metadata_name_cap}_FAILED_VALIDATION", api_key, str(tensor_id), {"error": str(exc)})
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY if isinstance(exc, ValidationError) else status.HTTP_400_BAD_REQUEST,
            detail=str(exc),
        )
    log_audit_event(f"PATCH_{metadata_name_cap}_METADATA", api_key, str(tensor_id), {"updated_fields": list(updates.keys())})
    return patched
406
+
407
async def _delete_extended_metadata(tensor_id: UUID, metadata_name_cap: str, storage: MetadataStorage, api_key: str):
    """Delete one extended-metadata record; raise 404 when nothing was removed."""
    action = f"DELETE_{metadata_name_cap}"
    _get_td_or_404_for_extended_meta(tensor_id, storage, api_key, action)
    remover = getattr(storage, f"delete_{metadata_name_cap.lower()}_metadata")
    if not remover(tensor_id):
        log_audit_event(f"{action}_FAILED_NOT_FOUND", api_key, str(tensor_id))
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"{metadata_name_cap}Metadata not found for delete.")
    log_audit_event(f"{action}_METADATA", api_key, str(tensor_id))
    return None
415
+
416
# Explicit CRUD endpoints for each extended metadata type

# --- Lineage metadata endpoints ---
@router_extended_metadata.post("/lineage", response_model=LineageMetadata, status_code=status.HTTP_201_CREATED)
async def upsert_lineage_metadata_ep(
    tensor_id: UUID = Path(...),
    lineage_in: LineageMetadata = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Create or replace the LineageMetadata for a tensor."""
    return await _upsert_extended_metadata(tensor_id, "Lineage", lineage_in, storage, api_key)


@router_extended_metadata.get("/lineage", response_model=LineageMetadata)
async def get_lineage_metadata_ep(
    tensor_id: UUID = Path(...),
    storage: MetadataStorage = Depends(get_storage_instance),
):
    """Fetch the LineageMetadata for a tensor."""
    return await _get_extended_metadata_ep(tensor_id, "Lineage", storage)


@router_extended_metadata.patch("/lineage", response_model=LineageMetadata)
async def patch_lineage_metadata_ep(
    tensor_id: UUID = Path(...),
    updates: Dict[str, Any] = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Apply a partial update to the LineageMetadata for a tensor."""
    return await _patch_extended_metadata(tensor_id, "Lineage", updates, storage, api_key)


@router_extended_metadata.delete("/lineage", status_code=status.HTTP_204_NO_CONTENT)
async def delete_lineage_metadata_ep(
    tensor_id: UUID = Path(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Delete the LineageMetadata for a tensor."""
    return await _delete_extended_metadata(tensor_id, "Lineage", storage, api_key)
429
+
430
# --- Computational metadata endpoints ---
@router_extended_metadata.post("/computational", response_model=ComputationalMetadata, status_code=status.HTTP_201_CREATED)
async def upsert_computational_metadata_ep(
    tensor_id: UUID = Path(...),
    computational_in: ComputationalMetadata = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Create or replace the ComputationalMetadata for a tensor."""
    return await _upsert_extended_metadata(tensor_id, "Computational", computational_in, storage, api_key)


@router_extended_metadata.get("/computational", response_model=ComputationalMetadata)
async def get_computational_metadata_ep(
    tensor_id: UUID = Path(...),
    storage: MetadataStorage = Depends(get_storage_instance),
):
    """Fetch the ComputationalMetadata for a tensor."""
    return await _get_extended_metadata_ep(tensor_id, "Computational", storage)


@router_extended_metadata.patch("/computational", response_model=ComputationalMetadata)
async def patch_computational_metadata_ep(
    tensor_id: UUID = Path(...),
    updates: Dict[str, Any] = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Apply a partial update to the ComputationalMetadata for a tensor."""
    return await _patch_extended_metadata(tensor_id, "Computational", updates, storage, api_key)


@router_extended_metadata.delete("/computational", status_code=status.HTTP_204_NO_CONTENT)
async def delete_computational_metadata_ep(
    tensor_id: UUID = Path(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Delete the ComputationalMetadata for a tensor."""
    return await _delete_extended_metadata(tensor_id, "Computational", storage, api_key)
442
+
443
# --- Quality metadata endpoints ---
@router_extended_metadata.post("/quality", response_model=QualityMetadata, status_code=status.HTTP_201_CREATED)
async def upsert_quality_metadata_ep(
    tensor_id: UUID = Path(...),
    quality_in: QualityMetadata = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Create or replace the QualityMetadata for a tensor."""
    return await _upsert_extended_metadata(tensor_id, "Quality", quality_in, storage, api_key)


@router_extended_metadata.get("/quality", response_model=QualityMetadata)
async def get_quality_metadata_ep(
    tensor_id: UUID = Path(...),
    storage: MetadataStorage = Depends(get_storage_instance),
):
    """Fetch the QualityMetadata for a tensor."""
    return await _get_extended_metadata_ep(tensor_id, "Quality", storage)


@router_extended_metadata.patch("/quality", response_model=QualityMetadata)
async def patch_quality_metadata_ep(
    tensor_id: UUID = Path(...),
    updates: Dict[str, Any] = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Apply a partial update to the QualityMetadata for a tensor."""
    return await _patch_extended_metadata(tensor_id, "Quality", updates, storage, api_key)


@router_extended_metadata.delete("/quality", status_code=status.HTTP_204_NO_CONTENT)
async def delete_quality_metadata_ep(
    tensor_id: UUID = Path(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Delete the QualityMetadata for a tensor."""
    return await _delete_extended_metadata(tensor_id, "Quality", storage, api_key)
455
+
456
# --- Relational metadata endpoints ---
@router_extended_metadata.post("/relational", response_model=RelationalMetadata, status_code=status.HTTP_201_CREATED)
async def upsert_relational_metadata_ep(
    tensor_id: UUID = Path(...),
    relational_in: RelationalMetadata = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Create or replace the RelationalMetadata for a tensor."""
    return await _upsert_extended_metadata(tensor_id, "Relational", relational_in, storage, api_key)


@router_extended_metadata.get("/relational", response_model=RelationalMetadata)
async def get_relational_metadata_ep(
    tensor_id: UUID = Path(...),
    storage: MetadataStorage = Depends(get_storage_instance),
):
    """Fetch the RelationalMetadata for a tensor."""
    return await _get_extended_metadata_ep(tensor_id, "Relational", storage)


@router_extended_metadata.patch("/relational", response_model=RelationalMetadata)
async def patch_relational_metadata_ep(
    tensor_id: UUID = Path(...),
    updates: Dict[str, Any] = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Apply a partial update to the RelationalMetadata for a tensor."""
    return await _patch_extended_metadata(tensor_id, "Relational", updates, storage, api_key)


@router_extended_metadata.delete("/relational", status_code=status.HTTP_204_NO_CONTENT)
async def delete_relational_metadata_ep(
    tensor_id: UUID = Path(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Delete the RelationalMetadata for a tensor."""
    return await _delete_extended_metadata(tensor_id, "Relational", storage, api_key)
468
+
469
# --- Usage metadata endpoints ---
@router_extended_metadata.post("/usage", response_model=UsageMetadata, status_code=status.HTTP_201_CREATED)
async def upsert_usage_metadata_ep(
    tensor_id: UUID = Path(...),
    usage_in: UsageMetadata = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Create or replace the UsageMetadata for a tensor."""
    return await _upsert_extended_metadata(tensor_id, "Usage", usage_in, storage, api_key)


@router_extended_metadata.get("/usage", response_model=UsageMetadata)
async def get_usage_metadata_ep(
    tensor_id: UUID = Path(...),
    storage: MetadataStorage = Depends(get_storage_instance),
):
    """Fetch the UsageMetadata for a tensor."""
    return await _get_extended_metadata_ep(tensor_id, "Usage", storage)


@router_extended_metadata.patch("/usage", response_model=UsageMetadata)
async def patch_usage_metadata_ep(
    tensor_id: UUID = Path(...),
    updates: Dict[str, Any] = Body(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Apply a partial update to the UsageMetadata for a tensor."""
    return await _patch_extended_metadata(tensor_id, "Usage", updates, storage, api_key)


@router_extended_metadata.delete("/usage", status_code=status.HTTP_204_NO_CONTENT)
async def delete_usage_metadata_ep(
    tensor_id: UUID = Path(...),
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Delete the UsageMetadata for a tensor."""
    return await _delete_extended_metadata(tensor_id, "Usage", storage, api_key)
481
+
482
# --- I/O Router for Export/Import ---
router_io = APIRouter(prefix="/tensors", tags=["Import/Export"])


@router_io.get("/export", response_model=TensorusExportData)
async def export_tensor_metadata(
    tensor_ids_str: Optional[str] = Query(None, alias="tensor_ids"),  # Changed FastAPIQuery to Query
    storage: MetadataStorage = Depends(get_storage_instance)
):
    """Export tensor metadata as a downloadable JSON document.

    Args:
        tensor_ids_str: Optional comma-separated UUID list (query parameter
            ``tensor_ids``); when omitted, all tensors are exported.
        storage: Injected metadata storage backend.

    Raises:
        HTTPException: 400 when any supplied id is not a valid UUID.
    """
    parsed_tensor_ids: Optional[List[UUID]] = None
    if tensor_ids_str:
        try:
            parsed_tensor_ids = [UUID(tid.strip()) for tid in tensor_ids_str.split(',')]
        except ValueError:
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid UUID format in tensor_ids.")
    export_data = storage.get_export_data(tensor_ids=parsed_tensor_ids)
    filename = f"tensorus_export_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}.json"
    # Bug fix: the generated timestamped filename was computed but never
    # interpolated into the Content-Disposition header, so downloads were
    # served with a literal placeholder name.
    headers = {"Content-Disposition": f'attachment; filename="{filename}"'}
    return JSONResponse(content=export_data.model_dump(mode="json"), headers=headers)
498
+
499
@router_io.post("/import", summary="Import Tensor Metadata")
async def import_tensor_metadata(
    import_data_payload: TensorusExportData,
    conflict_strategy: Annotated[Literal["skip", "overwrite"], Query()] = "skip",
    storage: MetadataStorage = Depends(get_storage_instance),
    api_key: str = Depends(verify_api_key),
):
    """Import a previously exported metadata payload.

    Conflicting records are either skipped or overwritten according to
    ``conflict_strategy``; success and every failure mode are audit-logged.
    """
    try:
        summary = storage.import_data(import_data_payload, conflict_strategy=conflict_strategy)
        log_audit_event("IMPORT_DATA", api_key, details={"strategy": conflict_strategy, "summary": summary})
        return summary
    except NotImplementedError:
        # The active storage backend does not support imports at all.
        log_audit_event("IMPORT_DATA_FAILED_NOT_IMPLEMENTED", api_key, details={"strategy": conflict_strategy})
        raise HTTPException(status_code=status.HTTP_501_NOT_IMPLEMENTED, detail="Import functionality is not implemented")
    except Exception as e:
        log_audit_event("IMPORT_DATA_FAILED_UNEXPECTED", api_key, details={"strategy": conflict_strategy, "error": str(e)})
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Unexpected error during import: {e}")
516
+
517
# --- Management Router for Health and Metrics ---
router_management = APIRouter(tags=["Management"])


class HealthResponse(PydanticBaseModel):
    # Overall service status plus which storage backend answered.
    status: str
    backend: str
    detail: Optional[str] = None


@router_management.get("/health", response_model=HealthResponse)
async def health_check(storage: MetadataStorage = Depends(get_storage_instance)):
    """Report storage-backend health; 503 when the backend is unreachable."""
    is_healthy, backend_type = storage.check_health()
    if is_healthy:
        return HealthResponse(status="ok", backend=backend_type)
    return JSONResponse(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        content=HealthResponse(status="error", backend=backend_type, detail="Storage backend connection failed.").model_dump(),
    )


class MetricsResponse(PydanticBaseModel):
    # Per-category record counts reported by the storage backend.
    total_tensor_descriptors: int
    semantic_metadata_count: int
    lineage_metadata_count: int
    computational_metadata_count: int
    quality_metadata_count: int
    relational_metadata_count: int
    usage_metadata_count: int


@router_management.get("/metrics", response_model=MetricsResponse)
async def get_metrics(storage: MetadataStorage = Depends(get_storage_instance)):
    """Return record counts for descriptors and each extended-metadata type."""
    return MetricsResponse(
        total_tensor_descriptors=storage.get_tensor_descriptors_count(),
        semantic_metadata_count=storage.get_extended_metadata_count("SemanticMetadata"),
        lineage_metadata_count=storage.get_extended_metadata_count("LineageMetadata"),
        computational_metadata_count=storage.get_extended_metadata_count("ComputationalMetadata"),
        quality_metadata_count=storage.get_extended_metadata_count("QualityMetadata"),
        relational_metadata_count=storage.get_extended_metadata_count("RelationalMetadata"),
        usage_metadata_count=storage.get_extended_metadata_count("UsageMetadata"),
    )
540
+
541
# --- Analytics Router ---
router_analytics = APIRouter(prefix="/analytics", tags=["Analytics"])


@router_analytics.get(
    "/co_occurring_tags",
    response_model=Dict[str, List[Dict[str, Any]]],
    summary="Get Co-occurring Tags",
    description="Finds tags that frequently co-occur with other tags on tensor descriptors.",
)
async def api_get_co_occurring_tags(
    min_co_occurrence: int = Query(2, ge=1, description="Minimum number of times tags must appear together."),
    limit: int = Query(10, ge=1, le=100, description="Maximum number of co-occurring tags to return for each primary tag."),
    storage: MetadataStorage = Depends(get_storage_instance),
):
    """Delegate tag co-occurrence analytics to the storage backend."""
    try:
        return storage.get_co_occurring_tags(min_co_occurrence=min_co_occurrence, limit=limit)
    except NotImplementedError:
        raise HTTPException(status_code=status.HTTP_501_NOT_IMPLEMENTED, detail="Co-occurring tags analytics not implemented for the current storage backend.")
    except Exception as e:
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Error calculating co-occurring tags: {e}")
561
+
562
+
563
@router_analytics.get(
    "/stale_tensors",
    response_model=List[TensorDescriptor],
    summary="Get Stale Tensors",
    description="Finds tensors that have not been accessed or modified for a given number of days.",
)
async def api_get_stale_tensors(
    threshold_days: int = Query(90, ge=1, description="Number of days to consider a tensor stale."),
    limit: int = Query(100, ge=1, le=1000, description="Maximum number of stale tensors to return."),
    storage: MetadataStorage = Depends(get_storage_instance),
):
    """Delegate stale-tensor detection to the storage backend."""
    try:
        return storage.get_stale_tensors(threshold_days=threshold_days, limit=limit)
    except NotImplementedError:
        raise HTTPException(status_code=status.HTTP_501_NOT_IMPLEMENTED, detail="Stale tensor analytics not implemented for the current storage backend.")
    except Exception as e:
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Error fetching stale tensors: {e}")
577
+
578
+
579
@router_analytics.get(
    "/complex_tensors",
    response_model=List[TensorDescriptor],
    summary="Get Complex Tensors",
    description="Finds tensors considered complex based on lineage (number of parents or transformation steps).",
)
async def api_get_complex_tensors(
    min_parent_count: Optional[int] = Query(None, ge=0, description="Minimum number of parent tensors."),
    min_transformation_steps: Optional[int] = Query(None, ge=0, description="Minimum number of transformation steps."),
    limit: int = Query(100, ge=1, le=1000, description="Maximum number of complex tensors to return."),
    storage: MetadataStorage = Depends(get_storage_instance),
):
    """Delegate lineage-complexity analytics to the storage backend."""
    # At least one complexity criterion is required for the query to be meaningful.
    if min_parent_count is None and min_transformation_steps is None:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="At least one criterion (min_parent_count or min_transformation_steps) must be provided.")
    try:
        return storage.get_complex_tensors(
            min_parent_count=min_parent_count,
            min_transformation_steps=min_transformation_steps,
            limit=limit,
        )
    except ValueError as e:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
    except NotImplementedError:
        raise HTTPException(status_code=status.HTTP_501_NOT_IMPLEMENTED, detail="Complex tensor analytics not implemented for the current storage backend.")
    except Exception as e:
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Error fetching complex tensors: {e}")
tensorus/api/routers/__init__.py ADDED
File without changes
tensorus/api/routers/query.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter, Depends, HTTPException
from typing import Any, Dict, List

from ..utils import tensor_to_list
from ...nql_agent import NQLAgent
from ...tensor_storage import TensorStorage
from ...api import NQLQueryRequest, NQLResponse, get_nql_agent

router = APIRouter()


@router.post("/query", response_model=NQLResponse)
async def query_route(
    request: NQLQueryRequest,
    nql_agent: NQLAgent = Depends(get_nql_agent),
):
    """Execute an NQL query and convert tensors to list for JSON responses."""
    try:
        result = nql_agent.process_query(request.query)
        # Successful queries carry raw tensors; serialize each hit so the
        # response body is plain JSON.
        if result.get("success") and isinstance(result.get("results"), list):
            serialized: List[Dict[str, Any]] = []
            for record in result["results"]:
                shape, dtype, data = tensor_to_list(record["tensor"])
                serialized.append({
                    "record_id": record["metadata"].get("record_id"),
                    "shape": shape,
                    "dtype": dtype,
                    "data": data,
                    "metadata": record["metadata"],
                })
            result["results"] = serialized
        return NQLResponse(**result)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
tensorus/api/security.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Security, HTTPException, status, Depends
2
+ from fastapi.security.api_key import APIKeyHeader
3
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
4
+ from typing import Optional, Dict, Any # Added Dict, Any for JWT payload
5
+
6
+ from tensorus.config import settings
7
+ from tensorus.audit import log_audit_event
8
+ from jose import jwt, JWTError
9
+ import requests
10
+
11
+
12
class MutableAPIKeyHeader(APIKeyHeader):
    """APIKeyHeader whose header name can be swapped at runtime (test helper)."""

    @property
    def name(self) -> str:  # type: ignore[override]
        # Delegate to the underlying OpenAPI model so FastAPI sees updates.
        return self.model.name

    @name.setter
    def name(self, value: str) -> None:  # type: ignore[override]
        self.model.name = value
22
+
23
# --- API Key Authentication ---
api_key_header_auth = MutableAPIKeyHeader(name=settings.API_KEY_HEADER_NAME, auto_error=False)


async def verify_api_key(api_key: Optional[str] = Security(api_key_header_auth)):
    """
    Verify the API key supplied in the configured request header.

    Raises HTTPException (401) when the key is missing or not among the
    configured valid keys; returns the key string on success.
    """
    # No header at all -> 401, regardless of server configuration.
    if not api_key:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Missing API Key"
        )
    # An empty configured key list means no key can ever be accepted, so the
    # same check rejects both "no keys configured" and "unknown key".
    if not settings.VALID_API_KEYS or api_key not in settings.VALID_API_KEYS:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid API Key"
        )
    return api_key
48
+
49
+
50
# --- JWT Token Authentication (Conceptual) ---
oauth2_scheme = HTTPBearer(auto_error=False)  # auto_error=False: a missing token does not raise automatically


async def verify_jwt_token(token: Optional[HTTPAuthorizationCredentials] = Security(oauth2_scheme)) -> Dict[str, Any]:
    """Validate a bearer JWT against the configured JWKS endpoint.

    Behavior:
    - JWT auth disabled: raises 503, unless dev dummy mode is on and a token
      is present (then a synthetic claim set is returned).
    - Dev dummy mode (auth enabled): any bearer token is accepted.
    - Otherwise: the token's ``kid`` is matched against the fetched JWKS keys
      and signature/issuer/audience are verified; the claims dict is returned.

    Raises:
        HTTPException: 401 for missing/malformed/invalid tokens; 503 for
        configuration or JWKS-fetch problems.
    """
    if not settings.AUTH_JWT_ENABLED:
        # Dev convenience: dummy mode may override the disabled flag.
        if settings.AUTH_DEV_MODE_ALLOW_DUMMY_JWT and token:
            return {"sub": "dummy_jwt_user_jwt_disabled_but_dev_mode", "username": "dummy_dev_jwt", "token_type": "dummy_bearer_dev"}

        # Standard behavior: an endpoint requiring JWT on a non-JWT deployment fails.
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,  # Or 403 Forbidden
            detail="JWT authentication is not enabled or configured for this service."
        )

    if not token:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Not authenticated via JWT (No token provided)"
        )

    if settings.AUTH_DEV_MODE_ALLOW_DUMMY_JWT:
        # In dev dummy mode, allow any token value.
        return {"sub": "dummy_jwt_user", "username": "dummy_jwt_user", "token_type": "dummy_bearer", "token_value": token.credentials}

    if not settings.AUTH_JWT_JWKS_URI:
        raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="JWKS URI not configured")

    try:
        # Fix: an explicit timeout prevents a slow or unreachable JWKS
        # endpoint from hanging the request worker indefinitely.
        jwks_data = requests.get(settings.AUTH_JWT_JWKS_URI, timeout=10).json()
    except Exception as e:  # pragma: no cover - network issues
        log_audit_event("JWT_VALIDATION_FAILED", details={"error": f"Failed fetching JWKS: {e}"})
        raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Unable to fetch JWKS")

    try:
        # Fix: a structurally malformed token previously escaped here as an
        # unhandled JWTError (HTTP 500); report it as 401 instead.
        unverified_header = jwt.get_unverified_header(token.credentials)
    except JWTError as e:
        log_audit_event("JWT_VALIDATION_FAILED", details={"error": str(e)})
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token header")

    rsa_key = None
    for key in jwks_data.get("keys", []):
        if key.get("kid") == unverified_header.get("kid"):
            rsa_key = key
            break

    if rsa_key is None:
        log_audit_event("JWT_VALIDATION_FAILED", details={"error": "kid not found"})
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token header")

    try:
        claims = jwt.decode(
            token.credentials,
            rsa_key,
            algorithms=[settings.AUTH_JWT_ALGORITHM],
            issuer=settings.AUTH_JWT_ISSUER,
            audience=settings.AUTH_JWT_AUDIENCE,
        )
    except JWTError as e:
        log_audit_event("JWT_VALIDATION_FAILED", details={"error": str(e)})
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid JWT token")

    log_audit_event("JWT_VALIDATION_SUCCESS", user=claims.get("sub"), details={"issuer": claims.get("iss"), "aud": claims.get("aud")})
    return claims
120
+
121
+ # Example of how to use it in an endpoint:
122
+ # from fastapi import Depends
123
+ # from .security import verify_api_key
124
+ #
125
+ # @router.post("/some_protected_route", dependencies=[Depends(verify_api_key)])
126
+ # async def protected_route_function():
127
+ # # ...
128
+ #
129
+ # Or if you need the key value:
130
+ # @router.post("/another_route")
131
+ # async def another_route_function(api_key: str = Depends(verify_api_key)):
132
+ # # api_key variable now holds the validated key
133
+ # # ...
tensorus/audit.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
from typing import Optional, Dict, Any
import sys
from tensorus.config import settings

# Audit logging setup. Events always go to stdout; a file handler is added
# on a best-effort basis. Production deployments may prefer JSON logging,
# rotation, or shipping to an external service instead of this simple setup.
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
LOG_DEFAULT_HANDLERS = [logging.StreamHandler(sys.stdout)]  # stdout is always available

try:
    file_handler = logging.FileHandler(settings.AUDIT_LOG_PATH)
    file_handler.setFormatter(logging.Formatter(LOG_FORMAT))
    LOG_DEFAULT_HANDLERS.append(file_handler)
except IOError:
    # The audit file may be unwritable (e.g. permissions); fall back to stdout only.
    print(
        f"Warning: Could not open {settings.AUDIT_LOG_PATH} for writing. Audit logs will go to stdout only.",
        file=sys.stderr,
    )


logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, handlers=LOG_DEFAULT_HANDLERS)
audit_logger = logging.getLogger("tensorus.audit")
27
+
28
+
29
def log_audit_event(
    action: str,
    user: Optional[str] = "anonymous",
    tensor_id: Optional[str] = None,
    details: Optional[Dict[str, Any]] = None
):
    """
    Logs an audit event.

    Args:
        action: A string describing the action performed (e.g., "CREATE_TENSOR_DESCRIPTOR").
        user: The user or API key performing the action. Defaults to "anonymous".
        tensor_id: The primary tensor_id involved in the action, if applicable.
        details: A dictionary of additional relevant information about the event.
    """
    parts = [f"Action: {action}", f"User: {user if user else 'unknown'}"]
    if tensor_id:
        parts.append(f"TensorID: {tensor_id}")
    if details:
        # Render details as key=value pairs for a compact single-line entry.
        rendered = ", ".join(f"{k}={v}" for k, v in details.items())
        parts.append(f"Details: [{rendered}]")
    audit_logger.info(" | ".join(parts))
57
+
58
+
59
# Example usage for manual testing; runs only when this module is executed directly.
if __name__ == "__main__":
    log_audit_event(
        action="TEST_EVENT",
        user="test_user",
        tensor_id="dummy-uuid-123",
        details={"param1": "value1", "status": "success"},
    )
    audit_logger.info("This is a direct info log from audit_logger for testing handlers.")
    audit_logger.warning("This is a warning log for testing handlers.")
    print(f"Audit logger '{audit_logger.name}' has handlers: {audit_logger.handlers}")
    if not audit_logger.handlers:
        print("Warning: Audit logger has no handlers configured if run outside main app context without direct basicConfig call here.")
    # In the application, basicConfig in this file should set up the handlers globally once.
+ # In the application, basicConfig in this file should set up the handlers globally once.
tensorus/automl_agent.py ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # automl_agent.py
2
+ """
3
+ Implements the AutoML Agent for Tensorus.
4
+
5
+ This agent performs basic hyperparameter optimization using random search.
6
+ It trains a simple dummy model on synthetic data, evaluates its performance,
7
+ and logs the results, including storing trial results in TensorStorage.
8
+
9
+ Future Enhancements:
10
+ - Implement more advanced search strategies (Bayesian Optimization, Hyperband).
11
+ - Allow configuration of different model architectures.
12
+ - Integrate with real datasets from TensorStorage.
13
+ - Implement early stopping and other training optimizations.
14
+ - Store best model state_dict (requires serialization strategy).
15
+ - Parallelize trials for faster search.
16
+ - Use dedicated hyperparameter optimization libraries (Optuna, Ray Tune).
17
+ """
18
+
19
+ import torch
20
+ import torch.nn as nn
21
+ import torch.optim as optim
22
+ import numpy as np
23
+ import random
24
+ import logging
25
+ import time
26
+ from typing import Dict, Any, Callable, Tuple, Optional
27
+
28
+ from .tensor_storage import TensorStorage # Import our storage module
29
+
30
+ # Configure logging
31
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
# --- Dummy Model Definition ---
class DummyMLP(nn.Module):
    """A simple Multi-Layer Perceptron for regression/classification."""

    def __init__(self, input_dim: int, output_dim: int, hidden_size: int = 64, activation_fn: Callable = nn.ReLU):
        super().__init__()
        # Two linear layers with a configurable nonlinearity in between.
        self.layer_1 = nn.Linear(input_dim, hidden_size)
        self.activation = activation_fn()
        self.layer_2 = nn.Linear(hidden_size, output_dim)

    def forward(self, x):
        hidden = self.activation(self.layer_1(x))
        return self.layer_2(hidden)
48
+
49
+
50
# --- AutoML Agent Class ---
class AutoMLAgent:
    """Performs random search hyperparameter optimization.

    Each trial samples a configuration from ``search_space``, trains a small
    ``DummyMLP`` on freshly generated synthetic data, and records the trial
    score in ``TensorStorage`` (validation loss for regression, accuracy for
    classification).
    """

    def __init__(self,
                 tensor_storage: "TensorStorage",
                 search_space: Dict[str, Callable[[], Any]],
                 input_dim: int,
                 output_dim: int,
                 task_type: str = 'regression',  # 'regression' or 'classification'
                 results_dataset: str = "automl_results"):
        """
        Initializes the AutoML Agent.

        Args:
            tensor_storage: An instance of TensorStorage.
            search_space: Dictionary defining the hyperparameter search space.
                Keys are param names (e.g., 'lr', 'hidden_size'); values are
                zero-argument callables that sample a value for that param
                (e.g., ``lambda: 10**random.uniform(-4, -2)``).
            input_dim: Input dimension for the dummy model.
            output_dim: Output dimension for the dummy model.
            task_type: 'regression' or 'classification'; selects loss,
                synthetic-data generation, and score direction.
            results_dataset: Name of the TensorStorage dataset for trial results.

        Raises:
            TypeError: If ``tensor_storage`` is not a TensorStorage.
            ValueError: If ``task_type`` is not a supported value.
        """
        if not isinstance(tensor_storage, TensorStorage):
            raise TypeError("tensor_storage must be an instance of TensorStorage")
        if task_type not in ['regression', 'classification']:
            raise ValueError("task_type must be 'regression' or 'classification'")

        self.tensor_storage = tensor_storage
        self.search_space = search_space
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.task_type = task_type
        self.results_dataset = results_dataset

        # Ensure the results dataset exists before the first insert.
        try:
            self.tensor_storage.get_dataset(self.results_dataset)
        except ValueError:
            logger.info(f"Dataset '{self.results_dataset}' not found. Creating it.")
            self.tensor_storage.create_dataset(self.results_dataset)

        # Best results found so far during the search.
        self.best_score: Optional[float] = None
        self.best_params: Optional[Dict[str, Any]] = None
        # Regression minimizes validation loss; classification maximizes accuracy.
        self.higher_score_is_better = task_type == 'classification'

        # Device configuration
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f"AutoML Agent using device: {self.device}")
        logger.info(f"AutoML Agent initialized for {task_type} task. Results stored in '{results_dataset}'.")

    def _generate_synthetic_data(self, n_samples=500, batch_size=32) -> Tuple[Any, Any, nn.Module]:
        """Generates synthetic data loaders plus the matching loss function.

        Returns:
            ``(train_loader, val_loader, loss_fn)`` — an 80/20 split of freshly
            sampled data appropriate for the configured task type.
            (Note: the original annotation claimed a 2-tuple; three values are
            returned, so the annotation is fixed here.)

        Raises:
            ValueError: For classification with ``output_dim <= 1``.
        """
        X = torch.randn(n_samples, self.input_dim, device=self.device)

        if self.task_type == 'regression':
            # Simple linear relationship with noise.
            true_weight = torch.randn(self.input_dim, self.output_dim, device=self.device) * 2
            true_bias = torch.randn(self.output_dim, device=self.device)
            y = X @ true_weight + true_bias + torch.randn(n_samples, self.output_dim, device=self.device) * 0.5
            loss_fn = nn.MSELoss()
        else:  # classification
            # Simple linear separation + softmax for multi-class labels.
            if self.output_dim <= 1:
                raise ValueError("Output dimension must be > 1 for classification task example.")
            true_weight = torch.randn(self.input_dim, self.output_dim, device=self.device)
            logits = X @ true_weight
            y = torch.softmax(logits, dim=1).argmax(dim=1)  # class labels
            loss_fn = nn.CrossEntropyLoss()

        # Simple 80/20 train/validation split.
        split_idx = int(n_samples * 0.8)
        X_train, X_val = X[:split_idx], X[split_idx:]
        y_train, y_val = y[:split_idx], y[split_idx:]

        train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
        val_dataset = torch.utils.data.TensorDataset(X_val, y_val)

        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

        return train_loader, val_loader, loss_fn

    def _build_dummy_model(self, params: Dict[str, Any]) -> nn.Module:
        """Builds the dummy MLP model based on hyperparameters.

        Unknown activation names silently fall back to ReLU; missing params
        fall back to defaults (hidden_size=64, activation='relu').
        """
        hidden_size = params.get('hidden_size', 64)
        activation_name = params.get('activation', 'relu')

        act_fn_map = {'relu': nn.ReLU, 'tanh': nn.Tanh, 'sigmoid': nn.Sigmoid}
        activation_fn = act_fn_map.get(activation_name.lower(), nn.ReLU)

        model = DummyMLP(
            input_dim=self.input_dim,
            output_dim=self.output_dim,
            hidden_size=hidden_size,
            activation_fn=activation_fn
        ).to(self.device)
        return model

    def _train_and_evaluate(self, params: Dict[str, Any], num_epochs: int = 5) -> Optional[float]:
        """Trains and evaluates one model configuration.

        Returns:
            The trial score (val loss for regression, val accuracy for
            classification), or ``None`` when the trial failed (non-finite
            loss or an exception).
        """
        logger.debug(f"Training trial with params: {params}")
        start_time = time.time()

        try:
            # 1. Build model for this configuration.
            model = self._build_dummy_model(params)

            # 2. Fresh synthetic data and matching loss.
            train_loader, val_loader, loss_fn = self._generate_synthetic_data()

            # 3. Optimizer (Adam, lr from params with a default).
            lr = params.get('lr', 1e-3)
            optimizer = optim.Adam(model.parameters(), lr=lr)

            # 4. Training loop.
            model.train()
            for epoch in range(num_epochs):
                epoch_loss = 0
                for batch_X, batch_y in train_loader:
                    optimizer.zero_grad()
                    outputs = model(batch_X)
                    loss = loss_fn(outputs, batch_y)

                    # Abort the trial on NaN/inf loss — this configuration diverged.
                    if not torch.isfinite(loss):
                        logger.warning(f"Trial failed: Non-finite loss detected during training epoch {epoch}. Params: {params}")
                        return None

                    loss.backward()
                    optimizer.step()
                    epoch_loss += loss.item()

            # 5. Evaluation loop.
            model.eval()
            total_val_loss = 0
            total_correct = 0
            total_samples = 0
            with torch.no_grad():
                for batch_X, batch_y in val_loader:
                    outputs = model(batch_X)
                    loss = loss_fn(outputs, batch_y)
                    total_val_loss += loss.item()

                    if self.task_type == 'classification':
                        predicted = outputs.argmax(dim=1)
                        total_correct += (predicted == batch_y).sum().item()
                        total_samples += batch_y.size(0)

            avg_val_loss = total_val_loss / len(val_loader)
            duration = time.time() - start_time
            logger.debug(f"Trial completed in {duration:.2f}s. Val Loss: {avg_val_loss:.4f}")

            # 6. Score: loss (lower better) or accuracy (higher better).
            if self.task_type == 'regression':
                score = avg_val_loss
            else:  # classification
                accuracy = total_correct / total_samples if total_samples > 0 else 0
                score = accuracy
                logger.debug(f" Trial Val Accuracy: {accuracy:.4f}")

            return score

        except Exception as e:
            logger.error(f"Trial failed with exception for params {params}: {e}", exc_info=True)
            return None

    def hyperparameter_search(self, trials: int, num_epochs_per_trial: int = 5) -> Optional[Dict[str, Any]]:
        """
        Performs random search for the specified number of trials.

        Every trial (including failures, stored with a NaN score tensor) is
        recorded in ``self.results_dataset``.

        Args:
            trials: The number of hyperparameter configurations to try.
            num_epochs_per_trial: Number of epochs to train each model configuration.

        Returns:
            The dictionary of hyperparameters that achieved the best score,
            or None if no trial succeeded.
        """
        logger.info(f"--- Starting Hyperparameter Search ({trials} trials) ---")
        self.best_score = None
        self.best_params = None

        for i in range(trials):
            # 1. Sample a configuration from the search space.
            current_params = {name: sampler() for name, sampler in self.search_space.items()}
            logger.info(f"Trial {i+1}/{trials}: Testing params: {current_params}")

            # 2. Train and evaluate.
            score = self._train_and_evaluate(current_params, num_epochs=num_epochs_per_trial)

            # 3. Persist the trial (NaN tensor marks a failed trial).
            score_tensor = torch.tensor(float('nan') if score is None else score)
            trial_metadata = {
                "trial_id": i + 1,
                "params": current_params,  # params dict stored directly in metadata
                "score": score,            # duplicated in metadata for easier querying
                "task_type": self.task_type,
                "search_timestamp_utc": time.time(),
                "created_by": "AutoMLAgent"
            }
            try:
                record_id = self.tensor_storage.insert(
                    self.results_dataset,
                    score_tensor,
                    trial_metadata
                )
                logger.debug(f"Stored trial {i+1} results (Score: {score}) with record ID: {record_id}")
            except Exception as e:
                logger.error(f"Failed to store trial {i+1} results in TensorStorage: {e}")

            # 4. Track the best configuration seen so far.
            if score is not None:
                is_better = (
                    self.best_score is None
                    or (self.higher_score_is_better and score > self.best_score)
                    or (not self.higher_score_is_better and score < self.best_score)
                )
                if is_better:
                    self.best_score = score
                    self.best_params = current_params
                    logger.info(f"*** New best score found! Trial {i+1}: Score={score:.4f}, Params={current_params} ***")

        logger.info("--- Hyperparameter Search Finished ---")
        if self.best_params:
            logger.info(f"Best score overall: {self.best_score:.4f}")
            logger.info(f"Best hyperparameters found: {self.best_params}")
            # A follow-up could save the best model's state_dict here.
        else:
            logger.warning("No successful trials completed. Could not determine best parameters.")

        return self.best_params
298
+
299
+
300
# --- Example Usage ---
if __name__ == "__main__":
    print("--- Starting AutoML Agent Example ---")

    # 1. Storage backend for trial results.
    storage = TensorStorage()

    # 2. Random-search space for a small regression MLP.
    search_space_reg = {
        'lr': lambda: 10**random.uniform(-5, -2),  # log-uniform learning rate
        'hidden_size': lambda: random.choice([32, 64, 128, 256]),
        'activation': lambda: random.choice(['relu', 'tanh']),
    }

    # 3. Agent configured for a 10-feature, single-output regression task.
    input_dim = 10
    output_dim = 1  # regression task
    automl_agent = AutoMLAgent(
        tensor_storage=storage,
        search_space=search_space_reg,
        input_dim=input_dim,
        output_dim=output_dim,
        task_type='regression',
        results_dataset="automl_regression_results",
    )

    # 4. Run the search: 20 random configurations, few epochs each for speed.
    best_hyperparams = automl_agent.hyperparameter_search(trials=20, num_epochs_per_trial=10)

    # 5. Inspect what was recorded in TensorStorage.
    print("\n--- Checking TensorStorage contents (Sample) ---")
    try:
        results_count = len(storage.get_dataset(automl_agent.results_dataset))
        print(f"Found {results_count} trial records in '{automl_agent.results_dataset}'.")

        if results_count > 0:
            print("\nExample trial record (metadata):")
            sample_trial = storage.sample_dataset(automl_agent.results_dataset, 1)
            if sample_trial:
                print(f" Metadata: {sample_trial[0]['metadata']}")
                print(f" Score Tensor: {sample_trial[0]['tensor']}")  # the score, or NaN for a failed trial
        # The best record could also be located by querying for score == best_score.
    except ValueError as e:
        print(f"Could not retrieve dataset '{automl_agent.results_dataset}': {e}")
    except Exception as e:
        print(f"An error occurred checking storage: {e}")

    print("\n--- AutoML Agent Example Finished ---")
tensorus/config.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
+ from typing import Optional
3
+ from pydantic import field_validator
4
+
5
class Settings(BaseSettings):
    """Settings model for the storage backend and PostgreSQL connection.

    With pydantic-settings v2, environment variables are loaded by default.
    An env-var prefix could be declared via
    ``model_config = SettingsConfigDict(env_prefix='TENSORUS_')``.
    """

    # Which storage backend to use; overridable from the environment.
    STORAGE_BACKEND: str = "in_memory"

    # PostgreSQL connection parameters — either the individual fields
    # or a single DSN string.
    POSTGRES_HOST: Optional[str] = None
    POSTGRES_PORT: Optional[int] = 5432  # default PostgreSQL port
    POSTGRES_USER: Optional[str] = None
    POSTGRES_PASSWORD: Optional[str] = None
    POSTGRES_DB: Optional[str] = None
    POSTGRES_DSN: Optional[str] = None  # alternative to individual params

    # A .env file could be supported with:
    # model_config = SettingsConfigDict(env_file='.env', env_file_encoding='utf-8', extra='ignore')
23
+
24
+
25
+ # Global instance of the settings
26
+ # The environment variables will be loaded when this instance is created.
27
+ # e.g. TENSORUS_STORAGE_BACKEND=postgres will override the default.
28
+ # Note: For pydantic-settings, env var names are case-insensitive by default for matching.
29
+ # If env_prefix is set, it would be TENSORUS_STORAGE_BACKEND. Without it, it's just STORAGE_BACKEND.
30
+ # Let's assume no prefix for now, so environment variables should be STORAGE_BACKEND, POSTGRES_HOST etc.
31
+ # OR, more commonly, one would use the env_prefix.
32
+ # For this exercise, I will assume the user will set environment variables like:
33
+ # export STORAGE_BACKEND="postgres"
34
+ # export POSTGRES_USER="myuser"
35
+ # ...etc.
36
+ # Or, if using an .env file:
37
+ # STORAGE_BACKEND="postgres"
38
+ # POSTGRES_USER="myuser"
39
+ # ...
40
+ #
41
+ # For Pydantic V1 BaseSettings, it would be:
42
+ # class Settings(BaseSettings):
43
+ # STORAGE_BACKEND: str = "in_memory"
44
+ # # ... other fields
45
+ # class Config:
46
+ # env_prefix = "TENSORUS_" # e.g. TENSORUS_STORAGE_BACKEND
47
+ # # case_sensitive = False # for Pydantic V1
48
+ #
49
+ # NOTE: despite the "V1" name, the SettingsV1 class below uses pydantic-settings v2 constructs (SettingsConfigDict, field_validator) with the TENSORUS_ env prefix.
50
+
51
class SettingsV1(BaseSettings):
    """Runtime settings for Tensorus.

    Despite the "V1" in the name, this model relies on pydantic-settings v2
    constructs (``SettingsConfigDict`` and ``field_validator``). Values are
    read case-insensitively from environment variables carrying the
    ``TENSORUS_`` prefix, e.g. ``TENSORUS_STORAGE_BACKEND=postgres``.
    """

    # Storage backend selection plus PostgreSQL connection parameters
    # (individual fields, or a single DSN).
    STORAGE_BACKEND: str = "in_memory"
    POSTGRES_HOST: Optional[str] = None
    POSTGRES_PORT: Optional[int] = 5432
    POSTGRES_USER: Optional[str] = None
    POSTGRES_PASSWORD: Optional[str] = None
    POSTGRES_DB: Optional[str] = None
    POSTGRES_DSN: Optional[str] = None

    # API security. VALID_API_KEYS accepts either a comma-separated string
    # or a JSON list from the environment (normalized by the validator below).
    VALID_API_KEYS: list[str] | str = []
    API_KEY_HEADER_NAME: str = "X-API-KEY"
    AUDIT_LOG_PATH: str = "tensorus_audit.log"

    model_config = SettingsConfigDict(env_prefix="TENSORUS_", case_sensitive=False)

    @field_validator("VALID_API_KEYS", mode="before")
    def split_valid_api_keys(cls, v):
        """Allow comma-separated or JSON list values from the environment."""
        if isinstance(v, str):
            try:
                import json
                parsed = json.loads(v)
                if isinstance(parsed, list):
                    return [str(item) for item in parsed]
            except Exception:
                pass
            return [key.strip() for key in v.split(',') if key.strip()]
        return v

    # JWT authentication (conceptual settings; not enforced by this model).
    AUTH_JWT_ENABLED: bool = False
    AUTH_JWT_ISSUER: Optional[str] = None
    AUTH_JWT_AUDIENCE: Optional[str] = None
    AUTH_JWT_ALGORITHM: str = "RS256"
    AUTH_JWT_JWKS_URI: Optional[str] = None
    AUTH_DEV_MODE_ALLOW_DUMMY_JWT: bool = False


# Single shared settings instance, created (and env-loaded) at import time.
settings = SettingsV1()
tensorus/dummy_env.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dummy_env.py
2
+ """
3
+ A simple dummy environment for testing the RL agent.
4
+ State: Position (1D)
5
+ Action: Move left (-1), Stay (0), Move right (+1) (Discrete actions)
6
+ Goal: Reach position 0
7
+ Reward: -abs(position), +10 if at goal
8
+ """
9
+ from typing import Tuple, Dict
10
+ import torch
11
+ import random
12
+ import numpy as np # Use numpy for state representation convenience
13
+
14
class DummyEnv:
    """Minimal 1-D environment for RL smoke tests.

    State: a single float position. Actions: 0 (left -0.5), 1 (stay),
    2 (right +0.5). Reward: -0.1 * |position - goal| per step, plus a +10
    bonus when the goal (position 0, tolerance 0.1) is reached. Episodes end
    at the goal or after ``max_steps`` steps.
    """

    def __init__(self, max_steps=50):
        self.state_dim = 1   # position is the only state variable
        self.action_dim = 3  # actions: 0 (left), 1 (stay), 2 (right)
        self.max_steps = max_steps
        self.current_pos = 0.0
        self.steps_taken = 0
        self.goal_pos = 0.0
        self.max_pos = 5.0   # position clamped to [-max_pos, max_pos]

    def reset(self) -> torch.Tensor:
        """Resets the environment to a random starting position.

        Returns:
            The initial state as a shape-(1,) float32 tensor.
        """
        self.current_pos = random.uniform(-self.max_pos, self.max_pos)
        self.steps_taken = 0
        return torch.tensor([self.current_pos], dtype=torch.float32)

    def step(self, action: int) -> Tuple[torch.Tensor, float, bool, Dict]:
        """Takes an action and returns (next_state, reward, done, info).

        Args:
            action: 0 (left), 1 (stay) or 2 (right). Plain ints and NumPy
                integer scalars (as commonly produced by RL agents) are both
                accepted — a backward-compatible widening of the original
                int-only check.

        Raises:
            ValueError: If the action is not an integer in {0, 1, 2}.
        """
        # Accept NumPy integer scalars in addition to plain ints.
        if isinstance(action, (int, np.integer)):
            action = int(action)
        else:
            raise ValueError(f"Invalid action: {action}. Must be 0, 1, or 2.")
        if action not in (0, 1, 2):
            raise ValueError(f"Invalid action: {action}. Must be 0, 1, or 2.")

        # Update position based on action; action 1 (stay) changes nothing.
        if action == 0:    # move left
            self.current_pos -= 0.5
        elif action == 2:  # move right
            self.current_pos += 0.5

        # Clip position to the boundaries.
        self.current_pos = np.clip(self.current_pos, -self.max_pos, self.max_pos)

        self.steps_taken += 1

        # Small penalty proportional to the distance from the goal.
        reward = -abs(self.current_pos - self.goal_pos) * 0.1
        done = False

        # Goal reached (within a small tolerance) → bonus and terminate.
        if abs(self.current_pos - self.goal_pos) < 0.1:
            reward += 10.0
            done = True

        # Time limit reached → terminate (no extra penalty).
        if self.steps_taken >= self.max_steps:
            done = True

        next_state = torch.tensor([self.current_pos], dtype=torch.float32)
        info = {}  # reserved for future diagnostics
        return next_state, float(reward), done, info
69
+
70
# Example Usage
if __name__ == "__main__":
    env = DummyEnv()
    state = env.reset()
    print(f"Initial state: {state.item()}")

    done = False
    total_reward = 0
    steps = 0
    # Roll out one episode under a uniformly random policy.
    while not done:
        action = random.choice([0, 1, 2])
        next_state, reward, done, _ = env.step(action)
        print(f"Step {steps+1}: Action={action}, Next State={next_state.item():.2f}, Reward={reward:.2f}, Done={done}")
        state = next_state
        total_reward += reward
        steps += 1
        if steps > env.max_steps + 5:  # safety break
            print("Exceeded max steps significantly, breaking.")
            break

    print(f"\nEpisode finished after {steps} steps. Total reward: {total_reward:.2f}")
tensorus/financial_data_generator.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import datetime
4
+
5
def generate_financial_data(
    days: int = 500,
    initial_price: float = 100.0,
    trend_slope: float = 0.05,
    seasonality_amplitude: float = 10.0,
    seasonality_period: int = 90,
    noise_level: float = 2.0,
    base_volume: int = 100000,
    volume_volatility: float = 0.3,
    start_date_str: str = "2022-01-01"
) -> pd.DataFrame:
    """
    Generates synthetic time series data resembling daily stock prices.

    The close price is the sum of a linear trend, a sinusoidal seasonal
    component and Gaussian noise, floored at 1.0; volume is the base volume
    perturbed by a uniform +/- ``volume_volatility`` fraction.

    Args:
        days (int): Number of trading days to generate data for.
        initial_price (float): Starting price for the stock.
        trend_slope (float): Slope for the linear trend component (price change per day).
        seasonality_amplitude (float): Amplitude of the seasonal (sine wave) component.
        seasonality_period (int): Period in days for the seasonality.
        noise_level (float): Standard deviation of the random noise added to the price.
        base_volume (int): Base daily trading volume.
        volume_volatility (float): Percentage volatility for volume (e.g., 0.3 for +/-30%).
        start_date_str (str): Start date for the data in 'YYYY-MM-DD' format.

    Returns:
        pd.DataFrame: Columns ['Date', 'Close', 'Volume'] — datetime, float, int.

    Raises:
        ValueError: If ``start_date_str`` is not in 'YYYY-MM-DD' format.
    """
    try:
        start_date = datetime.datetime.strptime(start_date_str, "%Y-%m-%d")
    except ValueError:
        raise ValueError("start_date_str must be in 'YYYY-MM-DD' format")

    # Consecutive calendar days; swap in pd.bdate_range for actual trading days.
    dates = pd.date_range(start_date, periods=days, freq='D')

    # Shared time axis for the trend and seasonality.
    t = np.arange(days)
    trend = trend_slope * t
    seasonal = seasonality_amplitude * np.sin(2 * np.pi * t / seasonality_period)
    noise = np.random.normal(loc=0, scale=noise_level, size=days)

    # Close = trend + seasonality + noise, floored at 1.0 so prices stay positive.
    close_prices = np.maximum(initial_price + trend + seasonal + noise, 1.0)

    # Volume = base volume +/- a uniform random fraction, clamped non-negative.
    volume_random_factor = np.random.uniform(-volume_volatility, volume_volatility, size=days)
    volumes = np.maximum(base_volume * (1 + volume_random_factor), 0).astype(int)

    # OHLC columns (Open/High/Low) are intentionally omitted for now; they
    # could be derived from Close with small random offsets later.
    return pd.DataFrame({
        'Date': dates,
        'Close': close_prices,
        'Volume': volumes
    })
82
+
83
if __name__ == "__main__":
    # Dataset with the default parameters.
    print("Generating synthetic financial data with default parameters...")
    financial_df = generate_financial_data()

    print("\nDataFrame Head:")
    print(financial_df.head())
    print("\nDataFrame Tail:")
    print(financial_df.tail())
    print(f"\nGenerated {len(financial_df)} days of data.")

    # Persist the default dataset to CSV.
    output_filename = "synthetic_financial_data.csv"
    try:
        financial_df.to_csv(output_filename, index=False)
        print(f"\nSuccessfully saved data to {output_filename}")
    except Exception as e:
        print(f"\nError saving data to CSV: {e}")

    # A second dataset with custom parameters (downward trend).
    print("\nGenerating synthetic financial data with custom parameters...")
    custom_financial_df = generate_financial_data(
        days=100,
        initial_price=50.0,
        trend_slope=-0.1,
        seasonality_amplitude=5.0,
        seasonality_period=30,
        noise_level=1.0,
        base_volume=50000,
        start_date_str="2023-01-01"
    )
    print("\nCustom DataFrame Head:")
    print(custom_financial_df.head())

    custom_output_filename = "custom_synthetic_financial_data.csv"
    try:
        custom_financial_df.to_csv(custom_output_filename, index=False)
        print(f"\nSuccessfully saved custom data to {custom_output_filename}")
    except Exception as e:
        print(f"\nError saving custom data to CSV: {e}")
tensorus/ingestion_agent.py ADDED
@@ -0,0 +1,409 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ingestion_agent.py
2
+ """
3
+ Implements the Autonomous Data Ingestion Agent for Tensorus.
4
+
5
+ This agent monitors a source directory for new data files (e.g., CSV, images),
6
+ preprocesses them into tensors using configurable functions, performs basic
7
+ validation, and inserts them into a specified dataset in TensorStorage.
8
+
9
+ Future Enhancements:
10
+ - Monitor cloud storage (S3, GCS) and APIs.
11
+ - More robust error handling for malformed files.
12
+ - More sophisticated duplicate detection (e.g., file hashing).
13
+ - Support for streaming data sources.
14
+ - Asynchronous processing for higher throughput.
15
+ - More complex and configurable preprocessing pipelines.
16
+ - Schema validation against predefined dataset schemas.
17
+ - Resource management controls.
18
+ """
19
+
20
+ import os
21
+ import time
22
+ import glob
23
+ import logging
24
+ import threading
25
+ import csv
26
+ from PIL import Image
27
+ import torch
28
+ import torchvision.transforms as T # Use torchvision for image transforms
29
+ import collections # Added import
30
+
31
+ from typing import Dict, Callable, Optional, Tuple, List, Any
32
+ from .tensor_storage import TensorStorage # Import our storage module
33
+
34
+ # Configure basic logging (can be customized further)
35
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
+ # --- Custom Log Handler ---
40
class AgentMemoryLogHandler(logging.Handler):
    """Logging handler that appends formatted records to an in-memory deque.

    The deque is owned by the agent (typically bounded with ``maxlen``), so
    old entries are discarded automatically once it fills up.
    """

    def __init__(self, deque: collections.deque):
        super().__init__()
        self.deque = deque  # shared buffer the agent exposes via get_logs()

    def emit(self, record: logging.LogRecord) -> None:
        """Format the record and push the resulting line onto the buffer."""
        self.deque.append(self.format(record))
50
+
51
+
52
+ # --- Default Preprocessing Functions ---
53
+
54
def preprocess_csv(file_path: str) -> Tuple[Optional[torch.Tensor], Dict[str, Any]]:
    """
    Basic CSV preprocessor. Assumes numeric data.

    The first row is treated as a header (recorded in metadata, not in the
    tensor). Remaining rows that are fully numeric become rows of a single
    float32 tensor for the whole file; any non-numeric row is skipped with a
    warning. Returns ``(None, metadata)`` when nothing numeric was parsed or
    the file could not be read.
    """
    metadata = {"source_file": file_path, "type": "csv"}
    rows = []
    try:
        with open(file_path, 'r', newline='') as csvfile:
            reader = csv.reader(csvfile)
            metadata["header"] = next(reader, None)  # header row is not data
            for row in reader:
                try:
                    rows.append([float(item) for item in row])
                except ValueError:
                    # Row contains at least one non-numeric field — skip it.
                    logger.warning(f"Skipping non-numeric row in {file_path}: {row}")

        if not rows:
            logger.warning(f"No numeric data found or processed in CSV file: {file_path}")
            return None, metadata

        tensor = torch.tensor(rows, dtype=torch.float32)
        logger.debug(f"Successfully processed {file_path} into tensor shape {tensor.shape}")
        return tensor, metadata
    except Exception as e:
        # Broad by design: any parse/IO failure yields (None, metadata).
        logger.error(f"Failed to process CSV file {file_path}: {e}")
        return None, metadata
86
+
87
+
88
def preprocess_image(file_path: str) -> Tuple[Optional[torch.Tensor], Dict[str, Any]]:
    """
    Basic image preprocessor using Pillow and Torchvision transforms.

    Loads the file as RGB, resizes to 128x128, converts to a (C, H, W) float
    tensor in [0, 1], and normalizes with ImageNet statistics. Returns
    ``(None, metadata)`` when the file is missing or cannot be processed.
    """
    metadata = {"source_file": file_path, "type": "image"}
    try:
        img = Image.open(file_path).convert('RGB')  # force 3 channels (RGB)

        # NOTE(review): the resize target and normalization stats are fixed
        # here; they should ideally be configurable per dataset.
        transform = T.Compose([
            T.Resize((128, 128)),
            T.ToTensor(),  # PIL (H, W, C) [0,255] -> tensor (C, H, W) [0,1]
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        tensor = transform(img)
        metadata["original_size"] = img.size  # (width, height)
        logger.debug(f"Successfully processed {file_path} into tensor shape {tensor.shape}")
        return tensor, metadata
    except FileNotFoundError:
        logger.error(f"Image file not found: {file_path}")
        return None, metadata
    except Exception as e:
        logger.error(f"Failed to process image file {file_path}: {e}")
        return None, metadata
116
+
117
+ # --- Data Ingestion Agent Class ---
118
+
119
+ class DataIngestionAgent:
120
+ """
121
+ Monitors a directory for new files and ingests them into TensorStorage.
122
+ """
123
+
124
+ def __init__(self,
125
+ tensor_storage: TensorStorage,
126
+ dataset_name: str,
127
+ source_directory: str,
128
+ polling_interval_sec: int = 10,
129
+ preprocessing_rules: Optional[Dict[str, Callable[[str], Tuple[Optional[torch.Tensor], Dict[str, Any]]]]] = None):
130
+ """
131
+ Initializes the DataIngestionAgent.
132
+
133
+ Args:
134
+ tensor_storage: An instance of the TensorStorage class.
135
+ dataset_name: The name of the dataset in TensorStorage to ingest into.
136
+ source_directory: The path to the local directory to monitor.
137
+ polling_interval_sec: How often (in seconds) to check the directory.
138
+ preprocessing_rules: A dictionary mapping lowercase file extensions
139
+ (e.g., '.csv', '.png') to preprocessing functions.
140
+ Each function takes a file path and returns a
141
+ Tuple containing the processed Tensor (or None on failure)
142
+ and a metadata dictionary. If None, default rules are used.
143
+ """
144
+ if not isinstance(tensor_storage, TensorStorage):
145
+ raise TypeError("tensor_storage must be an instance of TensorStorage")
146
+ if not os.path.isdir(source_directory):
147
+ raise ValueError(f"Source directory '{source_directory}' does not exist or is not a directory.")
148
+
149
+ self.tensor_storage = tensor_storage
150
+ self.dataset_name = dataset_name
151
+ self.source_directory = source_directory
152
+ self.polling_interval = polling_interval_sec
153
+
154
+ # Ensure dataset exists
155
+ try:
156
+ self.tensor_storage.get_dataset(self.dataset_name)
157
+ logger.info(f"Agent targeting existing dataset '{self.dataset_name}'.")
158
+ except ValueError:
159
+ logger.info(f"Dataset '{self.dataset_name}' not found. Creating it.")
160
+ self.tensor_storage.create_dataset(self.dataset_name)
161
+
162
+ # Default preprocessing rules if none provided
163
+ if preprocessing_rules is None:
164
+ self.preprocessing_rules = {
165
+ '.csv': preprocess_csv,
166
+ '.png': preprocess_image,
167
+ '.jpg': preprocess_image,
168
+ '.jpeg': preprocess_image,
169
+ '.tif': preprocess_image,
170
+ '.tiff': preprocess_image,
171
+ }
172
+ logger.info("Using default preprocessing rules for CSV and common image formats.")
173
+ else:
174
+ self.preprocessing_rules = preprocessing_rules
175
+ logger.info(f"Using custom preprocessing rules for extensions: {list(preprocessing_rules.keys())}")
176
+
177
+ self.processed_files = set() # Keep track of files already processed in this session
178
+ self._stop_event = threading.Event()
179
+ self._monitor_thread = None
180
+ self.status = "stopped" # Status reporting
181
+ self.logs = collections.deque(maxlen=100) # Log capturing
182
+
183
+ # Setup custom log handler
184
+ formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
185
+ memory_handler = AgentMemoryLogHandler(self.logs)
186
+ memory_handler.setFormatter(formatter)
187
+ logger.addHandler(memory_handler) # Add handler to the specific logger instance
188
+
189
+ logger.info(f"DataIngestionAgent initialized for dataset '{self.dataset_name}' monitoring '{self.source_directory}'.")
190
+
191
+ def get_status(self) -> str:
192
+ """Returns the current status of the agent."""
193
+ return self.status
194
+
195
+ def get_logs(self, max_lines: Optional[int] = None) -> List[str]:
196
+ """Returns recent log messages from the agent."""
197
+ if max_lines is None:
198
+ return list(self.logs)
199
+ else:
200
+ return list(self.logs)[-max_lines:]
201
+
202
+ def _validate_data(self, tensor: Optional[torch.Tensor], metadata: Dict[str, Any]) -> bool:
203
+ """
204
+ Performs basic validation on the preprocessed tensor.
205
+ Returns True if valid, False otherwise.
206
+ """
207
+ if tensor is None:
208
+ logger.warning(f"Validation failed: Tensor is None for {metadata.get('source_file', 'N/A')}")
209
+ return False
210
+ if not isinstance(tensor, torch.Tensor):
211
+ logger.warning(f"Validation failed: Output is not a tensor for {metadata.get('source_file', 'N/A')}")
212
+ return False
213
+ # Add more specific checks if needed (e.g., tensor.numel() > 0)
214
+ return True
215
+
216
+ def _process_file(self, file_path: str) -> None:
217
+ """Processes a single detected file."""
218
+ logger.info(f"Detected new file: {file_path}")
219
+ _, file_extension = os.path.splitext(file_path)
220
+ file_extension = file_extension.lower()
221
+
222
+ preprocessor = self.preprocessing_rules.get(file_extension)
223
+
224
+ if preprocessor:
225
+ logger.debug(f"Applying preprocessor for '{file_extension}' to {file_path}")
226
+ try:
227
+ tensor, metadata = preprocessor(file_path)
228
+
229
+ if self._validate_data(tensor, metadata):
230
+ # Ensure tensor is not None before insertion
231
+ if tensor is not None:
232
+ metadata["created_by"] = "IngestionAgent" # Add agent source
233
+ record_id = self.tensor_storage.insert(self.dataset_name, tensor, metadata)
234
+ logger.info(f"Successfully ingested '{file_path}' into dataset '{self.dataset_name}' with record ID: {record_id} (created_by: IngestionAgent)")
235
+ self.processed_files.add(file_path) # Mark as processed only on success
236
+ else:
237
+ # Should have been caught by validation, but as safeguard:
238
+ logger.error(f"Validation passed but tensor is None for {file_path}. Skipping insertion.")
239
+
240
+ else:
241
+ logger.warning(f"Data validation failed for {file_path}. Skipping insertion.")
242
+
243
+ except Exception as e:
244
+ logger.error(f"Unhandled error during preprocessing or insertion for {file_path}: {e}", exc_info=True)
245
+ else:
246
+ logger.debug(f"No preprocessor configured for file extension '{file_extension}'. Skipping file: {file_path}")
247
+
248
+
249
+ def _scan_source_directory(self) -> None:
250
+ """Scans the source directory for new files matching the rules."""
251
+ logger.debug(f"Scanning directory: {self.source_directory}")
252
+ supported_extensions = self.preprocessing_rules.keys()
253
+
254
+ try:
255
+ # Use glob to find all files, then filter
256
+ # This might be inefficient for huge directories, consider os.scandir
257
+ all_files = glob.glob(os.path.join(self.source_directory, '*'), recursive=False) # Non-recursive
258
+
259
+ for file_path in all_files:
260
+ if not os.path.isfile(file_path):
261
+ continue # Skip directories
262
+
263
+ _, file_extension = os.path.splitext(file_path)
264
+ file_extension = file_extension.lower()
265
+
266
+ if file_extension in supported_extensions and file_path not in self.processed_files:
267
+ self._process_file(file_path)
268
+
269
+ except Exception as e:
270
+ logger.error(f"Error scanning source directory '{self.source_directory}': {e}", exc_info=True)
271
+
272
+
273
    def _monitor_loop(self) -> None:
        """The main loop executed by the background thread.

        Repeatedly scans the source directory, then blocks on the stop event
        for up to ``polling_interval`` seconds. Setting the event (via
        ``stop()``) both wakes the wait early and ends the loop.
        """
        logger.info(f"Starting monitoring loop for '{self.source_directory}'. Polling interval: {self.polling_interval} seconds.")
        while not self._stop_event.is_set():
            self._scan_source_directory()
            # Event.wait doubles as an interruptible sleep: it returns as soon
            # as stop() sets the event, otherwise after the polling interval.
            self._stop_event.wait(self.polling_interval)
        logger.info("Monitoring loop stopped.")
281
+
282
+
283
    def start(self) -> None:
        """Starts the monitoring process in a background daemon thread.

        Idempotent: if a monitor thread is already alive, a warning is logged
        and nothing changes. Sets ``self.status`` to "running" on success or
        "error" if the thread failed to start.
        """
        if self._monitor_thread is not None and self._monitor_thread.is_alive():
            logger.warning("Agent monitoring is already running.")
            return

        self._stop_event.clear()
        # Daemon thread so a lingering monitor never blocks interpreter exit.
        self._monitor_thread = threading.Thread(target=self._monitor_loop, daemon=True)
        self._monitor_thread.start()
        if self._monitor_thread.is_alive(): # Check if thread actually started
            self.status = "running"
            logger.info("Data Ingestion Agent started monitoring.")
        else:
            self.status = "error" # Or some other appropriate error state
            logger.error("Data Ingestion Agent failed to start monitoring thread.")
298
+
299
+
300
+ def stop(self) -> None:
301
+ """Signals the monitoring thread to stop."""
302
+ if self._monitor_thread is None or not self._monitor_thread.is_alive():
303
+ logger.info("Agent monitoring is not running.")
304
+ return
305
+
306
+ logger.info("Stopping Data Ingestion Agent monitoring...")
307
+ self._stop_event.set()
308
+ self._monitor_thread.join(timeout=self.polling_interval + 5) # Wait for thread to finish
309
+
310
+ if self._monitor_thread.is_alive():
311
+ logger.warning("Monitoring thread did not stop gracefully after timeout.")
312
+ # self.status remains "running" or could be set to "stopping_error"
313
+ else:
314
+ logger.info("Data Ingestion Agent monitoring stopped successfully.")
315
+ self.status = "stopped"
316
+ self._monitor_thread = None
317
+
318
+
319
# --- Example Usage ---
if __name__ == "__main__":
    # Opt-in smoke test of the full ingest pipeline; gated behind an env var
    # so importing/running this module has no side effects by default.
    run_example = os.getenv("RUN_INGESTION_AGENT_EXAMPLE", "False").lower() == "true"

    if run_example:
        logger.info("--- Starting Ingestion Agent Example (RUN_INGESTION_AGENT_EXAMPLE=True) ---")

        # 1. Setup TensorStorage
        storage = TensorStorage()

        # 2. Setup a temporary directory for the agent to monitor
        source_dir = "temp_ingestion_source"
        if not os.path.exists(source_dir):
            os.makedirs(source_dir)
            logger.info(f"Created temporary source directory: {source_dir}")

        # 3. Create the Ingestion Agent
        # We'll use a short polling interval for demonstration
        agent = DataIngestionAgent(
            tensor_storage=storage,
            dataset_name="raw_data",
            source_directory=source_dir,
            polling_interval_sec=5
        )

        # 4. Start the agent (runs in the background)
        agent.start()

        # 5. Simulate adding files to the source directory
        print("\nSimulating file creation...")
        time.sleep(2) # Give agent time to start initial scan

        # Create a dummy CSV file (includes a non-numeric row to exercise the
        # CSV preprocessor's error handling)
        csv_path = os.path.join(source_dir, "data_1.csv")
        with open(csv_path, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(["Timestamp", "Value1", "Value2"])
            writer.writerow(["1678886400", "10.5", "20.1"])
            writer.writerow(["1678886460", "11.2", "20.5"])
            writer.writerow(["1678886520", "invalid", "20.9"]) # Test non-numeric row
            writer.writerow(["1678886580", "10.9", "21.0"])
        print(f"Created CSV: {csv_path}")

        # Create a dummy image file (requires Pillow)
        try:
            img_path = os.path.join(source_dir, "image_1.png")
            dummy_img = Image.new('RGB', (60, 30), color = 'red')
            dummy_img.save(img_path)
            print(f"Created Image: {img_path}")
        except ImportError:
            print("Pillow not installed, skipping image creation. Install with: pip install Pillow")
        except Exception as e:
            print(f"Could not create dummy image: {e}")


        # Create an unsupported file type (no preprocessor for .txt)
        txt_path = os.path.join(source_dir, "notes.txt")
        with open(txt_path, 'w') as f:
            f.write("This is a test file.")
        print(f"Created TXT: {txt_path} (should be skipped)")


        # 6. Let the agent run for a couple of polling cycles
        print(f"\nWaiting for agent to process files (polling interval {agent.polling_interval}s)...")
        time.sleep(agent.polling_interval * 2 + 1) # Wait for 2 cycles + buffer


        # 7. Check the contents of TensorStorage
        print("\n--- Checking TensorStorage contents ---")
        try:
            ingested_data = storage.get_dataset_with_metadata("raw_data")
            print(f"Found {len(ingested_data)} items in dataset 'raw_data':")
            for item in ingested_data:
                print(f"  Record ID: {item['metadata'].get('record_id')}, Source: {item['metadata'].get('source_file')}, Shape: {item['tensor'].shape}, Dtype: {item['tensor'].dtype}")
                # print(f"  Tensor: {item['tensor']}") # Can be verbose
        except ValueError as e:
            print(f"Could not retrieve dataset 'raw_data': {e}")


        # 8. Stop the agent
        print("\n--- Stopping Agent ---")
        agent.stop()

        # 9. Clean up the temporary directory (optional)
        # print(f"\nCleaning up temporary directory: {source_dir}")
        # import shutil
        # shutil.rmtree(source_dir)

        logger.info("--- Ingestion Agent Example Finished ---")
    else:
        logger.info("--- Ingestion Agent Example SKIPPED (RUN_INGESTION_AGENT_EXAMPLE not set to 'true') ---")
tensorus/mcp_client.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Async client for the Tensorus MCP server.

Wraps ``fastmcp.client.Client`` with typed helpers for dataset and tensor
management, tensor operations, prompts, NQL queries, and sync wrappers.
Responses are parsed into the Pydantic models defined at module level.
"""
from __future__ import annotations

import asyncio
import json
import logging
from dataclasses import dataclass  # NOTE(review): appears unused in this module — confirm before removing
from typing import Any, AsyncIterator, List, Optional, Sequence, Type, TypeVar, Union

from pydantic import BaseModel, ValidationError
from fastmcp.client import Client as FastMCPClient
from fastmcp.exceptions import FastMCPError
from fastmcp.client.transports import StreamableHttpTransport # Changed import path and class name

# Import the TextContent type that the server is actually using
from mcp.types import TextContent

# Type variable for Pydantic models
T = TypeVar('T', bound=BaseModel)

logger = logging.getLogger("tensorus.mcp.client")
logger.setLevel(logging.INFO)
+
23
class MCPResponseError(Exception):
    """Generic exception raised when an MCP tool/prompt call fails."""
26
+
27
class TensorusMCPClient:
    """High-level client for the Tensorus MCP server with typed, prompt, sync, and streaming support.

    Use as an async context manager; the connection lifecycle is delegated to
    the wrapped ``FastMCPClient``. All tool calls funnel through
    ``_call_json``, which decodes the first TextContent item as JSON and
    optionally parses it into a Pydantic response model.
    """

    def __init__(self, transport: Any) -> None:
        # Accepts any fastmcp transport object; see from_http() for the
        # common Streamable-HTTP case.
        self._client = FastMCPClient(transport)

    @staticmethod
    def from_http(url: str) -> TensorusMCPClient:
        """Factory using Streamable HTTP transport"""
        transport = StreamableHttpTransport(url=url) # Updated class name usage
        return TensorusMCPClient(transport)

    async def __aenter__(self) -> TensorusMCPClient:
        # Open the underlying fastmcp client connection.
        await self._client.__aenter__()
        return self

    async def __aexit__(
        self,
        exc_type: Any,
        exc_val: Any,
        exc_tb: Any
    ) -> None:
        # Close the underlying fastmcp client connection.
        await self._client.__aexit__(exc_type, exc_val, exc_tb)

    async def _call_json(
        self,
        name: str,
        args: Optional[dict] = None,
        response_model: Type[T] | None = None
    ) -> Union[dict, T, None]:
        """
        Internal helper to call a tool and parse JSON or Pydantic model.

        Returns None when the tool produced no content. Raises
        MCPResponseError when the underlying tool call fails, TypeError when
        the first content item is not TextContent, and
        pydantic.ValidationError when the decoded payload does not match
        ``response_model``.
        """
        try:
            result = await self._client.call_tool(name, args or {})
        except FastMCPError as e:
            logger.error(f"Tool call failed: {name} args={args} error={e}")
            raise MCPResponseError(str(e))

        if not result:
            return None
        # Only the first content item is parsed; any additional items the
        # server returns are ignored.
        content = result[0]
        if not isinstance(content, TextContent):
            raise TypeError(f"Unexpected content type: {type(content)}")

        data = json.loads(content.text)
        if response_model:
            try:
                # Handle cases where the actual data for the model is nested
                if isinstance(data, dict) and 'data' in data and response_model == DatasetListResponse :
                    # Specific handling for DatasetListResponse if data is nested under 'data'
                    # and the model itself expects a flat structure like {"datasets": []}
                    # This might indicate an API inconsistency or a client model mismatch.
                    # For now, let's assume the API returns {..., "data": actual_list_for_datasets_field}
                    # and DatasetListResponse expects {"datasets": actual_list_for_datasets_field}
                    # So, we need to re-wrap it:
                    return response_model.parse_obj({"datasets": data['data']})
                elif response_model == IngestTensorResponse and isinstance(data, dict) and data.get('success') is True and isinstance(data.get('data'), dict) and 'record_id' in data['data']:
                    # Specific handling for IngestTensorResponse if data is nested and needs id mapping
                    return IngestTensorResponse(id=data['data']['record_id'], status="ingested") # Assuming "ingested" is the status on success
                elif response_model == TensorDetailsResponse and isinstance(data, dict) and 'record_id' in data:
                    # Map API's 'record_id' to model's 'id' field
                    data_for_model = data.copy()
                    data_for_model['id'] = data_for_model.pop('record_id')
                    return TensorDetailsResponse.parse_obj(data_for_model)

                # For other models (like CreateDatasetResponse, DeleteDatasetResponse, etc.),
                # parse the data directly. This assumes 'data' dictionary as a whole matches the response_model.
                return response_model.parse_obj(data)
            except ValidationError as ve:
                logger.error(f"Response validation failed for {name}: {ve}. Data: {data}")
                raise
        return data

    async def call_tool_stream(
        self,
        name: str,
        args: Optional[dict] = None
    ) -> AsyncIterator[str]:
        """
        Stream partial TextContent.text chunks from a long-running tool.
        """
        # NOTE(review): assumes the installed fastmcp Client exposes a
        # ``call_tool_stream`` method — confirm against the pinned version.
        try:
            async for msg in self._client.call_tool_stream(name, args or {}):
                yield msg.text
        except FastMCPError as e:
            logger.error(f"Streaming call failed: {name} error={e}")
            raise MCPResponseError(str(e))

    # --- Prompt support ---
    async def call_prompt(
        self,
        prompt_name: str,
        args: Optional[dict] = None
    ) -> List[TextContent]:
        """
        Invoke a prompt template defined on the MCP server.
        """
        try:
            return await self._client.call_prompt(prompt_name, args or {})
        except FastMCPError as e:
            logger.error(f"Prompt call failed: {prompt_name} args={args} error={e}")
            raise MCPResponseError(str(e))

    # --- Dataset management ---
    async def list_datasets(self) -> DatasetListResponse:
        """List all datasets.

        NOTE(review): annotation corrected from ``list[str]`` — _call_json
        returns the parsed ``DatasetListResponse`` when a response model is
        supplied, not a bare list.
        """
        return await self._call_json(
            "tensorus_list_datasets",
            response_model=DatasetListResponse
        )

    def list_datasets_sync(self) -> DatasetListResponse:
        """Sync wrapper for list_datasets"""
        # asyncio.run() must not be called from inside a running event loop.
        return asyncio.run(self.list_datasets())

    async def create_dataset(self, dataset_name: str) -> CreateDatasetResponse:
        """Create a dataset named ``dataset_name`` on the server."""
        return await self._call_json(
            "tensorus_create_dataset",
            {"dataset_name": dataset_name},
            response_model=CreateDatasetResponse
        )

    def create_dataset_sync(self, dataset_name: str) -> CreateDatasetResponse:
        """Sync wrapper for create_dataset."""
        return asyncio.run(self.create_dataset(dataset_name))

    async def delete_dataset(self, dataset_name: str) -> DeleteDatasetResponse:
        """Delete the dataset named ``dataset_name``."""
        return await self._call_json(
            "tensorus_delete_dataset",
            {"dataset_name": dataset_name},
            response_model=DeleteDatasetResponse
        )

    def delete_dataset_sync(self, dataset_name: str) -> DeleteDatasetResponse:
        """Sync wrapper for delete_dataset."""
        return asyncio.run(self.delete_dataset(dataset_name))

    # --- Tensor management ---
    async def ingest_tensor(
        self,
        dataset_name: str,
        tensor_shape: Sequence[int],
        tensor_dtype: str,
        tensor_data: Any,
        metadata: Optional[dict] = None,
    ) -> IngestTensorResponse:
        """Ingest a tensor (shape/dtype/data given explicitly) into a dataset."""
        payload = {
            "dataset_name": dataset_name,
            "tensor_shape": list(tensor_shape),
            "tensor_dtype": tensor_dtype,
            "tensor_data": tensor_data,
            "metadata": metadata,
        }
        return await self._call_json(
            "tensorus_ingest_tensor",
            payload,
            response_model=IngestTensorResponse
        )

    async def get_tensor_details(self, dataset_name: str, record_id: str) -> TensorDetailsResponse:
        """Fetch a stored tensor's shape, dtype, data, and metadata by record id."""
        return await self._call_json(
            "tensorus_get_tensor_details",
            {"dataset_name": dataset_name, "record_id": record_id},
            response_model=TensorDetailsResponse
        )

    async def delete_tensor(self, dataset_name: str, record_id: str) -> DeleteTensorResponse:
        """Delete a single tensor record from a dataset."""
        return await self._call_json(
            "tensorus_delete_tensor",
            {"dataset_name": dataset_name, "record_id": record_id},
            response_model=DeleteTensorResponse
        )

    async def update_tensor_metadata(
        self,
        dataset_name: str,
        record_id: str,
        new_metadata: dict
    ) -> UpdateMetadataResponse:
        """Replace a tensor record's metadata with ``new_metadata``."""
        return await self._call_json(
            "tensorus_update_tensor_metadata",
            {"dataset_name": dataset_name, "record_id": record_id, "new_metadata": new_metadata},
            response_model=UpdateMetadataResponse
        )

    # --- Tensor operations ---
    async def apply_unary_operation(self, operation: str, payload: dict) -> OperationResponse:
        """Apply a single-operand tensor operation (payload schema is server-defined)."""
        return await self._call_json(
            "tensorus_apply_unary_operation",
            {"operation": operation, **payload},
            response_model=OperationResponse
        )

    async def apply_binary_operation(self, operation: str, payload: dict) -> OperationResponse:
        """Apply a two-operand tensor operation (payload schema is server-defined)."""
        return await self._call_json(
            "tensorus_apply_binary_operation",
            {"operation": operation, **payload},
            response_model=OperationResponse
        )

    async def apply_list_operation(self, operation: str, payload: dict) -> OperationResponse:
        """Apply an operation over a list of tensors (payload schema is server-defined)."""
        return await self._call_json(
            "tensorus_apply_list_operation",
            {"operation": operation, **payload},
            response_model=OperationResponse
        )

    async def apply_einsum(self, payload: dict) -> OperationResponse:
        """Apply an einsum operation; ``payload`` carries the equation and operands."""
        return await self._call_json(
            "tensorus_apply_einsum",
            payload,
            response_model=OperationResponse
        )

    # --- Miscellaneous ---
    async def execute_nql_query(self, query: str) -> NQLQueryResponse:
        """Execute a Natural Query Language query against the server."""
        return await self._call_json(
            "execute_nql_query",
            {"query": query},
            response_model=NQLQueryResponse
        )
247
# --- Pydantic response models (module-level; referenced by TensorusMCPClient) ---
class DatasetListResponse(BaseModel):
    """Result of ``tensorus_list_datasets``."""
    datasets: list[str]

class CreateDatasetResponse(BaseModel):
    """Result of ``tensorus_create_dataset``."""
    success: bool
    # Explicit default: under Pydantic v2, Optional[...] without a default is
    # a *required* field (v1 implied None), which would reject responses that
    # omit "message". The default is a no-op under v1.
    message: Optional[str] = None

class DeleteDatasetResponse(BaseModel):
    """Result of ``tensorus_delete_dataset``."""
    success: bool

class IngestTensorResponse(BaseModel):
    """Result of ``tensorus_ingest_tensor`` (API 'record_id' mapped to 'id')."""
    id: str
    status: str

class TensorDetailsResponse(BaseModel):
    """Result of ``tensorus_get_tensor_details`` (API 'record_id' mapped to 'id')."""
    id: str
    shape: list[int]
    dtype: str
    data: Any
    # Explicit default for Pydantic v2 compatibility (see CreateDatasetResponse).
    metadata: Optional[dict] = None

class DeleteTensorResponse(BaseModel):
    """Result of ``tensorus_delete_tensor``."""
    success: bool

class UpdateMetadataResponse(BaseModel):
    """Result of ``tensorus_update_tensor_metadata``."""
    success: bool

class OperationResponse(BaseModel):
    """Result of the tensorus_apply_* operation tools."""
    result: Any

class NQLQueryResponse(BaseModel):
    """Result of ``execute_nql_query``."""
    results: Any