Text Generation
English
smriti-memory-ai
smriti-ai
memory
agent-memory
long-term-memory
external-memory
training-free
frozen-model
inference-time-augmentation
retrieval-augmented-generation
rag
semantic-search
knowledge-graph
identity-continuity
small-language-model
small-language-models
ai-agent
gemma
gemma-4
qwen
qwen2.5
llama
llama-3.2
phi-3
| #!/usr/bin/env python3 | |
| """Local smoke test for the Smriti AI Hugging Face custom handler.""" | |
| from __future__ import annotations | |
| import json | |
| import os | |
| from pathlib import Path | |
| from handler import EndpointHandler | |
def pretty(title: str, payload: dict) -> None:
    """Print a ``=== title ===`` banner, then *payload* as indented JSON.

    The banner is preceded by a blank line. The payload is serialised with a
    two-space indent and non-ASCII characters are emitted as-is rather than
    escaped.
    """
    banner = f"\n=== {title} ==="
    print(banner)
    # Serialise only after the banner is out, so a serialisation error still
    # leaves the section header visible in the output.
    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    print(rendered)
def main() -> int:
    """Drive the endpoint handler through a scripted smoke sequence.

    The steps, in order, are: a health check, a chat turn that states
    personal facts, an unrelated "distractor" chat turn, a recall question, a
    memory delete for the demo user, and the same recall question again. Each
    response is printed via ``pretty`` for manual inspection; nothing is
    asserted on.

    Returns:
        Always 0.
    """
    # Default to memory-only smoke mode so local validation never downloads or
    # reports against a tiny/test model. Set BASE_MODEL_ID=google/gemma-4-E2B-it
    # or HF_ENDPOINT_URL explicitly to test real model generation.
    env_defaults = (
        ("BASE_MODEL_ID", ""),
        ("SMRITI_MEMORY_BACKEND", "json"),
        ("SMRITI_MEMORY_PATH", "/tmp/smriti_hf_test.json"),
        ("SMRITI_RETRIEVAL_MODE", "semantic_graph_identity"),
    )
    for name, fallback in env_defaults:
        # setdefault leaves any value already present in the environment alone.
        os.environ.setdefault(name, fallback)
    retrieval_mode = os.environ["SMRITI_RETRIEVAL_MODE"]

    script_dir = Path(__file__).resolve().parent
    handler = EndpointHandler(path=str(script_dir))

    demo_user = "local-demo-user"
    recall_question = "What do you remember about me?"

    def chat_request(message: str) -> dict:
        # A fresh payload is built on every call so no dict object is shared
        # between requests.
        return {
            "inputs": {
                "operation": "chat",
                "user_id": demo_user,
                "message": message,
                "retrieval_mode": retrieval_mode,
            },
            "parameters": {"max_new_tokens": 64, "return_memories": True},
        }

    steps = [
        ("health", {"inputs": {"operation": "health"}}),
        (
            "fact injection",
            chat_request(
                "My name is Alex and I am a marine biologist based in Hawaii."
            ),
        ),
        ("distractor", chat_request("What is the capital of France?")),
        ("recall", chat_request(recall_question)),
        (
            "delete memory",
            {"inputs": {"operation": "delete_memory", "user_id": demo_user}},
        ),
        ("recall after delete", chat_request(recall_question)),
    ]

    # Requests run strictly in order; each response is printed before the next
    # request is sent.
    for title, request in steps:
        pretty(title, handler(request))

    return 0
if __name__ == "__main__":
    # Script entry point: use main()'s return value as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)