diff --git a/.env.template b/.env.template new file mode 100644 index 0000000000000000000000000000000000000000..05c1520d577ed64c358723d644b47ba4f2bdb388 --- /dev/null +++ b/.env.template @@ -0,0 +1,37 @@ +# MediGuard AI RAG-Helper - Environment Configuration Template +# Copy this file to .env and fill in your values + +# ============================================================================ +# LLM PROVIDER CONFIGURATION (Choose ONE - all have FREE tiers) +# ============================================================================ + +# Option 1: GROQ (RECOMMENDED - FREE, fast, llama-3.3-70b) +# Get FREE API key: https://console.groq.com/keys +GROQ_API_KEY="your_groq_api_key_here" + +# Option 2: Google Gemini (FREE tier available) +# Get FREE API key: https://aistudio.google.com/app/apikey +GOOGLE_API_KEY="your_google_api_key_here" + +# Provider selection: "groq" (default), "gemini", or "ollama" (local) +LLM_PROVIDER="groq" + +# Embedding provider: "google" (default, FREE), "huggingface" (local), or "ollama" +EMBEDDING_PROVIDER="google" + +# ============================================================================ +# LANGSMITH (Optional - for tracing/debugging; set LANGCHAIN_TRACING_V2="false" if unused) +# ============================================================================ +LANGCHAIN_API_KEY="your_langsmith_api_key_here" +LANGCHAIN_TRACING_V2="true" +LANGCHAIN_PROJECT="MediGuard_AI_RAG_Helper" + +# ============================================================================ +# APPLICATION SETTINGS +# ============================================================================ +LOG_LEVEL="INFO" + +# ============================================================================ +# OLLAMA (Only needed if using LLM_PROVIDER="ollama") +# ============================================================================ +# OLLAMA_HOST="http://localhost:11434" diff --git a/.gitignore b/.gitignore index 2eea525d885d5148108f6f3a9a8613863f783d36..910d8208e72490a5cd217a36dd04ab23709eca49 100644 --- 
a/.gitignore +++ b/.gitignore @@ -1 +1,295 @@ -.env \ No newline at end of file +# ============================================================================== +# MediGuard AI RAG-Helper - Git Ignore Configuration +# ============================================================================== + +# ============================================================================== +# Environment & Secrets +# ============================================================================== +.env +.env.local +.env.*.local +*.env +**/.env + +# API Keys and secrets +secrets/ +*.key +*.pem +*.p12 + +# ============================================================================== +# Python +# ============================================================================== +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python + +# Distribution / packaging +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Virtual environments +venv/ +env/ +ENV/ +env.bak/ +venv.bak/ +.venv/ +.virtualenv/ +virtualenv/ + +# PyInstaller +*.manifest +*.spec + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff +instance/ +.webassets-cache + +# Scrapy stuff +.scrapy + +# Sphinx documentation +docs/_build/ +docs/.doctrees/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints +*.ipynb_checkpoints/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +Pipfile.lock + +# poetry +poetry.lock + +# PEP 582 +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Spyder project settings 
+.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# ============================================================================== +# IDEs & Editors +# ============================================================================== +# VSCode +.vscode/ +*.code-workspace + +# PyCharm +.idea/ +*.iml +*.iws +*.ipr + +# Sublime Text +*.sublime-project +*.sublime-workspace + +# Vim +*.swp +*.swo +*~ + +# Emacs +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc + +# ============================================================================== +# OS +# ============================================================================== +# macOS +.DS_Store +.AppleDouble +.LSOverride +._* +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Windows +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db +*.stackdump +[Dd]esktop.ini +$RECYCLE.BIN/ +*.cab +*.msi +*.msix +*.msm +*.msp +*.lnk + +# Linux +*~ +.directory +.Trash-* +.nfs* + +# ============================================================================== +# Project Specific +# ============================================================================== +# Vector stores (large files, regenerate locally) +data/vector_stores/*.faiss +data/vector_stores/*.pkl +*.faiss +*.pkl + +# Medical PDFs (proprietary/large) +data/medical_pdfs/*.pdf + +# Generated outputs +data/outputs/ +outputs/ +results/ +*.json.bak + +# Logs +logs/ +*.log +log_*.txt + +# Temporary files +tmp/ +temp/ +*.tmp +*.temp +*.bak +*.swp + +# Test outputs +test_outputs/ +test_results/ + +# Evolution outputs +evolution_outputs/ +pareto_*.png +sop_evolution_*.json + +# Cache +.cache/ +*.cache + +# 
============================================================================== +# LangChain / LangSmith +# ============================================================================== +.langchain/ +langchain_cache/ +langsmith_cache/ + +# ============================================================================== +# Docker +# ============================================================================== +.dockerignore +docker-compose.override.yml + +# ============================================================================== +# Other +# ============================================================================== +# Backup files +*.backup +*.old + +# Compressed files +*.zip +*.tar.gz +*.rar + +# Large model files +*.gguf +*.bin +models/ + +# Node modules (if any JS tooling) +node_modules/ \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000000000000000000000000000000000..b353b30871abf9fb8fc13ebd8db3351eb462f745 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,434 @@ +# Contributing to MediGuard AI RAG-Helper + +First off, thank you for considering contributing to MediGuard AI! It's people like you that make this project better for everyone. + +## 📋 Table of Contents + +- [Code of Conduct](#code-of-conduct) +- [Getting Started](#getting-started) +- [How Can I Contribute?](#how-can-i-contribute) +- [Development Setup](#development-setup) +- [Style Guidelines](#style-guidelines) +- [Commit Messages](#commit-messages) +- [Pull Request Process](#pull-request-process) + +## Code of Conduct + +This project adheres to a code of conduct. By participating, you are expected to uphold this code. Please report unacceptable behavior to the project maintainers. 
+ +### Our Standards + +- **Be Respectful**: Treat everyone with respect +- **Be Collaborative**: Work together effectively +- **Be Professional**: Maintain professionalism at all times +- **Be Inclusive**: Welcome diverse perspectives and backgrounds + +## Getting Started + +### Prerequisites + +- Python 3.11+ +- Git +- A GitHub account +- FREE API key from Groq or Google Gemini + +### First Contribution + +1. **Fork the repository** +2. **Clone your fork** + ```bash + git clone https://github.com/your-username/RagBot.git + cd RagBot + ``` +3. **Set up development environment** (see below) +4. **Create a new branch** + ```bash + git checkout -b feature/your-feature-name + ``` + +## How Can I Contribute? + +### 🐛 Reporting Bugs + +**Before submitting a bug report:** +- Check the [existing issues](https://github.com/your-username/RagBot/issues) +- Ensure you're using the latest version +- Collect relevant information (Python version, OS, error messages) + +**How to submit a good bug report:** +- Use a clear and descriptive title +- Describe the exact steps to reproduce +- Provide specific examples +- Describe the behavior you observed and what you expected +- Include screenshots if applicable +- Include your environment details + +**Template:** +```markdown +## Bug Description +[Clear description of the bug] + +## Steps to Reproduce +1. +2. +3. 
+ +## Expected Behavior +[What should happen] + +## Actual Behavior +[What actually happens] + +## Environment +- OS: [e.g., Windows 11, macOS 14, Ubuntu 22.04] +- Python Version: [e.g., 3.11.5] +- MediGuard Version: [e.g., 1.0.0] + +## Additional Context +[Any other relevant information] +``` + +### 💡 Suggesting Enhancements + +**Before submitting an enhancement suggestion:** +- Check if it's already been suggested +- Determine which part of the project it relates to +- Consider if it aligns with the project's goals + +**How to submit a good enhancement suggestion:** +- Use a clear and descriptive title +- Provide a detailed description of the proposed enhancement +- Explain why this enhancement would be useful +- List potential benefits and drawbacks +- Provide examples or mockups if applicable + +### 🔨 Pull Requests + +**Good first issues:** +- Look for issues labeled `good first issue` +- Documentation improvements +- Test coverage improvements +- Bug fixes + +**Areas needing contribution:** +- Additional biomarker support +- Disease model improvements +- Performance optimizations +- Documentation enhancements +- Test coverage +- UI/UX improvements + +## Development Setup + +### 1. Fork and Clone + +```bash +# Fork via GitHub UI, then: +git clone https://github.com/your-username/RagBot.git +cd RagBot +``` + +### 2. Create Virtual Environment + +```bash +python -m venv .venv +source .venv/bin/activate # On Windows: .venv\Scripts\activate +``` + +### 3. Install Dependencies + +```bash +# Core dependencies +pip install -r requirements.txt + +# Development dependencies +pip install pytest pytest-cov black flake8 mypy isort +``` + +### 4. Configure Environment + +```bash +cp .env.template .env +# Edit .env with your API keys +``` + +### 5. 
Run Tests + +```bash +# Run all tests +pytest + +# Run with coverage +pytest --cov=src --cov-report=html + +# Run specific test file +pytest tests/test_basic.py +``` + +## Style Guidelines + +### Python Code Style + +We follow **PEP 8** with some modifications: + +- **Line length**: 100 characters maximum +- **Imports**: Organized with `isort` +- **Formatting**: Automated with `black` +- **Type hints**: Required for function signatures +- **Docstrings**: Google style + +### Code Formatting + +**Before committing, run:** + +```bash +# Auto-format code +black src/ scripts/ tests/ + +# Check style compliance +flake8 src/ scripts/ tests/ + +# Type checking +mypy src/ + +# Import sorting +isort src/ scripts/ tests/ +``` + +### Docstring Example + +```python +def analyze_biomarkers( + biomarkers: Dict[str, float], + patient_context: Optional[Dict[str, Any]] = None +) -> AnalysisResult: + """ + Analyze patient biomarkers and generate clinical insights. + + Args: + biomarkers: Dictionary of biomarker names to values + patient_context: Optional patient demographic information + + Returns: + AnalysisResult containing predictions and recommendations + + Raises: + ValueError: If biomarkers dictionary is empty + ValidationError: If biomarker values are invalid + + Example: + >>> result = analyze_biomarkers({"Glucose": 185, "HbA1c": 8.2}) + >>> print(result.prediction.disease) + 'Diabetes' + """ + pass +``` + +### Testing Guidelines + +- **Write tests** for all new features +- **Maintain coverage** above 80% +- **Test edge cases** and error conditions +- **Use descriptive test names** + +**Test Example:** + +```python +def test_biomarker_validation_with_critical_high_glucose(): + """Test that critically high glucose values trigger safety alerts.""" + validator = BiomarkerValidator() + biomarkers = {"Glucose": 400} # Critically high + + flags, alerts = validator.validate_all(biomarkers) + + assert len(alerts) > 0 + assert any("critical" in alert.message.lower() for alert in 
alerts) +``` + +## Commit Messages + +### Format + +``` +<type>(<scope>): <subject> + +<body> + +<footer>