Spaces:
Sleeping
Sleeping
Commit ·
ee14b8f
0
Parent(s):
project start
Browse files- .dockerignore +33 -0
- .gitignore +10 -0
- .python-version +1 -0
- Dockerfile +32 -0
- HUGGINGFACE_DEPLOYMENT.md +113 -0
- README.md +45 -0
- app.py +623 -0
- assets/styles.css +333 -0
- core/asr_engine.py +46 -0
- core/entity_extractor.py +162 -0
- core/excel_exporter.py +81 -0
- core/preprocessor.py +35 -0
- data/aliases.json +83 -0
- data/manufacturers.csv +7 -0
- data/medicines.csv +37 -0
- docs/DEPLOYMENT_GUIDE.md +190 -0
- docs/GETTING_STARTED.md +79 -0
- docs/HUGGINGFACE_SPACE_SETUP.md +63 -0
- evaluation/metrics.py +33 -0
- main.py +11 -0
- prompts/asr_prompt_guide.md +145 -0
- pyproject.toml +34 -0
- requirements.txt +14 -0
- simulation/manufacturer_db.py +98 -0
- simulation/order_queue.py +26 -0
- uv.lock +0 -0
.dockerignore
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Virtual environment
|
| 2 |
+
.venv/
|
| 3 |
+
venv/
|
| 4 |
+
env/
|
| 5 |
+
|
| 6 |
+
# Git
|
| 7 |
+
.git/
|
| 8 |
+
.gitignore
|
| 9 |
+
|
| 10 |
+
# Cache
|
| 11 |
+
__pycache__/
|
| 12 |
+
*.pyc
|
| 13 |
+
*.pyo
|
| 14 |
+
.cache/
|
| 15 |
+
|
| 16 |
+
# IDE
|
| 17 |
+
.vscode/
|
| 18 |
+
.idea/
|
| 19 |
+
|
| 20 |
+
# UV files (not needed in Docker)
|
| 21 |
+
uv.lock
|
| 22 |
+
pyproject.toml
|
| 23 |
+
|
| 24 |
+
# Docs (optional, reduce image size)
|
| 25 |
+
docs/
|
| 26 |
+
|
| 27 |
+
# Test files
|
| 28 |
+
tests/
|
| 29 |
+
*.test.py
|
| 30 |
+
|
| 31 |
+
# OS files
|
| 32 |
+
.DS_Store
|
| 33 |
+
Thumbs.db
|
.gitignore
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python-generated files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[oc]
|
| 4 |
+
build/
|
| 5 |
+
dist/
|
| 6 |
+
wheels/
|
| 7 |
+
*.egg-info
|
| 8 |
+
|
| 9 |
+
# Virtual environments
|
| 10 |
+
.venv
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
Dockerfile
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Use Python 3.11 slim for smaller image
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies for audio processing
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    libsndfile1 \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install uv for fast Python package management
RUN pip install uv

# Copy only the dependency manifest first so the (slow) dependency-install
# layer is cached and re-runs only when requirements.txt changes,
# not on every source edit.
COPY requirements.txt .

# Install Python dependencies
RUN uv pip install --system --no-cache-dir -r requirements.txt

# Copy the rest of the project
COPY . .

# Expose HuggingFace Spaces default port
EXPOSE 7860

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV STREAMLIT_SERVER_PORT=7860
ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0

# Run the Streamlit app
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
HUGGINGFACE_DEPLOYMENT.md
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hugging Face Spaces Deployment Guide
|
| 2 |
+
|
| 3 |
+
This guide explains how to deploy the **Pharma Voice Orders** application to Hugging Face Spaces using a Docker Space.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## Prerequisites
|
| 8 |
+
|
| 9 |
+
1. A [Hugging Face account](https://huggingface.co/join).
|
| 10 |
+
2. A Hugging Face Space created with **Docker SDK**.
|
| 11 |
+
3. Git installed on your local machine.
|
| 12 |
+
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
## Step 1: Install Hugging Face CLI
|
| 16 |
+
|
| 17 |
+
Install the Hugging Face Hub client library (which provides the `huggingface-cli` tool):
|
| 18 |
+
|
| 19 |
+
```bash
|
| 20 |
+
pip install huggingface_hub
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
## Step 2: Login to Hugging Face
|
| 26 |
+
|
| 27 |
+
Authenticate with your HF token (get one from [Settings > Tokens](https://huggingface.co/settings/tokens)):
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
huggingface-cli login
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
Enter your token when prompted. This saves your credentials for Git operations.
|
| 34 |
+
|
| 35 |
+
---
|
| 36 |
+
|
| 37 |
+
## Step 3: Add HF Space as Git Remote
|
| 38 |
+
|
| 39 |
+
Navigate to your project folder and add the Space as a remote:
|
| 40 |
+
|
| 41 |
+
```bash
|
| 42 |
+
cd pharma-voice-orders
|
| 43 |
+
git remote add hf https://huggingface.co/spaces/YOUR_USERNAME/pharma-voice-orders
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
Replace `YOUR_USERNAME` with your actual HuggingFace username (e.g., `Khedhar`).
|
| 47 |
+
|
| 48 |
+
---
|
| 49 |
+
|
| 50 |
+
## Step 4: Push to Hugging Face
|
| 51 |
+
|
| 52 |
+
Force push your code to the Space. **Important:** HuggingFace Spaces uses `main` as the default branch.
|
| 53 |
+
|
| 54 |
+
If your local branch is `master`:
|
| 55 |
+
```bash
|
| 56 |
+
git push hf master:main --force
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
If your local branch is already `main`:
|
| 60 |
+
```bash
|
| 61 |
+
git push hf main --force
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
> **Tip:** To check your current branch name, run: `git branch`
|
| 65 |
+
|
| 66 |
+
---
|
| 67 |
+
|
| 68 |
+
## Step 5: Verify Deployment
|
| 69 |
+
|
| 70 |
+
1. Go to your Space: `https://huggingface.co/spaces/YOUR_USERNAME/pharma-voice-orders`
|
| 71 |
+
2. Wait for the build to complete (check the **Logs** tab).
|
| 72 |
+
3. Once running, the app will be live at the Space URL.
|
| 73 |
+
|
| 74 |
+
---
|
| 75 |
+
|
| 76 |
+
## Dockerfile Notes
|
| 77 |
+
|
| 78 |
+
The `Dockerfile` in this project:
|
| 79 |
+
- Uses Python 3.11 slim image.
|
| 80 |
+
- Installs system dependencies for audio processing.
|
| 81 |
+
- Installs Python dependencies with `uv`.
|
| 82 |
+
- Exposes port `7860` (HF Spaces default).
|
| 83 |
+
|
| 84 |
+
---
|
| 85 |
+
|
| 86 |
+
## Environment Variables (Optional)
|
| 87 |
+
|
| 88 |
+
If your app requires secrets (e.g., `HF_TOKEN`), configure them in Space Settings > Repository Secrets.
|
| 89 |
+
|
| 90 |
+
---
|
| 91 |
+
|
| 92 |
+
## Troubleshooting
|
| 93 |
+
|
| 94 |
+
| Issue | Solution |
|
| 95 |
+
|-------|----------|
|
| 96 |
+
| `Permission denied` | Run `huggingface-cli login` again |
|
| 97 |
+
| `Build failed` | Check Logs tab for error details |
|
| 98 |
+
| `Port not accessible` | Ensure `Dockerfile` exposes port `7860` |
|
| 99 |
+
|
| 100 |
+
---
|
| 101 |
+
|
| 102 |
+
## Useful Commands
|
| 103 |
+
|
| 104 |
+
```bash
|
| 105 |
+
# Check current remotes
|
| 106 |
+
git remote -v
|
| 107 |
+
|
| 108 |
+
# Remove HF remote
|
| 109 |
+
git remote remove hf
|
| 110 |
+
|
| 111 |
+
# Re-add HF remote
|
| 112 |
+
git remote add hf https://huggingface.co/spaces/YOUR_USERNAME/pharma-voice-orders
|
| 113 |
+
```
|
README.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🏥 Pharma Voice Orders
|
| 2 |
+
|
| 3 |
+
> **Accent-Aware Speech-to-Text Engine for Distributor Order Processing**
|
| 4 |
+
|
| 5 |
+
This application helps pharmaceutical manufacturers process voice orders from primary distributors efficiently. It simulates an end-to-end pipeline:
|
| 6 |
+
1. **Distributor Input**: Voice recording of orders (e.g., "Send 20 strips of Augmentin 625").
|
| 7 |
+
2. **AI Processing**: Transcription using OpenAI Whisper and Entity Extraction.
|
| 8 |
+
3. **Simulation**: Routing orders to specific manufacturer boxes (Sun Pharma, GSK, etc.).
|
| 9 |
+
4. **Export**: Generating structured Excel sheets for ERP systems.
|
| 10 |
+
|
| 11 |
+
## 🚀 Quick Start
|
| 12 |
+
|
| 13 |
+
1. **Install Dependencies**:
|
| 14 |
+
```bash
|
| 15 |
+
pip install -r requirements.txt
|
| 16 |
+
```
|
| 17 |
+
|
| 18 |
+
2. **Run the Application**:
|
| 19 |
+
```bash
|
| 20 |
+
streamlit run app.py
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
## 📂 Project Structure
|
| 24 |
+
|
| 25 |
+
- `app.py`: Main Streamlit application entry point.
|
| 26 |
+
- `core/`: Contains ASR engine, Preprocessor, and Entity Extractor.
|
| 27 |
+
- `simulation/`: Mock database and order routing logic.
|
| 28 |
+
- `data/`: Sample medicine and manufacturer databases.
|
| 29 |
+
- `evaluation/`: Scripts to calculate WER, Accuracy, and Latency.
|
| 30 |
+
|
| 31 |
+
## 🛠️ Tech Stack
|
| 32 |
+
|
| 33 |
+
- **Frontend**: Streamlit
|
| 34 |
+
- **AI Model**: OpenAI Whisper (via HuggingFace Transformers)
|
| 35 |
+
- **Data Processing**: Pandas, OpenPyXL
|
| 36 |
+
- **Matching**: RapidFuzz (Fuzzy String Matching)
|
| 37 |
+
- **Audio**: Librosa, SoundFile
|
| 38 |
+
|
| 39 |
+
## 🎓 University Use
|
| 40 |
+
|
| 41 |
+
This project demonstrates the "Minor Project" proposal deliverables:
|
| 42 |
+
- Noise Reduction & Preprocessing
|
| 43 |
+
- Accent-Aware STT (simulated via Whisper)
|
| 44 |
+
- Entity Extraction (Medicine/Dosage/Quantity)
|
| 45 |
+
- Performance Evaluation (WER Report)
|
app.py
ADDED
|
@@ -0,0 +1,623 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pharma Voice Orders - Main Application
|
| 3 |
+
Streamlit UI for simulating Distributor -> Manufacturer Voice Ordering System
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import time
|
| 9 |
+
import os
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
# Page Config
|
| 13 |
+
st.set_page_config(
|
| 14 |
+
page_title="Pharma Voice Orders",
|
| 15 |
+
page_icon="🏥",
|
| 16 |
+
layout="wide",
|
| 17 |
+
initial_sidebar_state="expanded"
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
# Custom CSS - Avant-Garde Glassmorphic Design
|
| 21 |
+
def load_css(file_name):
    """Inject a CSS file into the page as an inline <style> block.

    Args:
        file_name: Path to the CSS file (e.g. "assets/styles.css").
    """
    # Explicit encoding avoids platform-dependent defaults (e.g. cp1252 on
    # Windows) mangling non-ASCII characters in the stylesheet.
    with open(file_name, encoding="utf-8") as f:
        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
|
| 24 |
+
|
| 25 |
+
load_css("assets/styles.css")
|
| 26 |
+
|
| 27 |
+
# --- Session State Initialization ---
|
| 28 |
+
if 'model_ready' not in st.session_state:
|
| 29 |
+
st.session_state.model_ready = False
|
| 30 |
+
if 'orders' not in st.session_state:
|
| 31 |
+
st.session_state.orders = []
|
| 32 |
+
if 'last_transcription' not in st.session_state:
|
| 33 |
+
st.session_state.last_transcription = ""
|
| 34 |
+
|
| 35 |
+
# --- Sidebar ---
|
| 36 |
+
with st.sidebar:
|
| 37 |
+
st.image("https://cdn-icons-png.flaticon.com/512/3063/3063167.png", width=50)
|
| 38 |
+
st.title("PharmaVoice")
|
| 39 |
+
st.caption("v1.0.0 | Minor Project")
|
| 40 |
+
|
| 41 |
+
st.markdown("---")
|
| 42 |
+
st.header("⚙️ Configuration")
|
| 43 |
+
|
| 44 |
+
distributor = st.selectbox(
|
| 45 |
+
"Select Distributor",
|
| 46 |
+
["Apollo Pharmacy", "MedPlus", "Frank Ross", "Online Pharma", "Local Chemist"]
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
asr_model = st.selectbox(
|
| 50 |
+
"ASR Model",
|
| 51 |
+
[
|
| 52 |
+
"google/medasr",
|
| 53 |
+
"openai/whisper-tiny",
|
| 54 |
+
"openai/whisper-small",
|
| 55 |
+
"openai/whisper-medium",
|
| 56 |
+
"openai/whisper-large-v3",
|
| 57 |
+
]
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
# Note about MedASR - now enabled!
|
| 61 |
+
if "medasr" in asr_model:
|
| 62 |
+
st.success("✅ MedASR enabled (transformers from GitHub installed)")
|
| 63 |
+
|
| 64 |
+
st.markdown("---")
|
| 65 |
+
|
| 66 |
+
# Inference Mode Toggle
|
| 67 |
+
st.subheader("⚡ Inference Mode")
|
| 68 |
+
|
| 69 |
+
# HF Token Configuration
|
| 70 |
+
# Token should be set via environment variable or entered by user
|
| 71 |
+
hf_token_input = st.text_input(
|
| 72 |
+
"🔑 HF Token",
|
| 73 |
+
value=os.environ.get("HF_TOKEN", ""),
|
| 74 |
+
type="password",
|
| 75 |
+
help="Required for Cloud mode and gated models. Set via HF_TOKEN env var or enter here.",
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
# Check for token from input or environment
|
| 79 |
+
hf_token = hf_token_input or os.environ.get("HF_TOKEN", "")
|
| 80 |
+
|
| 81 |
+
# Mode selection based on token availability
|
| 82 |
+
if hf_token:
|
| 83 |
+
inference_mode = st.radio(
|
| 84 |
+
"Select Mode",
|
| 85 |
+
["💻 Local (Faster)", "☁️ Cloud (No Download)"],
|
| 86 |
+
index=0,
|
| 87 |
+
help="Cloud uses HF servers. Local downloads model to your PC."
|
| 88 |
+
)
|
| 89 |
+
use_cloud = "Cloud" in inference_mode
|
| 90 |
+
st.success("🔓 Token configured" + (" • Cloud Mode" if use_cloud else " • Local Mode"))
|
| 91 |
+
else:
|
| 92 |
+
use_cloud = False
|
| 93 |
+
st.warning("⚠️ No token → Local mode only (requires download)")
|
| 94 |
+
inference_mode = "💻 Local (Faster)"
|
| 95 |
+
|
| 96 |
+
st.markdown("---")
|
| 97 |
+
st.info("""
|
| 98 |
+
**Instructions:**
|
| 99 |
+
1. Select a distributor.
|
| 100 |
+
2. Record your voice order.
|
| 101 |
+
3. Watch orders route to manufacturers!
|
| 102 |
+
""")
|
| 103 |
+
|
| 104 |
+
if st.button("🔄 Clear Session", type="secondary"):
|
| 105 |
+
st.session_state.clear()
|
| 106 |
+
st.rerun()
|
| 107 |
+
|
| 108 |
+
# --- Cloud Inference (HuggingFace Inference API) ---
|
| 109 |
+
def transcribe_cloud(audio_data, model_name: str, token: str):
    """Transcribe audio using the HuggingFace Inference API (no local download).

    Args:
        audio_data: Raw audio bytes, or a file-like object (e.g. a Streamlit
            upload/recording widget value).
        model_name: HF model id to run server-side (e.g. "openai/whisper-small").
        token: HF access token used to authenticate the API call.

    Returns:
        The transcription text.
    """
    from huggingface_hub import InferenceClient

    client = InferenceClient(token=token)

    # Accept either a file-like object or raw bytes.
    if hasattr(audio_data, 'read'):
        audio_bytes = audio_data.read()
        audio_data.seek(0)  # Reset for replay
    else:
        audio_bytes = audio_data

    # Call HuggingFace Inference API
    result = client.automatic_speech_recognition(
        audio=audio_bytes,
        model=model_name,
    )

    # The API may return a plain string or a dict with a 'text' key.
    if isinstance(result, str):
        return result
    return result.get("text", str(result))
|
| 134 |
+
|
| 135 |
+
# --- Local ASR Engine (Downloads Model) ---
|
| 136 |
+
@st.cache_resource(show_spinner=False)
def load_asr_engine(model_name: str, token: str | None = None):
    """Load an ASR model locally and wrap it in a transformers pipeline.

    Downloads the model into the HF cache on first use. Cached by Streamlit,
    so repeated calls with the same arguments reuse the loaded pipeline.

    Args:
        model_name: HF model id. Whisper-family ids load as seq2seq models;
            ids containing "medasr" load as CTC models.
        token: Optional HF token for gated/private models.

    Returns:
        A transformers "automatic-speech-recognition" pipeline.
    """
    import torch
    from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

    device = "cuda" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    # Authenticate first so gated/private models can be fetched.
    if token:
        from huggingface_hub import login
        login(token=token)

    # MedASR is a CTC model; the Whisper family is encoder-decoder.
    if "medasr" in model_name:
        from transformers import AutoModelForCTC
        model_class = AutoModelForCTC
    else:
        model_class = AutoModelForSpeechSeq2Seq

    # trust_remote_code=True allows repos that ship custom modeling code.
    try:
        model = model_class.from_pretrained(
            model_name,
            dtype=torch_dtype,
            low_cpu_mem_usage=True,
            use_safetensors=True,
            trust_remote_code=True,
        )
    except OSError:
        # Fallback for repos without safetensors weights (or similar load issues).
        model = model_class.from_pretrained(
            model_name,
            dtype=torch_dtype,
            low_cpu_mem_usage=True,
            trust_remote_code=True,
        )

    model.to(device)

    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        dtype=torch_dtype,
        device=device,
        trust_remote_code=True,
    )

    return pipe
|
| 190 |
+
|
| 191 |
+
# --- Other Components (Lazy Load) ---
|
| 192 |
+
@st.cache_resource
def get_db():
    """Return the session-cached manufacturer/medicine database."""
    from simulation.manufacturer_db import ManufacturerDB

    return ManufacturerDB(data_dir="data")
|
| 196 |
+
|
| 197 |
+
@st.cache_resource
def get_preprocessor():
    """Return the session-cached audio preprocessor instance."""
    from core.preprocessor import AudioPreprocessor

    return AudioPreprocessor()
|
| 201 |
+
|
| 202 |
+
@st.cache_resource
def get_extractor(_db):
    """Return the session-cached entity extractor bound to *_db*.

    The leading underscore tells st.cache_resource not to hash the
    database argument when computing the cache key.
    """
    from core.entity_extractor import EntityExtractor

    return EntityExtractor(_db)
|
| 206 |
+
|
| 207 |
+
# --- Model Cache Checker ---
|
| 208 |
+
def check_model_status(model_name: str) -> dict:
    """Check whether *model_name* is cached locally and whether disk space suffices.

    Args:
        model_name: HF model id, e.g. "openai/whisper-small".

    Returns:
        Dict with keys:
            is_cached (bool): model weight files found in the local HF cache.
            free_gb (float): free disk space in GB (-1 if it could not be read).
            required_gb (float): approximate download size for the model in GB.
            has_space (bool): enough space to download (or already cached).
            cache_path (str | None): cache folder path if it exists, else None.
    """
    import shutil
    from pathlib import Path

    # HuggingFace hub cache layout: ~/.cache/huggingface/hub/models--org--name
    cache_dir = Path.home() / ".cache" / "huggingface" / "hub"
    model_folder_name = f"models--{model_name.replace('/', '--')}"
    model_cache_path = cache_dir / model_folder_name

    # Short-circuit makes the original "... if exists else False" redundant.
    is_cached = model_cache_path.exists() and any(model_cache_path.iterdir())

    # The cache folder may exist without weights (interrupted download);
    # look for actual weight files (*.safetensors / *.bin) inside any snapshot.
    has_model_files = False
    snapshots_path = model_cache_path / "snapshots"
    if snapshots_path.exists():
        for snapshot in snapshots_path.iterdir():
            if any(f.suffix in ('.safetensors', '.bin') for f in snapshot.iterdir() if f.is_file()):
                has_model_files = True
                break

    # Free disk space on the volume holding the cache (home dir as fallback).
    try:
        disk_usage = shutil.disk_usage(cache_dir if cache_dir.exists() else Path.home())
        free_gb = disk_usage.free / (1024 ** 3)
    except OSError:
        free_gb = -1

    # Approximate download sizes in GB.
    model_sizes = {
        "openai/whisper-tiny": 0.15,
        "openai/whisper-small": 0.5,
        "openai/whisper-medium": 1.5,
        "openai/whisper-large-v3": 3.1,
        "google/medasr": 0.3,  # ~300MB
    }
    required_gb = model_sizes.get(model_name, 2.0)

    return {
        "is_cached": is_cached and has_model_files,
        "free_gb": round(free_gb, 1),
        "required_gb": required_gb,
        "has_space": free_gb >= required_gb or is_cached,
        "cache_path": str(model_cache_path) if model_cache_path.exists() else None,
    }
|
| 256 |
+
|
| 257 |
+
# Load non-blocking components
|
| 258 |
+
db = get_db()
|
| 259 |
+
preprocessor = get_preprocessor()
|
| 260 |
+
extractor = get_extractor(db)
|
| 261 |
+
|
| 262 |
+
# --- Main Content ---
|
| 263 |
+
st.markdown('<h1 class="main-header">🏥 Order Processing Center</h1>', unsafe_allow_html=True)
|
| 264 |
+
st.markdown(f'<p class="sub-header">Reviewing orders from: <strong>{distributor}</strong></p>', unsafe_allow_html=True)
|
| 265 |
+
|
| 266 |
+
# Smart Model Status Indicator
|
| 267 |
+
model_status = check_model_status(asr_model)
|
| 268 |
+
|
| 269 |
+
# Use a flex container for the status badge (aligned right) to avoid empty column artifacts
|
| 270 |
+
status_html = ""
|
| 271 |
+
if use_cloud:
|
| 272 |
+
status_html = '''
|
| 273 |
+
<div class="status-ready" style="border-color: rgba(139, 92, 246, 0.3); background: rgba(139, 92, 246, 0.1); color: #a78bfa;">
|
| 274 |
+
<span class="status-dot" style="background: #a78bfa;"></span>
|
| 275 |
+
☁️ Cloud Ready
|
| 276 |
+
</div>
|
| 277 |
+
'''
|
| 278 |
+
elif model_status["is_cached"]:
|
| 279 |
+
status_html = '''
|
| 280 |
+
<div class="status-ready">
|
| 281 |
+
<span class="status-dot green"></span>
|
| 282 |
+
✅ Cached (Local)
|
| 283 |
+
</div>
|
| 284 |
+
'''
|
| 285 |
+
elif model_status["has_space"]:
|
| 286 |
+
status_html = f'''
|
| 287 |
+
<div class="status-loading" style="border-color: rgba(251, 191, 36, 0.3); background: rgba(251, 191, 36, 0.1); color: #fbbf24;">
|
| 288 |
+
<span class="status-dot" style="background: #fbbf24;"></span>
|
| 289 |
+
⬇️ Download ({model_status["required_gb"]}GB)
|
| 290 |
+
</div>
|
| 291 |
+
'''
|
| 292 |
+
else:
|
| 293 |
+
status_html = f'''
|
| 294 |
+
<div class="status-loading" style="border-color: rgba(239, 68, 68, 0.3); background: rgba(239, 68, 68, 0.1); color: #ef4444;">
|
| 295 |
+
<span class="status-dot" style="background: #ef4444;"></span>
|
| 296 |
+
⚠️ Low Space ({model_status["free_gb"]}GB free)
|
| 297 |
+
</div>
|
| 298 |
+
'''
|
| 299 |
+
st.warning(f"Need {model_status['required_gb']}GB, only {model_status['free_gb']}GB free. Choose a smaller model or free disk space.")
|
| 300 |
+
|
| 301 |
+
# Render the status badge aligned right. Use an st.empty() placeholder so the
# download flow below can overwrite the badge in place. (Previously the
# download button referenced an undefined `status_placeholder`, which raised
# NameError the moment a user clicked "Yes, Download".)
status_placeholder = st.empty()
if status_html:
    status_placeholder.markdown(f'<div style="display: flex; justify-content: flex-end; margin-bottom: 20px;">{status_html}</div>', unsafe_allow_html=True)

# Download confirmation state: maps model id -> approved flag for this session.
if 'download_approved' not in st.session_state:
    st.session_state.download_approved = {}

# Show download confirmation ONLY if Local mode AND model not cached AND not yet approved
if not use_cloud and not model_status["is_cached"] and asr_model not in st.session_state.download_approved:
    with st.container():
        st.markdown("---")
        st.markdown("### ⬇️ Download Required")
        # Path.home() keeps the displayed cache location correct on any OS
        # (the app deploys to Linux on HF Spaces, not just Windows).
        st.info(f"""**{asr_model}** is not cached locally.

📦 Size: **{model_status['required_gb']}GB**
💾 Free space: **{model_status['free_gb']}GB**
📂 Cache location: `{Path.home() / '.cache' / 'huggingface' / 'hub'}`

💡 **Tip:** Switch to Cloud Mode to avoid downloading!
""")

        col_yes, col_no = st.columns(2)
        with col_yes:
            if st.button("✅ Yes, Download", type="primary", use_container_width=True):
                # Swap the badge to a "Downloading" state while the model fetches.
                status_placeholder.markdown('''
                <div class="status-loading" style="border-color: rgba(59, 130, 246, 0.3); background: rgba(59, 130, 246, 0.1); color: #60a5fa;">
                    <span class="status-dot" style="background: #3b82f6; animation: pulse 0.5s infinite;"></span>
                    ⏳ Downloading...
                </div>
                ''', unsafe_allow_html=True)

                with st.spinner(f"⬇️ Downloading {asr_model}... This may take a while."):
                    try:
                        # Trigger download and load into the Streamlit resource cache.
                        load_asr_engine(asr_model, hf_token)
                        st.session_state.download_approved[asr_model] = True
                        st.session_state.model_ready = True
                        st.success("✅ Download complete! Model is ready.")
                        time.sleep(1)
                        st.rerun()
                    except Exception as e:
                        st.error(f"❌ Download failed: {e}")
        with col_no:
            if st.button("❌ Cancel", type="secondary", use_container_width=True):
                st.info("Download cancelled. Select a cached model or use Cloud Mode.")
|
| 348 |
+
|
| 349 |
+
# Layout: Input (Left) vs Output (Right)
|
| 350 |
+
col1, col2 = st.columns([1, 2])
|
| 351 |
+
|
| 352 |
+
with col1:
|
| 353 |
+
# Voice Container
|
| 354 |
+
st.markdown('<div class="voice-container">', unsafe_allow_html=True)
|
| 355 |
+
st.markdown('<h3 style="color: #4facfe; margin: 0 0 10px 0;">Voice Input</h3>', unsafe_allow_html=True)
|
| 356 |
+
|
| 357 |
+
# Example Prompt Tagline
|
| 358 |
+
example_prompt = "Send me 50 strips of Paracetamol, 20 bottles of Ascoril syrup, and also 10 tubes of Betnovate cream."
|
| 359 |
+
st.markdown(f'''
|
| 360 |
+
<div style="background: rgba(79, 172, 254, 0.1); border: 1px dashed rgba(79, 172, 254, 0.4); border-radius: 8px; padding: 12px; margin-bottom: 16px;">
|
| 361 |
+
<span style="color: #4facfe; font-weight: 600; font-size: 0.75rem;">💡 TRY SAYING:</span>
|
| 362 |
+
<p style="color: rgba(255,255,255,0.9); font-style: italic; margin: 8px 0 0 0; font-size: 0.9rem; line-height: 1.5;">"{example_prompt}"</p>
|
| 363 |
+
</div>
|
| 364 |
+
''', unsafe_allow_html=True)
|
| 365 |
+
|
| 366 |
+
st.markdown('<div class="mic-icon">🎙️</div>', unsafe_allow_html=True)
|
| 367 |
+
|
| 368 |
+
tab1, tab2 = st.tabs(["🔴 Record", "📁 Upload"])
|
| 369 |
+
|
| 370 |
+
audio_data = None
|
| 371 |
+
|
| 372 |
+
with tab1:
|
| 373 |
+
try:
|
| 374 |
+
audio_val_rec = st.audio_input("Click to record", label_visibility="collapsed")
|
| 375 |
+
if audio_val_rec:
|
| 376 |
+
audio_data = audio_val_rec
|
| 377 |
+
except AttributeError:
|
| 378 |
+
st.warning("Update Streamlit to use `st.audio_input`.")
|
| 379 |
+
|
| 380 |
+
with tab2:
|
| 381 |
+
audio_val_up = st.file_uploader("Upload Audio", type=['wav', 'mp3'], label_visibility="collapsed")
|
| 382 |
+
if audio_val_up:
|
| 383 |
+
audio_data = audio_val_up
|
| 384 |
+
|
| 385 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
| 386 |
+
|
| 387 |
+
# Process Audio
|
| 388 |
+
if audio_data:
|
| 389 |
+
st.success("✅ Audio captured!")
|
| 390 |
+
st.audio(audio_data)
|
| 391 |
+
|
| 392 |
+
if st.button("🚀 Process Order", type="primary", use_container_width=True):
|
| 393 |
+
transcription_text = ""
|
| 394 |
+
|
| 395 |
+
if use_cloud:
|
| 396 |
+
# CLOUD MODE - Use HuggingFace Inference API (no download)
|
| 397 |
+
with st.spinner("☁️ Transcribing via Cloud..."):
|
| 398 |
+
try:
|
| 399 |
+
transcription_text = transcribe_cloud(audio_data, asr_model, hf_token)
|
| 400 |
+
st.toast("✅ Cloud Transcription Complete!")
|
| 401 |
+
except Exception as e:
|
| 402 |
+
st.error(f"❌ Cloud API failed: {e}")
|
| 403 |
+
st.info("💡 Try Local mode or check your token/model.")
|
| 404 |
+
st.stop()
|
| 405 |
+
else:
|
| 406 |
+
# LOCAL MODE - Download and run model locally
|
| 407 |
+
with st.spinner("🔄 Loading Local ASR Model..."):
|
| 408 |
+
try:
|
| 409 |
+
asr = load_asr_engine(asr_model, hf_token)
|
| 410 |
+
st.session_state.model_ready = True
|
| 411 |
+
except Exception as e:
|
| 412 |
+
st.error(f"❌ Model load failed: {e}")
|
| 413 |
+
st.stop()
|
| 414 |
+
|
| 415 |
+
with st.spinner("🎧 Transcribing Locally..."):
|
| 416 |
+
processed_audio = preprocessor.process(audio_data)
|
| 417 |
+
result = asr(processed_audio)
|
| 418 |
+
transcription_text = result["text"].replace("</s>", "").strip()
|
| 419 |
+
st.toast("✅ Local Transcription Complete!")
|
| 420 |
+
|
| 421 |
+
# Store transcription
|
| 422 |
+
st.session_state.last_transcription = transcription_text
|
| 423 |
+
|
| 424 |
+
with st.spinner("📦 Extracting Orders..."):
|
| 425 |
+
extracted_orders = extractor.extract(transcription_text)
|
| 426 |
+
|
| 427 |
+
if extracted_orders:
|
| 428 |
+
st.success(f"Found {len(extracted_orders)} items!")
|
| 429 |
+
for order in extracted_orders:
|
| 430 |
+
st.session_state.orders.append(order)
|
| 431 |
+
st.rerun()
|
| 432 |
+
else:
|
| 433 |
+
st.warning("No medicines found. Try: 'Send 20 strips of Augmentin'")
|
| 434 |
+
|
| 435 |
+
st.markdown("---")
|
| 436 |
+
st.markdown("### 📝 Transcription")
|
| 437 |
+
|
| 438 |
+
current_text = st.session_state.get('last_transcription', "")
|
| 439 |
+
st.text_area(
|
| 440 |
+
"Transcription Output",
|
| 441 |
+
current_text,
|
| 442 |
+
height=120,
|
| 443 |
+
disabled=True,
|
| 444 |
+
placeholder="Transcription will appear here...",
|
| 445 |
+
label_visibility="collapsed"
|
| 446 |
+
)
|
| 447 |
+
|
| 448 |
+
with col2:
|
| 449 |
+
st.markdown("### 🏭 Manufacturer Routing")
|
| 450 |
+
|
| 451 |
+
# Get grouped orders from session state
|
| 452 |
+
from simulation.order_queue import OrderQueue
|
| 453 |
+
queue = OrderQueue()
|
| 454 |
+
grouped_orders = queue.get_grouped_orders(db)
|
| 455 |
+
all_manufacturers = db.get_all_manufacturers()
|
| 456 |
+
|
| 457 |
+
# Grid Layout
|
| 458 |
+
import textwrap
|
| 459 |
+
row1_cols = st.columns(2)
|
| 460 |
+
row2_cols = st.columns(2)
|
| 461 |
+
row3_cols = st.columns(2)
|
| 462 |
+
|
| 463 |
+
# 6 Manufacturers -> 3 Rows of 2
|
| 464 |
+
for idx, mfr in enumerate(all_manufacturers):
|
| 465 |
+
if idx < 2:
|
| 466 |
+
col = row1_cols[idx]
|
| 467 |
+
elif idx < 4:
|
| 468 |
+
col = row2_cols[idx - 2]
|
| 469 |
+
elif idx < 6:
|
| 470 |
+
col = row3_cols[idx - 4]
|
| 471 |
+
else:
|
| 472 |
+
continue
|
| 473 |
+
|
| 474 |
+
with col:
|
| 475 |
+
mfr_name = mfr['name']
|
| 476 |
+
orders = grouped_orders.get(mfr_name, [])
|
| 477 |
+
order_count = len(orders)
|
| 478 |
+
|
| 479 |
+
# Determine Visual State
|
| 480 |
+
is_active = order_count > 0
|
| 481 |
+
active_class = "active" if is_active else ""
|
| 482 |
+
badge_class = "active" if is_active else ""
|
| 483 |
+
|
| 484 |
+
# Generate HTML - Single line to prevent Markdown parsing issues
|
| 485 |
+
html_parts = []
|
| 486 |
+
|
| 487 |
+
# 1. Header & Open Body
|
| 488 |
+
html_parts.append(f'<div class="node-card {active_class}">')
|
| 489 |
+
html_parts.append('<div class="node-header">')
|
| 490 |
+
html_parts.append(f'<span class="node-title"><span style="opacity:0.7">🏭</span> {mfr_name}</span>')
|
| 491 |
+
html_parts.append(f'<span class="node-badge {badge_class}">{order_count} Items</span>')
|
| 492 |
+
html_parts.append('</div><div class="node-body">')
|
| 493 |
+
|
| 494 |
+
# 2. Body Content
|
| 495 |
+
if is_active:
|
| 496 |
+
for order in orders:
|
| 497 |
+
# Confidence Logic
|
| 498 |
+
conf = order.get('confidence', 0)
|
| 499 |
+
conf_class = "conf-low"
|
| 500 |
+
if conf >= 90: conf_class = "conf-high"
|
| 501 |
+
elif conf >= 75: conf_class = "conf-med"
|
| 502 |
+
|
| 503 |
+
med_name = order.get('medicine_standardized', order['medicine'])
|
| 504 |
+
dosage = order.get('dosage', '-')
|
| 505 |
+
|
| 506 |
+
html_parts.append(f'<div class="order-chip {conf_class}">')
|
| 507 |
+
html_parts.append('<div class="chip-main">')
|
| 508 |
+
html_parts.append(f'<span class="chip-med">{med_name}</span>')
|
| 509 |
+
html_parts.append(f'<span class="chip-meta">{dosage}</span>')
|
| 510 |
+
html_parts.append('</div>')
|
| 511 |
+
html_parts.append(f'<span class="chip-qty">{order["quantity"]}</span>')
|
| 512 |
+
html_parts.append('</div>')
|
| 513 |
+
else:
|
| 514 |
+
html_parts.append('<div style="color: rgba(255,255,255,0.2); font-style: italic; font-size: 0.85rem; text-align: center; padding: 10px;">Waiting for data...</div>')
|
| 515 |
+
|
| 516 |
+
# 3. Close Body & Card
|
| 517 |
+
html_parts.append('</div></div>')
|
| 518 |
+
|
| 519 |
+
st.markdown("".join(html_parts), unsafe_allow_html=True)
|
| 520 |
+
|
| 521 |
+
# Unknown Orders (Quarantine Node)
|
| 522 |
+
unknowns = grouped_orders.get('Unknown', [])
|
| 523 |
+
if unknowns:
|
| 524 |
+
html_parts = []
|
| 525 |
+
html_parts.append('<div class="node-card active" style="border-color: rgba(255, 51, 102, 0.3); box-shadow: 0 0 20px rgba(255, 51, 102, 0.1);">')
|
| 526 |
+
html_parts.append('<div class="node-header">')
|
| 527 |
+
html_parts.append('<span class="node-title" style="color: #ff3366;"><span>⚠️</span> Quarantine / Unmapped</span>')
|
| 528 |
+
html_parts.append(f'<span class="node-badge" style="background: rgba(255, 51, 102, 0.1); color: #ff3366; border: 1px solid rgba(255, 51, 102, 0.2);">{len(unknowns)} Items</span>')
|
| 529 |
+
html_parts.append('</div><div class="node-body">')
|
| 530 |
+
|
| 531 |
+
for order in unknowns:
|
| 532 |
+
html_parts.append('<div class="order-chip conf-low">')
|
| 533 |
+
html_parts.append('<div class="chip-main">')
|
| 534 |
+
html_parts.append(f'<span class="chip-med" style="color: #ff3366;">{order["medicine"]} (Raw)</span>')
|
| 535 |
+
html_parts.append(f'<span class="chip-meta">Confidence: {order.get("confidence", 0)}%</span>')
|
| 536 |
+
html_parts.append('</div>')
|
| 537 |
+
html_parts.append(f'<span class="chip-qty">{order["quantity"]}</span>')
|
| 538 |
+
html_parts.append('</div>')
|
| 539 |
+
|
| 540 |
+
html_parts.append('</div></div>')
|
| 541 |
+
st.markdown("".join(html_parts), unsafe_allow_html=True)
|
| 542 |
+
|
| 543 |
+
st.markdown("---")
|
| 544 |
+
|
| 545 |
+
# Export Buttons
|
| 546 |
+
if st.session_state.orders:
|
| 547 |
+
from core.excel_exporter import ExcelExporter
|
| 548 |
+
|
| 549 |
+
col_excel, col_csv = st.columns(2)
|
| 550 |
+
|
| 551 |
+
with col_excel:
|
| 552 |
+
excel_data = ExcelExporter.export(st.session_state.orders, db=db)
|
| 553 |
+
st.download_button(
|
| 554 |
+
label="📥 Export to Excel",
|
| 555 |
+
data=excel_data,
|
| 556 |
+
file_name="pharma_orders.xlsx",
|
| 557 |
+
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
| 558 |
+
use_container_width=True
|
| 559 |
+
)
|
| 560 |
+
|
| 561 |
+
with col_csv:
|
| 562 |
+
csv_data = ExcelExporter.export_csv(st.session_state.orders, db=db)
|
| 563 |
+
st.download_button(
|
| 564 |
+
label="📄 Export to CSV",
|
| 565 |
+
data=csv_data,
|
| 566 |
+
file_name="pharma_orders.csv",
|
| 567 |
+
mime="text/csv",
|
| 568 |
+
use_container_width=True
|
| 569 |
+
)
|
| 570 |
+
|
| 571 |
+
# --- Informational Footer (New Section) ---
|
| 572 |
+
|
| 573 |
+
footer_html = []
|
| 574 |
+
footer_html.append('<div class="info-container">')
|
| 575 |
+
footer_html.append('<div class="info-grid">')
|
| 576 |
+
|
| 577 |
+
# 1. How to Use Section
|
| 578 |
+
footer_html.append('<div class="info-section">')
|
| 579 |
+
footer_html.append('<h4>💡 How to use it</h4>')
|
| 580 |
+
footer_html.append('<ul class="info-list">')
|
| 581 |
+
footer_html.append('<li class="info-item">')
|
| 582 |
+
footer_html.append('<span>🔹 <span class="info-highlight">Mixed Manufacturers:</span></span>')
|
| 583 |
+
footer_html.append('<span class="info-example">"Send Paracetamol tablet 300 strips, also Azithromycin 50 strips and Volini spray 20 pieces."</span>')
|
| 584 |
+
footer_html.append('</li>')
|
| 585 |
+
footer_html.append('<li class="info-item">')
|
| 586 |
+
footer_html.append('<span>🔹 <span class="info-highlight">Forms & Units:</span></span>')
|
| 587 |
+
footer_html.append('<span class="info-example">"Order 50 bottles of Ascoril syrup, 20 tubes of Betnovate cream, and 10 packs of Prega News."</span>')
|
| 588 |
+
footer_html.append('</li>')
|
| 589 |
+
footer_html.append('<li class="info-item">')
|
| 590 |
+
footer_html.append('<span>🔹 <span class="info-highlight">Pronunciation/Noisy:</span></span>')
|
| 591 |
+
footer_html.append('<span class="info-example">"Uh, give me some Combiflam... maybe 20 strips? And... Zinetac 150."</span>')
|
| 592 |
+
footer_html.append('</li>')
|
| 593 |
+
footer_html.append('</ul></div>')
|
| 594 |
+
|
| 595 |
+
# 2. Medical Areas Section
|
| 596 |
+
footer_html.append('<div class="info-section">')
|
| 597 |
+
footer_html.append('<h4>🏥 Medical Areas Covered</h4>')
|
| 598 |
+
footer_html.append('<ul class="info-list">')
|
| 599 |
+
footer_html.append('<li class="info-item">')
|
| 600 |
+
footer_html.append('<span>🍬 <span class="info-highlight">Syrups</span></span>')
|
| 601 |
+
footer_html.append('<span class="info-example">("50 bottles of Ascoril")</span>')
|
| 602 |
+
footer_html.append('</li>')
|
| 603 |
+
footer_html.append('<li class="info-item">')
|
| 604 |
+
footer_html.append('<span>🧴 <span class="info-highlight">Creams/Gels</span></span>')
|
| 605 |
+
footer_html.append('<span class="info-example">("20 tubes of Betnovate")</span>')
|
| 606 |
+
footer_html.append('</li>')
|
| 607 |
+
footer_html.append('<li class="info-item">')
|
| 608 |
+
footer_html.append('<span>💉 <span class="info-highlight">Injections</span></span>')
|
| 609 |
+
footer_html.append('<span class="info-example">("10 vials of Amikacin")</span>')
|
| 610 |
+
footer_html.append('</li>')
|
| 611 |
+
footer_html.append('<li class="info-item">')
|
| 612 |
+
footer_html.append('<span>💨 <span class="info-highlight">Sprays/Inhalers</span></span>')
|
| 613 |
+
footer_html.append('<span class="info-example">("5 pcs of Volini spray")</span>')
|
| 614 |
+
footer_html.append('</li>')
|
| 615 |
+
footer_html.append('<li class="info-item">')
|
| 616 |
+
footer_html.append('<span>💊 <span class="info-highlight">Tablets/Capsules</span></span>')
|
| 617 |
+
footer_html.append('<span class="info-example">("100 strips of Paracetamol")</span>')
|
| 618 |
+
footer_html.append('</li>')
|
| 619 |
+
footer_html.append('</ul></div>')
|
| 620 |
+
|
| 621 |
+
footer_html.append('</div></div>')
|
| 622 |
+
|
| 623 |
+
st.markdown("".join(footer_html), unsafe_allow_html=True)
|
assets/styles.css
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* =========================================
|
| 2 |
+
PHARMA MATRIX DESIGN SYSTEM
|
| 3 |
+
========================================= */
|
| 4 |
+
:root {
|
| 5 |
+
--glass-bg: rgba(13, 17, 23, 0.7);
|
| 6 |
+
--glass-border: rgba(255, 255, 255, 0.08);
|
| 7 |
+
--neon-cyan: #00f2ea;
|
| 8 |
+
--neon-purple: #ff0099;
|
| 9 |
+
--success-green: #00f260;
|
| 10 |
+
--warning-yellow: #f1c40f;
|
| 11 |
+
--danger-red: #ff3366;
|
| 12 |
+
--text-primary: #e6edf3;
|
| 13 |
+
--text-secondary: #8b95a5;
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
.stApp {
|
| 17 |
+
background: radial-gradient(circle at 50% 10%, #1a1f2e 0%, #0a0f1a 100%);
|
| 18 |
+
font-family: 'Inter', sans-serif;
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
/* --- ACTIVE DATA NODE (Manufacturer Card) --- */
|
| 22 |
+
.node-card {
|
| 23 |
+
background: rgba(21, 26, 36, 0.6);
|
| 24 |
+
backdrop-filter: blur(12px);
|
| 25 |
+
-webkit-backdrop-filter: blur(12px);
|
| 26 |
+
border: 1px solid var(--glass-border);
|
| 27 |
+
border-radius: 16px;
|
| 28 |
+
padding: 20px;
|
| 29 |
+
margin-bottom: 20px;
|
| 30 |
+
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
|
| 31 |
+
position: relative;
|
| 32 |
+
overflow: hidden;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
.node-card.active {
|
| 36 |
+
border-color: rgba(0, 242, 234, 0.3);
|
| 37 |
+
box-shadow: 0 0 20px rgba(0, 242, 234, 0.05);
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
.node-card.active::before {
|
| 41 |
+
content: '';
|
| 42 |
+
position: absolute;
|
| 43 |
+
top: 0;
|
| 44 |
+
left: 0;
|
| 45 |
+
width: 100%;
|
| 46 |
+
height: 2px;
|
| 47 |
+
background: linear-gradient(90deg, var(--neon-cyan), var(--neon-purple));
|
| 48 |
+
animation: scanline 2s linear infinite;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
.node-header {
|
| 52 |
+
display: flex;
|
| 53 |
+
justify-content: space-between;
|
| 54 |
+
align-items: center;
|
| 55 |
+
margin-bottom: 12px;
|
| 56 |
+
border-bottom: 1px solid rgba(255, 255, 255, 0.05);
|
| 57 |
+
padding-bottom: 10px;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
.node-title {
|
| 61 |
+
font-size: 1.1rem;
|
| 62 |
+
font-weight: 700;
|
| 63 |
+
color: var(--text-primary);
|
| 64 |
+
letter-spacing: 0.5px;
|
| 65 |
+
display: flex;
|
| 66 |
+
align-items: center;
|
| 67 |
+
gap: 8px;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
.node-badge {
|
| 71 |
+
font-size: 0.75rem;
|
| 72 |
+
background: rgba(255, 255, 255, 0.05);
|
| 73 |
+
padding: 4px 8px;
|
| 74 |
+
border-radius: 12px;
|
| 75 |
+
color: var(--text-secondary);
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
.node-badge.active {
|
| 79 |
+
background: rgba(0, 242, 234, 0.1);
|
| 80 |
+
color: var(--neon-cyan);
|
| 81 |
+
border: 1px solid rgba(0, 242, 234, 0.2);
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
/* --- ORDER CHIPS (Data Units) --- */
|
| 85 |
+
.order-chip {
|
| 86 |
+
display: flex;
|
| 87 |
+
justify-content: space-between;
|
| 88 |
+
align-items: center;
|
| 89 |
+
background: rgba(0, 0, 0, 0.3);
|
| 90 |
+
border-left: 3px solid #555;
|
| 91 |
+
padding: 10px 12px;
|
| 92 |
+
border-radius: 6px;
|
| 93 |
+
margin-bottom: 8px;
|
| 94 |
+
position: relative;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
/* Confidence Levels */
|
| 98 |
+
.conf-high {
|
| 99 |
+
border-left-color: var(--success-green);
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
.conf-med {
|
| 103 |
+
border-left-color: var(--warning-yellow);
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
.conf-low {
|
| 107 |
+
border-left-color: var(--danger-red);
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
.chip-main {
|
| 111 |
+
display: flex;
|
| 112 |
+
flex-direction: column;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
.chip-med {
|
| 116 |
+
font-size: 0.95rem;
|
| 117 |
+
font-weight: 600;
|
| 118 |
+
color: var(--text-primary);
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
.chip-meta {
|
| 122 |
+
font-size: 0.75rem;
|
| 123 |
+
color: var(--text-secondary);
|
| 124 |
+
display: flex;
|
| 125 |
+
gap: 8px;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
.chip-qty {
|
| 129 |
+
color: var(--neon-cyan);
|
| 130 |
+
font-weight: 600;
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
/* --- ANIMATIONS --- */
|
| 134 |
+
@keyframes scanline {
|
| 135 |
+
0% {
|
| 136 |
+
transform: translateX(-100%);
|
| 137 |
+
opacity: 0;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
50% {
|
| 141 |
+
opacity: 1;
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
100% {
|
| 145 |
+
transform: translateX(100%);
|
| 146 |
+
opacity: 0;
|
| 147 |
+
}
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
@keyframes pulse-ring {
|
| 151 |
+
0% {
|
| 152 |
+
box-shadow: 0 0 0 0 rgba(0, 242, 234, 0.4);
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
70% {
|
| 156 |
+
box-shadow: 0 0 0 10px rgba(0, 242, 234, 0);
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
100% {
|
| 160 |
+
box-shadow: 0 0 0 0 rgba(0, 242, 234, 0);
|
| 161 |
+
}
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
.pulse {
|
| 165 |
+
animation: pulse-ring 2s infinite;
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
/* Headers */
|
| 169 |
+
.main-header {
|
| 170 |
+
font-family: 'Inter', 'Helvetica Neue', sans-serif;
|
| 171 |
+
font-weight: 800;
|
| 172 |
+
font-size: 2.2rem;
|
| 173 |
+
background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
|
| 174 |
+
background-clip: text;
|
| 175 |
+
-webkit-background-clip: text;
|
| 176 |
+
-webkit-text-fill-color: transparent;
|
| 177 |
+
margin-bottom: 0;
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
.sub-header {
|
| 181 |
+
font-family: 'Inter', sans-serif;
|
| 182 |
+
color: #8b95a5;
|
| 183 |
+
font-size: 1rem;
|
| 184 |
+
margin-bottom: 1.5rem;
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
/* Status Indicators */
|
| 188 |
+
.status-ready {
|
| 189 |
+
display: inline-flex;
|
| 190 |
+
align-items: center;
|
| 191 |
+
gap: 8px;
|
| 192 |
+
background: rgba(16, 185, 129, 0.1);
|
| 193 |
+
border: 1px solid rgba(16, 185, 129, 0.3);
|
| 194 |
+
color: #10b981;
|
| 195 |
+
padding: 8px 16px;
|
| 196 |
+
border-radius: 20px;
|
| 197 |
+
font-size: 0.85rem;
|
| 198 |
+
font-weight: 500;
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
.status-loading {
|
| 202 |
+
display: inline-flex;
|
| 203 |
+
align-items: center;
|
| 204 |
+
gap: 8px;
|
| 205 |
+
background: rgba(251, 191, 36, 0.1);
|
| 206 |
+
border: 1px solid rgba(251, 191, 36, 0.3);
|
| 207 |
+
color: #fbbf24;
|
| 208 |
+
padding: 8px 16px;
|
| 209 |
+
border-radius: 20px;
|
| 210 |
+
font-size: 0.85rem;
|
| 211 |
+
font-weight: 500;
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
+
.status-dot {
|
| 215 |
+
width: 8px;
|
| 216 |
+
height: 8px;
|
| 217 |
+
border-radius: 50%;
|
| 218 |
+
animation: pulse 2s infinite;
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
.status-dot.green {
|
| 222 |
+
background: #10b981;
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
.status-dot.yellow {
|
| 226 |
+
background: #fbbf24;
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
@keyframes pulse {
|
| 230 |
+
|
| 231 |
+
0%,
|
| 232 |
+
100% {
|
| 233 |
+
opacity: 1;
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
50% {
|
| 237 |
+
opacity: 0.5;
|
| 238 |
+
}
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
.box-title {
|
| 242 |
+
font-size: 1rem;
|
| 243 |
+
font-weight: 600;
|
| 244 |
+
color: #4facfe;
|
| 245 |
+
margin-bottom: 12px;
|
| 246 |
+
padding-bottom: 8px;
|
| 247 |
+
border-bottom: 1px solid rgba(79, 172, 254, 0.2);
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
.order-item {
|
| 251 |
+
background: rgba(38, 44, 61, 0.8);
|
| 252 |
+
border-radius: 8px;
|
| 253 |
+
padding: 8px 12px;
|
| 254 |
+
margin-bottom: 6px;
|
| 255 |
+
font-size: 0.85rem;
|
| 256 |
+
display: flex;
|
| 257 |
+
justify-content: space-between;
|
| 258 |
+
align-items: center;
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
/* Voice Input Section */
|
| 262 |
+
.voice-container {
|
| 263 |
+
background: linear-gradient(135deg, rgba(79, 172, 254, 0.05) 0%, rgba(0, 242, 254, 0.05) 100%);
|
| 264 |
+
border: 1px solid rgba(79, 172, 254, 0.2);
|
| 265 |
+
border-radius: 16px;
|
| 266 |
+
padding: 24px;
|
| 267 |
+
text-align: center;
|
| 268 |
+
}
|
| 269 |
+
|
| 270 |
+
.mic-icon {
|
| 271 |
+
font-size: 3rem;
|
| 272 |
+
margin-bottom: 12px;
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
/* Hide Streamlit Branding */
|
| 276 |
+
#MainMenu {
|
| 277 |
+
visibility: hidden;
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
footer {
|
| 281 |
+
visibility: hidden;
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
/* --- INFO FOOTER --- */
|
| 285 |
+
.info-container {
|
| 286 |
+
margin-top: 40px;
|
| 287 |
+
padding: 24px;
|
| 288 |
+
background: rgba(13, 17, 23, 0.4);
|
| 289 |
+
border: 1px solid rgba(255, 255, 255, 0.05);
|
| 290 |
+
border-radius: 16px;
|
| 291 |
+
backdrop-filter: blur(10px);
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
.info-grid {
|
| 295 |
+
display: grid;
|
| 296 |
+
grid-template-columns: 1fr 1fr;
|
| 297 |
+
gap: 24px;
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
.info-section h4 {
|
| 301 |
+
color: var(--text-primary);
|
| 302 |
+
font-size: 1.1rem;
|
| 303 |
+
margin-bottom: 16px;
|
| 304 |
+
border-bottom: 2px solid var(--neon-cyan);
|
| 305 |
+
display: inline-block;
|
| 306 |
+
padding-bottom: 4px;
|
| 307 |
+
}
|
| 308 |
+
|
| 309 |
+
.info-list {
|
| 310 |
+
list-style: none;
|
| 311 |
+
padding: 0;
|
| 312 |
+
margin: 0;
|
| 313 |
+
}
|
| 314 |
+
|
| 315 |
+
.info-item {
|
| 316 |
+
margin-bottom: 12px;
|
| 317 |
+
color: var(--text-secondary);
|
| 318 |
+
font-size: 0.9rem;
|
| 319 |
+
display: flex;
|
| 320 |
+
align-items: start;
|
| 321 |
+
gap: 8px;
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
.info-highlight {
|
| 325 |
+
color: var(--neon-cyan);
|
| 326 |
+
font-weight: 600;
|
| 327 |
+
}
|
| 328 |
+
|
| 329 |
+
.info-example {
|
| 330 |
+
color: var(--text-secondary);
|
| 331 |
+
font-style: italic;
|
| 332 |
+
opacity: 0.8;
|
| 333 |
+
}
|
core/asr_engine.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import streamlit as st

class ASREngine:
    """Whisper-family automatic speech recognition engine.

    Wraps a HuggingFace ASR pipeline. Uses GPU with float16 when CUDA is
    available, otherwise CPU with float32.
    """

    def __init__(self, model_id: str = "openai/whisper-tiny"):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
        self.model_id = model_id
        # Pass model_id explicitly so st.cache_resource keys the cache on it.
        # Previously only `_self` (excluded from hashing by its leading
        # underscore) was passed, so the cache had NO hashed inputs and every
        # ASREngine — regardless of model_id — reused the first loaded pipeline.
        self.pipe = self._load_model(self.model_id)

    @st.cache_resource(show_spinner=False)
    def _load_model(_self, model_id: str):
        """Load and cache the ASR pipeline for `model_id`.

        `_self` is underscore-prefixed so Streamlit skips hashing the instance;
        `model_id` IS hashed, giving one cached pipeline per model id.
        """
        model = AutoModelForSpeechSeq2Seq.from_pretrained(
            model_id,
            torch_dtype=_self.torch_dtype,
            low_cpu_mem_usage=True,
            use_safetensors=True
        )
        model.to(_self.device)

        processor = AutoProcessor.from_pretrained(model_id)

        pipe = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            max_new_tokens=128,
            chunk_length_s=30,
            batch_size=16,
            return_timestamps=True,
            torch_dtype=_self.torch_dtype,
            device=_self.device,
        )
        return pipe

    def transcribe(self, audio_array) -> str:
        """Transcribe an audio array (or file path) to text.

        Returns the transcribed text, or an "Error: ..." string on failure —
        callers display the string directly instead of catching exceptions.
        """
        try:
            result = self.pipe(audio_array)
            return result["text"]
        except Exception as e:
            return f"Error: {str(e)}"
|
core/entity_extractor.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import json
|
| 3 |
+
from typing import List, Dict
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from simulation.manufacturer_db import ManufacturerDB
|
| 6 |
+
|
| 7 |
+
class EntityExtractor:
    """Parse free-form transcribed pharmacy orders into structured items.

    Splits a transcription into order segments, fuzzy-matches each segment
    against the known medicine catalogue, then extracts form, quantity and
    dosage. `extract` returns one dict per matched medicine.
    """

    def __init__(self, db: "ManufacturerDB"):
        self.db = db
        self.aliases = self._load_aliases()

        # Form keywords that indicate a medicine's dosage form nearby.
        self.form_keywords = {
            'tablet': ['tablet', 'tab', 'tabs', 'capsule', 'cap', 'caps'],
            'syrup': ['syrup', 'liquid', 'suspension'],
            'injection': ['injection', 'inj', 'vial', 'ampoule'],
            'cream': ['cream', 'gel', 'ointment', 'tube'],
            'spray': ['spray', 'inhaler', 'puff'],
            'drops': ['drops', 'eye drops', 'ear drops'],
            'sachet': ['sachet', 'powder', 'granules']
        }

        # Unit keywords for quantity extraction (plural before singular so the
        # regex alternation prefers the longer match).
        self.unit_keywords = ['strips', 'strip', 'slips', 'slip', 'bottles', 'bottle',
                              'tablets', 'tabs', 'pieces', 'pcs', 'boxes', 'box',
                              'packs', 'pack', 'vials', 'vial', 'ampoules']

        # Spoken number -> digit mapping (substituted longest-phrase-first,
        # see _normalize_text).
        self.spoken_numbers = {
            'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
            'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,
            'eleven': 11, 'twelve': 12, 'fifteen': 15, 'twenty': 20,
            'twenty-five': 25, 'thirty': 30, 'forty': 40, 'fifty': 50,
            'sixty': 60, 'seventy': 70, 'eighty': 80, 'ninety': 90,
            'hundred': 100, 'two hundred': 200, 'three hundred': 300,
            'five hundred': 500, 'thousand': 1000
        }

    def _load_aliases(self) -> Dict:
        """Load pronunciation aliases from data/aliases.json ({} if missing)."""
        alias_path = Path("data/aliases.json")
        if alias_path.exists():
            with open(alias_path, 'r') as f:
                return json.load(f)
        return {}

    def _normalize_text(self, text: str) -> str:
        """Normalize input text for parsing.

        Lower-cases, strips ASR artifacts and filler words, pads punctuation,
        and converts spoken numbers to digits.
        """
        text = text.lower()
        # Remove common ASR artifacts
        text = re.sub(r'</s>|<unk>|<s>', '', text)
        # Remove filler words
        text = re.sub(r'\b(uh|um|like|maybe|please|kindly)\b', '', text)
        # Normalize punctuation
        text = text.replace(",", " , ").replace(".", " ")
        # Convert spoken numbers to digits, substituting the LONGEST phrases
        # first. Plain dict order broke compounds: "twenty" fired inside
        # "twenty-five" (word boundary at the hyphen) and "two" inside
        # "two hundred", yielding garbage like "20-five" / "2 100".
        ordered = sorted(self.spoken_numbers.items(),
                         key=lambda kv: len(kv[0]), reverse=True)
        for word, num in ordered:
            text = re.sub(rf'\b{re.escape(word)}\b', str(num), text)
        return text.strip()

    def _resolve_alias(self, word: str) -> str:
        """Map a (mis)pronounced word to its canonical medicine name, if known."""
        word_lower = word.lower()
        for canonical, aliases in self.aliases.items():
            if word_lower in aliases or word_lower == canonical:
                return canonical
        return word

    def _extract_form(self, segment: str) -> str:
        """Extract the dosage form mentioned in the segment ('tablet' default)."""
        segment_lower = segment.lower()
        for form_type, keywords in self.form_keywords.items():
            for kw in keywords:
                if kw in segment_lower:
                    return form_type
        return "tablet"  # Default

    def _extract_quantity(self, segment: str) -> tuple:
        """Extract (quantity, unit) from the segment.

        Prefers a number explicitly followed by a unit word so that dosage
        numbers ("500mg") are not mistaken for quantities, which the old
        single optional-unit regex did (it grabbed the first number it saw).
        """
        unit_alt = '|'.join(self.unit_keywords)
        match = re.search(rf'(\d+)\s*({unit_alt})\b', segment, re.IGNORECASE)
        if match:
            num, unit = match.group(1), match.group(2).lower()
            # Normalize common ASR typos
            if unit in ('slips', 'slip'):
                unit = 'strips'
            return num, unit

        # Fallback: a bare number that is not part of a dosage expression.
        match = re.search(r'(\d+)\b(?!\s*(?:mg|ml|gm|mcg))', segment, re.IGNORECASE)
        if match:
            return match.group(1), "units"

        return "1", "units"  # Default

    def _extract_dosage(self, segment: str) -> str:
        """Extract a dosage like '500mg' from the segment ('-' if absent)."""
        dosage_match = re.search(r'(\d+)\s*(mg|ml|gm|mcg)', segment, re.IGNORECASE)
        if dosage_match:
            return f"{dosage_match.group(1)}{dosage_match.group(2)}"
        return "-"

    def extract(self, text: str) -> List[Dict]:
        """
        Extract medicine entities from text.
        Returns: List of dicts {'medicine', 'form', 'quantity', 'dosage',
        'confidence', 'original_segment'}.
        """
        if not text:
            return []

        # Local import kept so this module loads without rapidfuzz installed;
        # hoisted here — it previously ran inside the per-segment loop.
        from rapidfuzz import process, fuzz

        text = self._normalize_text(text)

        found_orders = []

        # All known medicines from the DB for fuzzy matching.
        known_meds = self.db.medicines['medicine_name'].tolist()

        # Split multi-item orders on verbs/conjunctions/commas:
        # "send", "order", "add", "also", "plus", "then", "and", ","
        delimiters = r'\b(?:send|add|want|need|order|also|plus|then)\b|,|\band\b'
        segments = re.split(delimiters, text)

        for segment in segments:
            segment = segment.strip()
            if not segment or len(segment) < 3:
                continue

            # Resolve word-level aliases before fuzzy matching.
            words = segment.split()
            resolved_segment = ' '.join(self._resolve_alias(w) for w in words)

            # Case-insensitive fuzzy match: catalogue names are capitalised
            # while the normalized text is lower-cased, so lower both sides.
            match = process.extractOne(resolved_segment, known_meds,
                                       scorer=fuzz.partial_ratio,
                                       processor=str.lower)

            if match and match[1] > 75:  # Confidence threshold
                med_name = match[0]

                form = self._extract_form(segment)
                num, unit = self._extract_quantity(segment)
                quantity = f"{num} {unit}"

                dosage = self._extract_dosage(segment)
                if dosage == "-":
                    # Fall back to the catalogue's default dosage; med_name
                    # came from known_meds so the lookup always has a row.
                    med_row = self.db.medicines[self.db.medicines['medicine_name'] == med_name].iloc[0]
                    dosage = med_row['dosage']

                found_orders.append({
                    "medicine": med_name,
                    "form": form,
                    "quantity": quantity,
                    "dosage": dosage,
                    "confidence": match[1],
                    "original_segment": segment.strip()
                })

        return found_orders
|
core/excel_exporter.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import io
|
| 3 |
+
|
| 4 |
+
class ExcelExporter:
    """Serialise extracted order dicts to Excel (.xlsx) or CSV download bytes."""

    @staticmethod
    def _prepare_dataframe(orders: list, db=None) -> pd.DataFrame:
        """Build an enriched, readably-ordered DataFrame from raw order dicts.

        When `db` is given, each row gains the manufacturer and standardized
        medicine name ("Unknown"/"-" when unmapped). Returns an empty
        DataFrame when `orders` is empty.
        """
        if not orders:
            return pd.DataFrame()

        # Enrich data if a manufacturer DB is provided.
        enriched_orders = []
        for order in orders:
            row = order.copy()
            if db:
                mfr_info = db.get_manufacturer_by_medicine(order['medicine'])
                if mfr_info:
                    row['Manufacturer'] = mfr_info['name']
                    row['Standardized Medicine'] = mfr_info['medicine_match']
                else:
                    row['Manufacturer'] = "Unknown"
                    row['Standardized Medicine'] = "-"
            enriched_orders.append(row)

        df = pd.DataFrame(enriched_orders)

        # Rename columns for better readability.
        column_map = {
            "medicine": "Medicine Name (Extracted)",
            "quantity": "Quantity",
            "dosage": "Dosage",
            "original_segment": "Raw Voice Segment",
            "Manufacturer": "Manufacturer",
            "Standardized Medicine": "Standardized Name"
        }
        df = df.rename(columns=column_map)

        # Put the most useful columns first; any extras keep their order at the end.
        desired_order = [
            "Manufacturer",
            "Standardized Name",
            "Medicine Name (Extracted)",
            "Quantity",
            "Dosage",
            "Raw Voice Segment"
        ]
        cols_to_keep = [c for c in desired_order if c in df.columns]
        remaining = [c for c in df.columns if c not in cols_to_keep]

        return df[cols_to_keep + remaining]

    @staticmethod
    def export(orders: list, db=None) -> "bytes | None":
        """Convert order dicts to .xlsx bytes; returns None when `orders` is empty."""
        df = ExcelExporter._prepare_dataframe(orders, db)
        if df.empty:
            return None

        output = io.BytesIO()
        with pd.ExcelWriter(output, engine='openpyxl') as writer:
            df.to_excel(writer, index=False, sheet_name='Orders')

            # Auto-fit column widths, capped at 50 chars. get_column_letter
            # is correct past column Z, unlike the old chr(65 + idx) hack.
            # Local import: openpyxl is guaranteed here by engine='openpyxl'.
            from openpyxl.utils import get_column_letter
            worksheet = writer.sheets['Orders']
            for idx, col in enumerate(df.columns):
                max_len = max(
                    df[col].astype(str).map(len).max(),
                    len(col)
                ) + 2
                worksheet.column_dimensions[get_column_letter(idx + 1)].width = min(max_len, 50)

        return output.getvalue()

    @staticmethod
    def export_csv(orders: list, db=None) -> bytes:
        """Convert order dicts to UTF-8 CSV bytes (b"" when empty).

        Always returns bytes for st.download_button; the old version was
        annotated -> str yet returned bytes on the non-empty path and the
        str "" on the empty path.
        """
        df = ExcelExporter._prepare_dataframe(orders, db)
        if df.empty:
            return b""
        return df.to_csv(index=False).encode('utf-8')
|
core/preprocessor.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import librosa
|
| 2 |
+
import noisereduce as nr
|
| 3 |
+
import soundfile as sf
|
| 4 |
+
import numpy as np
|
| 5 |
+
import io
|
| 6 |
+
|
| 7 |
+
class AudioPreprocessor:
    """Prepare uploaded/recorded audio for ASR: resample to mono 16 kHz,
    reduce stationary noise, and peak-normalize."""

    def __init__(self, target_sr: int = 16000):
        # Whisper-family models expect 16 kHz mono input.
        self.target_sr = target_sr

    def process(self, audio_file) -> np.ndarray:
        """
        Process an audio source for ASR.

        Args:
            audio_file: File path or readable file-like object.

        Returns:
            1-D float array at `target_sr` Hz, mono, peak-normalized.

        Raises:
            Whatever librosa raises when the input cannot be decoded.
        """
        try:
            audio, sr = librosa.load(audio_file, sr=self.target_sr, mono=True)
        except Exception:
            # File-like objects may have been partially consumed; rewind
            # and retry once before giving up. Check for `seek` too —
            # having `read` alone does not guarantee the object is seekable.
            if hasattr(audio_file, 'read') and hasattr(audio_file, 'seek'):
                audio_file.seek(0)
                audio, sr = librosa.load(audio_file, sr=self.target_sr, mono=True)
            else:
                # Bare `raise` preserves the original traceback
                # (unlike `raise e`).
                raise

        # Spectral-gating noise reduction; skip clips shorter than ~0.5 s,
        # which are too short to estimate a reliable noise profile.
        if len(audio) > self.target_sr * 0.5:
            audio = nr.reduce_noise(y=audio, sr=self.target_sr, stationary=True)

        # Peak-normalize so downstream models see a consistent input level.
        audio = librosa.util.normalize(audio)

        return audio
|
data/aliases.json
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"paracetamol": [
|
| 3 |
+
"paraacetamole",
|
| 4 |
+
"parcetamol",
|
| 5 |
+
"paracetmal",
|
| 6 |
+
"paracetmol"
|
| 7 |
+
],
|
| 8 |
+
"metformin": [
|
| 9 |
+
"metformine",
|
| 10 |
+
"metforman",
|
| 11 |
+
"metphormin"
|
| 12 |
+
],
|
| 13 |
+
"augmentin": [
|
| 14 |
+
"augmentine",
|
| 15 |
+
"agmentin",
|
| 16 |
+
"augmuntin"
|
| 17 |
+
],
|
| 18 |
+
"azithromycin": [
|
| 19 |
+
"azithromicin",
|
| 20 |
+
"azithro",
|
| 21 |
+
"azith"
|
| 22 |
+
],
|
| 23 |
+
"cetirizine": [
|
| 24 |
+
"cetirizin",
|
| 25 |
+
"cetrizine",
|
| 26 |
+
"cetriz"
|
| 27 |
+
],
|
| 28 |
+
"pantoprazole": [
|
| 29 |
+
"pantoprazol"
|
| 30 |
+
],
|
| 31 |
+
"omeprazole": [
|
| 32 |
+
"omeprazol"
|
| 33 |
+
],
|
| 34 |
+
"calpol": [
|
| 35 |
+
"calpole",
|
| 36 |
+
"calpool"
|
| 37 |
+
],
|
| 38 |
+
"combiflam": [
|
| 39 |
+
"combiflem",
|
| 40 |
+
"combiflame",
|
| 41 |
+
"combiflm"
|
| 42 |
+
],
|
| 43 |
+
"volini": [
|
| 44 |
+
"volinee",
|
| 45 |
+
"voliny"
|
| 46 |
+
],
|
| 47 |
+
"ascoril": [
|
| 48 |
+
"ascorill",
|
| 49 |
+
"ascoryl"
|
| 50 |
+
],
|
| 51 |
+
"dolo": [
|
| 52 |
+
"dollo"
|
| 53 |
+
],
|
| 54 |
+
"clavam": [
|
| 55 |
+
"clavame",
|
| 56 |
+
"clavaum"
|
| 57 |
+
],
|
| 58 |
+
"nise": [
|
| 59 |
+
"nice",
|
| 60 |
+
"nisee"
|
| 61 |
+
],
|
| 62 |
+
"telekast": [
|
| 63 |
+
"telekastl"
|
| 64 |
+
],
|
| 65 |
+
"zinetac": [
|
| 66 |
+
"zinetack",
|
| 67 |
+
"zynetac"
|
| 68 |
+
],
|
| 69 |
+
"manforce": [
|
| 70 |
+
"manforse"
|
| 71 |
+
],
|
| 72 |
+
"unwanted": [
|
| 73 |
+
"unwantd",
|
| 74 |
+
"unwanteed"
|
| 75 |
+
],
|
| 76 |
+
"moxikind": [
|
| 77 |
+
"moxykind"
|
| 78 |
+
],
|
| 79 |
+
"crocin": [
|
| 80 |
+
"crosin",
|
| 81 |
+
"crokeen"
|
| 82 |
+
]
|
| 83 |
+
}
|
data/manufacturers.csv
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id,name,code
|
| 2 |
+
mfr_001,Sun Pharma,SUN
|
| 3 |
+
mfr_002,Cipla,CIP
|
| 4 |
+
mfr_003,GlaxoSmithKline,GSK
|
| 5 |
+
mfr_004,Dr. Reddy's,RED
|
| 6 |
+
mfr_005,Lupin,LUP
|
| 7 |
+
mfr_006,Mankind,MAN
|
data/medicines.csv
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
medicine_name,dosage,unit,manufacturer_id
|
| 2 |
+
Augmentin,625mg,strips,mfr_003
|
| 3 |
+
Calpol,500mg,strips,mfr_003
|
| 4 |
+
Crocin,650mg,strips,mfr_001
|
| 5 |
+
Volini,Spray,pcs,mfr_001
|
| 6 |
+
Azithromycin,500mg,strips,mfr_002
|
| 7 |
+
Cetirizine,10mg,strips,mfr_002
|
| 8 |
+
Omez,20mg,strips,mfr_004
|
| 9 |
+
Metformin,500mg,strips,mfr_004
|
| 10 |
+
Pantop,40mg,strips,mfr_001
|
| 11 |
+
Dolo,650mg,strips,mfr_005
|
| 12 |
+
Manforce,50mg,tablets,mfr_006
|
| 13 |
+
Unwanted-72,1.5mg,tablets,mfr_006
|
| 14 |
+
Telekast-L,10mg,strips,mfr_005
|
| 15 |
+
Combiflam,400mg,strips,mfr_001
|
| 16 |
+
Ascoril,Syrup,bottles,mfr_002
|
| 17 |
+
Zinetac,150mg,strips,mfr_003
|
| 18 |
+
Nise,100mg,strips,mfr_004
|
| 19 |
+
Clavam,625mg,strips,mfr_005
|
| 20 |
+
Moxikind-CV,625mg,strips,mfr_006
|
| 21 |
+
Pan-40,40mg,strips,mfr_005
|
| 22 |
+
Revital,Capsule,bottles,mfr_001
|
| 23 |
+
Foracort,Inhaler,pcs,mfr_002
|
| 24 |
+
Asthalin,Inhaler,pcs,mfr_002
|
| 25 |
+
Betnovate,Cream,tube,mfr_003
|
| 26 |
+
Stamlo,5mg,strips,mfr_004
|
| 27 |
+
Gluconorm,500mg,strips,mfr_005
|
| 28 |
+
Prega News,Kit,pack,mfr_006
|
| 29 |
+
Gas-O-Fast,Sachet,pack,mfr_006
|
| 30 |
+
Becosules,Capsule,strips,mfr_003
|
| 31 |
+
Shelcal,500mg,strips,mfr_004
|
| 32 |
+
Allegra,120mg,strips,mfr_002
|
| 33 |
+
Sinarest,Tablet,strips,mfr_006
|
| 34 |
+
Meftal-Spas,Tablet,strips,mfr_006
|
| 35 |
+
Omnigel,Gel,tube,mfr_002
|
| 36 |
+
Digene,Gel,bottle,mfr_001
|
| 37 |
+
Paracetamol,500mg,strips,mfr_001
|
docs/DEPLOYMENT_GUIDE.md
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Pharma Voice Orders - Deployment Guide
|
| 2 |
+
|
| 3 |
+
This guide explains how to deploy and configure the application for production use with large AI models.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## 🚀 Deployment Options Comparison
|
| 8 |
+
|
| 9 |
+
| Feature | Streamlit Cloud (Deploy Button) | Hugging Face Spaces |
|
| 10 |
+
|---------|--------------------------------|---------------------|
|
| 11 |
+
| **Ease of Use** | ⭐⭐⭐⭐⭐ One-click | ⭐⭐⭐⭐ Simple |
|
| 12 |
+
| **Free Tier** | 1GB RAM, limited | 16GB RAM (with GPU upgrade) |
|
| 13 |
+
| **GPU Support** | ❌ No | ✅ Yes (paid: T4, A10G) |
|
| 14 |
+
| **Large Models (Whisper Medium+)** | ⚠️ May timeout | ✅ Works well |
|
| 15 |
+
| **Privacy/Secrets** | ✅ Secrets Manager | ✅ Secrets Manager |
|
| 16 |
+
| **Best For** | Quick demos (tiny model) | Production + Large Models |
|
| 17 |
+
|
| 18 |
+
---
|
| 19 |
+
|
| 20 |
+
## 📱 Option 1: Streamlit Cloud (The "Deploy" Button)
|
| 21 |
+
|
| 22 |
+
The **Deploy** button in your localhost Streamlit UI deploys directly to **Streamlit Community Cloud**.
|
| 23 |
+
|
| 24 |
+
### How It Works:
|
| 25 |
+
1. Click **Deploy** → **Streamlit Community Cloud**
|
| 26 |
+
2. Connect your GitHub account
|
| 27 |
+
3. Select your repository and branch
|
| 28 |
+
4. Streamlit Cloud builds and hosts your app
|
| 29 |
+
|
| 30 |
+
### ⚠️ Limitations for Your Use Case:
|
| 31 |
+
- **1GB RAM limit** on free tier → Whisper Medium (3GB) will **fail**
|
| 32 |
+
- **No GPU** → Slow inference
|
| 33 |
+
- **Good for**: Demo with `whisper-tiny` only
|
| 34 |
+
|
| 35 |
+
### Setup:
|
| 36 |
+
```bash
|
| 37 |
+
# Push your code to GitHub first
|
| 38 |
+
git add .
|
| 39 |
+
git commit -m "Deploy to Streamlit Cloud"
|
| 40 |
+
git push origin main
|
| 41 |
+
```
|
| 42 |
+
Then click **Deploy** in the Streamlit UI.
|
| 43 |
+
|
| 44 |
+
---
|
| 45 |
+
|
| 46 |
+
## ☁️ Option 2: Hugging Face Spaces (Recommended)
|
| 47 |
+
|
| 48 |
+
**Best for**: Large models (Whisper Medium, Large, Google Med SR) with HF Token.
|
| 49 |
+
|
| 50 |
+
### Step-by-Step Deployment:
|
| 51 |
+
|
| 52 |
+
#### 1. Create a Hugging Face Space
|
| 53 |
+
1. Go to [huggingface.co/spaces](https://huggingface.co/spaces)
|
| 54 |
+
2. Click **Create new Space**
|
| 55 |
+
3. Select:
|
| 56 |
+
- **SDK**: Streamlit
|
| 57 |
+
- **Hardware**: CPU Basic (free) or upgrade for GPU
|
| 58 |
+
- **Visibility**: Public or Private
|
| 59 |
+
|
| 60 |
+
#### 2. Create `app.py` (Already Done ✅)
|
| 61 |
+
|
| 62 |
+
#### 3. Create `requirements.txt` for HF Spaces
|
| 63 |
+
Create a file **specifically for Spaces** (different from local):
|
| 64 |
+
|
| 65 |
+
```txt
|
| 66 |
+
streamlit
|
| 67 |
+
pandas
|
| 68 |
+
openpyxl
|
| 69 |
+
torch
|
| 70 |
+
transformers
|
| 71 |
+
librosa
|
| 72 |
+
noisereduce
|
| 73 |
+
soundfile
|
| 74 |
+
rapidfuzz
|
| 75 |
+
jiwer
|
| 76 |
+
regex
|
| 77 |
+
webrtcvad
|
| 78 |
+
numpy<2
|
| 79 |
+
huggingface_hub
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
#### 4. Add Your HF Token as a Secret
|
| 83 |
+
1. Go to your Space → **Settings** → **Repository secrets**
|
| 84 |
+
2. Add a new secret:
|
| 85 |
+
- **Name**: `HF_TOKEN`
|
| 86 |
+
- **Value**: Your Hugging Face read token (from [hf.co/settings/tokens](https://huggingface.co/settings/tokens))
|
| 87 |
+
|
| 88 |
+
#### 5. Update Code to Use Token
|
| 89 |
+
In `core/asr_engine.py`, the model will automatically use `HF_TOKEN`:
|
| 90 |
+
|
| 91 |
+
```python
|
| 92 |
+
import os
|
| 93 |
+
from huggingface_hub import login
|
| 94 |
+
|
| 95 |
+
# Auto-login with Space secret
|
| 96 |
+
token = os.environ.get("HF_TOKEN")
|
| 97 |
+
if token:
|
| 98 |
+
login(token=token)
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
#### 6. Push Code to the Space
|
| 102 |
+
```bash
|
| 103 |
+
# Clone your Space
|
| 104 |
+
git clone https://huggingface.co/spaces/YOUR_USERNAME/pharma-voice-orders
|
| 105 |
+
cd pharma-voice-orders
|
| 106 |
+
|
| 107 |
+
# Copy your files
|
| 108 |
+
cp -r /path/to/your/local/project/* .
|
| 109 |
+
|
| 110 |
+
# Push
|
| 111 |
+
git add .
|
| 112 |
+
git commit -m "Initial deployment"
|
| 113 |
+
git push
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
---
|
| 117 |
+
|
| 118 |
+
## 🔑 Using Gated Models (Google Med SR, etc.)
|
| 119 |
+
|
| 120 |
+
Some models require you to accept terms on the model page before using.
|
| 121 |
+
|
| 122 |
+
### Steps:
|
| 123 |
+
1. Visit the model page (e.g., `google/med-sr-model`)
|
| 124 |
+
2. Click **Agree and access model**
|
| 125 |
+
3. Add your `HF_TOKEN` to the Space secrets (as shown above)
|
| 126 |
+
4. Update your code to specify the model ID:
|
| 127 |
+
|
| 128 |
+
```python
|
| 129 |
+
# In core/asr_engine.py
|
| 130 |
+
model_id = "google/med-speech-recognition" # Example
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
---
|
| 134 |
+
|
| 135 |
+
## 🎯 Recommended Strategy for Your Project
|
| 136 |
+
|
| 137 |
+
| Phase | Platform | Model | Why |
|
| 138 |
+
|-------|----------|-------|-----|
|
| 139 |
+
| **Development** | Local (`uv run start`) | `whisper-tiny` | Fast iteration |
|
| 140 |
+
| **University Demo** | Hugging Face Spaces (Free CPU) | `whisper-small` | Balance of quality + speed |
|
| 141 |
+
| **Production Demo** | HF Spaces + GPU (T4) | `whisper-medium` or Google Med SR | Best quality |
|
| 142 |
+
|
| 143 |
+
---
|
| 144 |
+
|
| 145 |
+
## 🔄 Pre-Caching Models (Avoid First-Run Download)
|
| 146 |
+
|
| 147 |
+
To make the model load instantly for visitors, add a **pre-download script** in your Space:
|
| 148 |
+
|
| 149 |
+
Create `preload.py`:
|
| 150 |
+
```python
|
| 151 |
+
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
|
| 152 |
+
|
| 153 |
+
# Pre-download during build
|
| 154 |
+
model_id = "openai/whisper-medium"
|
| 155 |
+
AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
|
| 156 |
+
AutoProcessor.from_pretrained(model_id)
|
| 157 |
+
print("Model pre-cached!")
|
| 158 |
+
```
|
| 159 |
+
|
| 160 |
+
Then add to your Space's `README.md`:
|
| 161 |
+
```yaml
|
| 162 |
+
---
|
| 163 |
+
title: Pharma Voice Orders
|
| 164 |
+
sdk: streamlit
|
| 165 |
+
sdk_version: 1.53.0
|
| 166 |
+
app_file: app.py
|
| 167 |
+
pinned: false
|
| 168 |
+
preload: preload.py
|
| 169 |
+
---
|
| 170 |
+
```
|
| 171 |
+
|
| 172 |
+
---
|
| 173 |
+
|
| 174 |
+
## 📁 Final File Structure for HF Spaces
|
| 175 |
+
|
| 176 |
+
```
|
| 177 |
+
pharma-voice-orders/
|
| 178 |
+
├── app.py # Main Streamlit app
|
| 179 |
+
├── requirements.txt # Python dependencies
|
| 180 |
+
├── preload.py # Model pre-download script
|
| 181 |
+
├── README.md # Space metadata (YAML frontmatter)
|
| 182 |
+
├── core/ # Your modules
|
| 183 |
+
├── simulation/
|
| 184 |
+
├── evaluation/
|
| 185 |
+
└── data/
|
| 186 |
+
```
|
| 187 |
+
|
| 188 |
+
---
|
| 189 |
+
|
| 190 |
+
*Last Updated: January 2026*
|
docs/GETTING_STARTED.md
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Pharma Voice Orders - Getting Started
|
| 2 |
+
|
| 3 |
+
This document explains how to set up and run the **Pharma Voice Orders** application.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## 📋 Prerequisites
|
| 8 |
+
|
| 9 |
+
- **Python** 3.12+
|
| 10 |
+
- **[uv](https://github.com/astral-sh/uv)** (Modern Python package manager)
|
| 11 |
+
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
## 🚀 Quick Start
|
| 15 |
+
|
| 16 |
+
### 1. Install Dependencies
|
| 17 |
+
```bash
|
| 18 |
+
cd pharma-voice-orders
|
| 19 |
+
uv sync
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
### 2. Run the Application
|
| 23 |
+
```bash
|
| 24 |
+
uv run start
|
| 25 |
+
```
|
| 26 |
+
This will launch the Streamlit app at `http://localhost:8501`.
|
| 27 |
+
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
## 📦 Available Commands
|
| 31 |
+
|
| 32 |
+
```bash
|
| 33 |
+
# Run the app
|
| 34 |
+
uv run start
|
| 35 |
+
|
| 36 |
+
# Add a new dependency
|
| 37 |
+
uv add <package-name>
|
| 38 |
+
|
| 39 |
+
# Sync dependencies (install/update)
|
| 40 |
+
uv sync
|
| 41 |
+
|
| 42 |
+
# Run streamlit directly (alternative)
|
| 43 |
+
uv run streamlit run app.py
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
---
|
| 47 |
+
|
| 48 |
+
## 🔧 Project Structure
|
| 49 |
+
|
| 50 |
+
```
|
| 51 |
+
pharma-voice-orders/
|
| 52 |
+
├── app.py # Main Streamlit entry point
|
| 53 |
+
├── main.py # Script wrapper (for `uv run start`)
|
| 54 |
+
├── pyproject.toml # Project config & dependencies
|
| 55 |
+
├── core/ # Preprocessing, ASR, Entity Extraction, Export
|
| 56 |
+
├── simulation/ # Manufacturer DB, Order Queue
|
| 57 |
+
├── evaluation/ # Metrics (WER, Accuracy)
|
| 58 |
+
└── data/ # CSV files for medicines & manufacturers
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
---
|
| 62 |
+
|
| 63 |
+
## ❓ Why Use `uv run`?
|
| 64 |
+
|
| 65 |
+
Using `uv run` ensures the command executes within the project's **isolated virtual environment** (`.venv`), avoiding conflicts with globally installed packages (like Anaconda). This is the recommended way to run Python projects managed by `uv`.
|
| 66 |
+
|
| 67 |
+
---
|
| 68 |
+
|
| 69 |
+
## 🧪 Testing Your Setup
|
| 70 |
+
|
| 71 |
+
After running `uv run start`:
|
| 72 |
+
1. Open `http://localhost:8501` in your browser.
|
| 73 |
+
2. Select a distributor from the sidebar.
|
| 74 |
+
3. Record or upload an audio file (e.g., "Send 20 strips of Augmentin").
|
| 75 |
+
4. Watch orders get routed to manufacturer boxes.
|
| 76 |
+
|
| 77 |
+
---
|
| 78 |
+
|
| 79 |
+
*Last Updated: January 2026*
|
docs/HUGGINGFACE_SPACE_SETUP.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hugging Face Spaces - Docker Setup Guide
|
| 2 |
+
|
| 3 |
+
## 📋 Fill the Form (Screenshot Reference)
|
| 4 |
+
|
| 5 |
+
| Field | Suggested Value |
|
| 6 |
+
|-------|-----------------|
|
| 7 |
+
| **Owner** | `Khedhar` (your account) ✅ |
|
| 8 |
+
| **Space name** | `pharma-voice-orders` |
|
| 9 |
+
| **Short description** | `Voice-to-Order: Speech-to-text pharmaceutical ordering system using Whisper ASR` |
|
| 10 |
+
| **License** | `MIT` (or leave blank for now) |
|
| 11 |
+
| **Select the Space SDK** | 🐳 **Docker** |
|
| 12 |
+
| **Space hardware** | `CPU basic` (free) or `T4 GPU` for faster inference |
|
| 13 |
+
| **Visibility** | `Public` (for demo) or `Private` |
|
| 14 |
+
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
+
## 🐳 Why Docker?
|
| 18 |
+
|
| 19 |
+
1. **Full control** over dependencies and environment
|
| 20 |
+
2. **Pre-download models** during build (instant startup for users)
|
| 21 |
+
3. **Consistent behavior** across local and cloud
|
| 22 |
+
4. **Streamlit works perfectly** with Docker on HF Spaces
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
## 📁 Files You Need in Your Space
|
| 27 |
+
|
| 28 |
+
After creating the Space, you'll push these files:
|
| 29 |
+
|
| 30 |
+
```
|
| 31 |
+
pharma-voice-orders/
|
| 32 |
+
├── Dockerfile # Build instructions
|
| 33 |
+
├── requirements.txt # Python packages
|
| 34 |
+
├── app.py # Your Streamlit app
|
| 35 |
+
├── core/ # Your modules
|
| 36 |
+
├── simulation/
|
| 37 |
+
├── evaluation/
|
| 38 |
+
└── data/
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
---
|
| 42 |
+
|
| 43 |
+
## ⚙️ Adding Secrets (HF Token)
|
| 44 |
+
|
| 45 |
+
After creating the Space:
|
| 46 |
+
1. Go to **Settings** → **Repository secrets**
|
| 47 |
+
2. Add:
|
| 48 |
+
- **Name**: `HF_TOKEN`
|
| 49 |
+
- **Value**: Your token from https://huggingface.co/settings/tokens
|
| 50 |
+
|
| 51 |
+
The app will automatically use this token for gated models.
|
| 52 |
+
|
| 53 |
+
---
|
| 54 |
+
|
| 55 |
+
## 🚀 Next Steps
|
| 56 |
+
|
| 57 |
+
1. Fill the form with values above
|
| 58 |
+
2. Click **Create Space**
|
| 59 |
+
3. Clone the Space repo to your local machine
|
| 60 |
+
4. Copy all project files
|
| 61 |
+
5. Push to the Space
|
| 62 |
+
|
| 63 |
+
Next, create the `Dockerfile` and update `app.py` with proper status indicators (see `Dockerfile` in the repository root).
|
evaluation/metrics.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import jiwer
|
| 3 |
+
from rapidfuzz import fuzz
|
| 4 |
+
|
| 5 |
+
class MetricsEvaluator:
    """Static helpers for scoring ASR transcripts and extracted entities."""

    @staticmethod
    def calculate_wer(reference: str, hypothesis: str) -> float:
        """Return the Word Error Rate between reference and hypothesis.

        A missing (empty/None) reference or hypothesis is treated as a
        total miss and scores 1.0.
        """
        if not (reference and hypothesis):
            return 1.0
        return jiwer.wer(reference, hypothesis)

    @staticmethod
    def calculate_entity_accuracy(expected_entities: list, extracted_entities: list) -> float:
        """Fraction of expected medicines found among the extracted ones.

        A medicine counts as found when its name fuzzy-matches any
        extracted medicine at > 85% similarity (case-insensitive).
        Returns 0.0 when there are no expected entities.
        """
        if not expected_entities:
            return 0.0

        matches = sum(
            1
            for exp in expected_entities
            if any(
                fuzz.ratio(exp['medicine'].lower(), ext['medicine'].lower()) > 85
                for ext in extracted_entities
            )
        )
        return matches / len(expected_entities)
|
main.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import subprocess
|
| 3 |
+
|
| 4 |
+
def main():
    """Launch the Streamlit app inside the active Python environment.

    Invoking streamlit via `python -m streamlit` guarantees the resolved
    module belongs to the same interpreter (and virtualenv) that is
    running this script.
    """
    streamlit_cmd = [sys.executable, "-m", "streamlit", "run", "app.py"]
    subprocess.run(streamlit_cmd)


if __name__ == "__main__":
    main()
|
prompts/asr_prompt_guide.md
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ASR Prompt Engineering Guide for Pharma Voice Orders
|
| 2 |
+
|
| 3 |
+
> **Purpose**: Define expected voice order formats, sample patterns, and entity schema to improve transcription accuracy and structured data extraction.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## Expected Order Format
|
| 8 |
+
|
| 9 |
+
The ASR model should recognize **medicine orders** in the following patterns:
|
| 10 |
+
|
| 11 |
+
### Pattern 1: Medicine First
|
| 12 |
+
```
|
| 13 |
+
<Medicine Name> <Form> <Quantity> <Unit>
|
| 14 |
+
```
|
| 15 |
+
**Example**: "Paracetamol tablet 300 strips"
|
| 16 |
+
|
| 17 |
+
### Pattern 2: Form First
|
| 18 |
+
```
|
| 19 |
+
<Form> <Medicine Name> <Quantity> <Unit>
|
| 20 |
+
```
|
| 21 |
+
**Example**: "Tablet Paracetamol 300 strips"
|
| 22 |
+
|
| 23 |
+
### Pattern 3: Quantity First
|
| 24 |
+
```
|
| 25 |
+
<Quantity> <Unit> <Medicine Name> [<Dosage>]
|
| 26 |
+
```
|
| 27 |
+
**Example**: "20 strips Augmentin 625"
|
| 28 |
+
|
| 29 |
+
### Pattern 4: Comma-Separated List
|
| 30 |
+
```
|
| 31 |
+
<Order1>, <Order2>, <Order3>
|
| 32 |
+
```
|
| 33 |
+
**Example**: "Paracetamol 100 strips, Metformin 50 strips, Crocin 30 strips"
|
| 34 |
+
|
| 35 |
+
### Pattern 5: Connector Words
|
| 36 |
+
```
|
| 37 |
+
<Order1> and/also/plus/then <Order2>
|
| 38 |
+
```
|
| 39 |
+
**Example**: "Send Paracetamol 100 also Metformin 50"
|
| 40 |
+
|
| 41 |
+
---
|
| 42 |
+
|
| 43 |
+
## Entity Schema
|
| 44 |
+
|
| 45 |
+
Each extracted order should contain:
|
| 46 |
+
|
| 47 |
+
| Field | Type | Description | Example |
|
| 48 |
+
|-------|------|-------------|---------|
|
| 49 |
+
| `medicine` | string | Medicine name (as heard) | "Paracetamol" |
|
| 50 |
+
| `form` | string | Tablet, Syrup, Injection, etc. | "tablet" |
|
| 51 |
+
| `quantity` | string | Number + Unit | "300 strips" |
|
| 52 |
+
| `dosage` | string | Strength (mg, ml, etc.) | "500mg" |
|
| 53 |
+
|
| 54 |
+
---
|
| 55 |
+
|
| 56 |
+
## Sample Voice Orders (for Testing)
|
| 57 |
+
|
| 58 |
+
### Simple Orders
|
| 59 |
+
1. "Send 20 strips of Augmentin 625"
|
| 60 |
+
2. "Paracetamol tablet 100 strips"
|
| 61 |
+
3. "Tablet Metformin 500mg 50 strips"
|
| 62 |
+
4. "Order Crocin 650 30 strips"
|
| 63 |
+
5. "50 bottles of Ascoril syrup"
|
| 64 |
+
6. "20 tubes of Betnovate cream"
|
| 65 |
+
7. "10 vials of Amikacin injection"
|
| 66 |
+
|
| 67 |
+
### Multi-Item Orders
|
| 68 |
+
1. "Paracetamol 100 strips, Metformin 50 strips, Crocin 30 strips"
|
| 69 |
+
2. "Send Augmentin 20 strips also Calpol 15 strips and Dolo 10 strips"
|
| 70 |
+
3. "I need 50 Azithromycin, 30 Cetirizine, and 20 Omez"
|
| 71 |
+
|
| 72 |
+
### Complex/Noisy Orders
|
| 73 |
+
1. "Uh, send me Paracetamol, maybe 100? And also some Metformin"
|
| 74 |
+
2. "Tablet Paraacetamole 300 slips" (misspelling of Paracetamol, slips instead of strips)
|
| 75 |
+
3. "Give me twenty strips of Aug-mentin six two five"
|
| 76 |
+
|
| 77 |
+
---
|
| 78 |
+
|
| 79 |
+
## Form Keywords
|
| 80 |
+
|
| 81 |
+
The model should recognize these form indicators:
|
| 82 |
+
|
| 83 |
+
| Form Type | Keywords |
|
| 84 |
+
|-----------|----------|
|
| 85 |
+
| Tablet | tablet, tab, tabs, capsule, cap, caps |
|
| 86 |
+
| Syrup | syrup, liquid, bottle, suspension |
|
| 87 |
+
| Injection | injection, inj, vial, ampoule |
|
| 88 |
+
| Cream/Gel | cream, gel, ointment, tube |
|
| 89 |
+
| Spray | spray, inhaler, puff |
|
| 90 |
+
| Drops | drops, eye drops, ear drops |
|
| 91 |
+
| Sachet | sachet, powder, granules |
|
| 92 |
+
|
| 93 |
+
---
|
| 94 |
+
|
| 95 |
+
## Unit Keywords
|
| 96 |
+
|
| 97 |
+
| Unit Type | Keywords |
|
| 98 |
+
|-----------|----------|
|
| 99 |
+
| Strips | strips, strip, slips, slip |
|
| 100 |
+
| Bottles | bottles, bottle, btl |
|
| 101 |
+
| Tablets | tablets, tabs, pieces, pcs |
|
| 102 |
+
| Boxes | boxes, box, packs, pack |
|
| 103 |
+
| Vials | vials, vial, ampoules |
|
| 104 |
+
|
| 105 |
+
---
|
| 106 |
+
|
| 107 |
+
## Common Pronunciation Variations
|
| 108 |
+
|
| 109 |
+
| Correct Name | Common Variations |
|
| 110 |
+
|--------------|-------------------|
|
| 111 |
+
| Paracetamol | paraacetamole, parcetamol, paracetmal |
|
| 112 |
+
| Metformin | metformine, metforman, metphormin |
|
| 113 |
+
| Augmentin | augmentine, agmentin, augmuntin |
|
| 114 |
+
| Azithromycin | azithromicin, azithro, azith |
|
| 115 |
+
| Cetirizine | cetirizin, cetrizine, cetriz |
|
| 116 |
+
| Pantoprazole | pantop, pantoprazol |
|
| 117 |
+
|
| 118 |
+
---
|
| 119 |
+
|
| 120 |
+
## Structured Output Target
|
| 121 |
+
|
| 122 |
+
After processing, each order should be structured as:
|
| 123 |
+
|
| 124 |
+
```json
|
| 125 |
+
{
|
| 126 |
+
"medicine": "Paracetamol",
|
| 127 |
+
"medicine_standardized": "Crocin", // Matched from DB
|
| 128 |
+
"form": "tablet",
|
| 129 |
+
"quantity": "300 strips",
|
| 130 |
+
"dosage": "650mg",
|
| 131 |
+
"manufacturer": "Sun Pharma",
|
| 132 |
+
"original_segment": "Paracetamol tablet 300 strips"
|
| 133 |
+
}
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
---
|
| 137 |
+
|
| 138 |
+
## Tips for Model Training
|
| 139 |
+
|
| 140 |
+
1. **Normalize Numbers**: Convert "twenty" → 20, "hundred" → 100
|
| 141 |
+
2. **Handle Filler Words**: Ignore "uh", "um", "like", "maybe"
|
| 142 |
+
3. **Fuzzy Match Medicine Names**: Use 80%+ confidence threshold
|
| 143 |
+
4. **Default Values**: If unit not specified, use DB default
|
| 144 |
+
5. **Case Insensitive**: All matching should be lowercase
|
| 145 |
+
|
pyproject.toml
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["hatchling"]
|
| 3 |
+
build-backend = "hatchling.build"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "pharma-voice-orders"
|
| 7 |
+
version = "1.0.0"
|
| 8 |
+
description = "Voice-to-Order: Streamlit app for pharmaceutical distributors using Speech-to-Text (Whisper) and entity extraction."
|
| 9 |
+
readme = "README.md"
|
| 10 |
+
requires-python = ">=3.12"
|
| 11 |
+
dependencies = [
|
| 12 |
+
"jiwer>=4.0.0",
|
| 13 |
+
"librosa>=0.11.0",
|
| 14 |
+
"noisereduce>=3.0.3",
|
| 15 |
+
"numpy<2",
|
| 16 |
+
"openpyxl>=3.1.5",
|
| 17 |
+
"pandas>=2.3.3",
|
| 18 |
+
"rapidfuzz>=3.14.3",
|
| 19 |
+
"regex>=2026.1.15",
|
| 20 |
+
"soundfile>=0.13.1",
|
| 21 |
+
"streamlit>=1.53.0",
|
| 22 |
+
"torch>=2.9.1",
|
| 23 |
+
"transformers>=4.57.6",
|
| 24 |
+
"webrtcvad>=2.0.10",
|
| 25 |
+
]
|
| 26 |
+
|
| 27 |
+
[project.scripts]
|
| 28 |
+
start = "main:main"
|
| 29 |
+
|
| 30 |
+
[tool.hatch.build.targets.wheel]
|
| 31 |
+
packages = ["core", "simulation", "evaluation"]
|
| 32 |
+
|
| 33 |
+
[tool.uv.sources]
|
| 34 |
+
transformers = { git = "https://github.com/huggingface/transformers.git", rev = "65dc261512cbdb1ee72b88ae5b222f2605aad8e5" }
|
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
pandas
|
| 3 |
+
openpyxl
|
| 4 |
+
torch
|
| 5 |
+
transformers
|
| 6 |
+
librosa
|
| 7 |
+
noisereduce
|
| 8 |
+
soundfile
|
| 9 |
+
rapidfuzz
|
| 10 |
+
jiwer
|
| 11 |
+
regex
|
| 12 |
+
webrtcvad
|
| 13 |
+
numpy<2.0.0
|
| 14 |
+
huggingface_hub
|
simulation/manufacturer_db.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import json
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from rapidfuzz import process, fuzz
|
| 5 |
+
|
| 6 |
+
class ManufacturerDB:
    """Lookup layer over the CSV/JSON reference data in ``data_dir``.

    Loads manufacturers, medicines, and pronunciation aliases once at
    construction time, and resolves (possibly misheard) medicine names to
    their manufacturer via alias resolution + fuzzy matching.
    """

    # Minimum rapidfuzz WRatio score required to accept a medicine-name match.
    MATCH_THRESHOLD = 75

    def __init__(self, data_dir: str = "data"):
        self.data_dir = Path(data_dir)
        self.manufacturers = self._load_manufacturers()
        self.medicines = self._load_medicines()
        self.aliases = self._load_aliases()

    def _load_manufacturers(self) -> pd.DataFrame:
        """Load manufacturers.csv; empty frame with expected columns if absent."""
        path = self.data_dir / "manufacturers.csv"
        if not path.exists():
            return pd.DataFrame(columns=["id", "name", "code"])
        return pd.read_csv(path)

    def _load_medicines(self) -> pd.DataFrame:
        """Load medicines.csv; empty frame with expected columns if absent."""
        path = self.data_dir / "medicines.csv"
        if not path.exists():
            return pd.DataFrame(columns=["medicine_name", "dosage", "unit", "manufacturer_id"])
        return pd.read_csv(path)

    def _load_aliases(self) -> dict:
        """Load pronunciation aliases from aliases.json ({canonical: [alias, ...]})."""
        path = self.data_dir / "aliases.json"
        if not path.exists():
            return {}
        # Explicit encoding so behavior does not depend on the platform default.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def _resolve_alias(self, name: str) -> str:
        """Map a spoken/misheard name to its canonical medicine name, if aliased."""
        name_lower = name.lower()
        for canonical, aliases in self.aliases.items():
            if name_lower == canonical or name_lower in aliases:
                return canonical
        return name

    def get_all_manufacturers(self) -> list:
        """Return all manufacturers as a list of dicts (one per CSV row)."""
        return self.manufacturers.to_dict("records")

    def get_manufacturer_by_medicine(self, medicine_name: str) -> dict:
        """Find the manufacturer for a (possibly misheard) medicine name.

        Resolves aliases first, then fuzzy-matches against the known medicine
        list. Returns {"id", "name", "medicine_match", "confidence"} or None
        when the name is empty, no match clears MATCH_THRESHOLD, or the
        reference data is missing/inconsistent.
        """
        # Guard: ASR/extraction may hand us None or an empty string.
        if not medicine_name:
            return None

        # Resolve potential alias first.
        resolved_name = self._resolve_alias(medicine_name)

        # Guard: no reference data loaded -> nothing to match against.
        known_meds = self.medicines["medicine_name"].tolist()
        if not known_meds:
            return None

        # Fuzzy match to find the closest known medicine name.
        match = process.extractOne(resolved_name, known_meds, scorer=fuzz.WRatio)
        if not match or match[1] < self.MATCH_THRESHOLD:
            return None

        dataset_med_name = match[0]

        # Look up the manufacturer id for the matched medicine.
        med_row = self.medicines[self.medicines["medicine_name"] == dataset_med_name].iloc[0]
        mfr_id = med_row["manufacturer_id"]

        # Guard: a medicine row may reference a manufacturer id missing from
        # manufacturers.csv (inconsistent reference data) — don't IndexError.
        mfr_rows = self.manufacturers[self.manufacturers["id"] == mfr_id]
        if mfr_rows.empty:
            return None
        mfr_row = mfr_rows.iloc[0]

        return {
            "id": mfr_id,
            "name": mfr_row["name"],
            "medicine_match": dataset_med_name,  # standardized catalog name
            "confidence": match[1],
        }

    def get_orders_by_manufacturer(self, current_orders: list) -> dict:
        """Group a list of extracted orders by manufacturer name.

        Orders whose medicine cannot be mapped (missing name, low-confidence
        match, or inconsistent reference data) are collected under "Unknown".
        Returns e.g. {"Sun Pharma": [orders...], ..., "Unknown": [...]}.
        """
        grouped = {mfr: [] for mfr in self.manufacturers["name"].tolist()}
        grouped["Unknown"] = []  # For unmapped medicines

        for order in current_orders:
            # .get() may yield None; get_manufacturer_by_medicine handles it.
            mfr_info = self.get_manufacturer_by_medicine(order.get("medicine"))
            if mfr_info:
                # Attach the standardized catalog name alongside the raw one.
                order["medicine_standardized"] = mfr_info["medicine_match"]
                grouped[mfr_info["name"]].append(order)
            else:
                grouped["Unknown"].append(order)

        return grouped
|
| 98 |
+
|
simulation/order_queue.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict
|
| 2 |
+
import streamlit as st
|
| 3 |
+
from .manufacturer_db import ManufacturerDB
|
| 4 |
+
|
| 5 |
+
class OrderQueue:
    """Session-backed queue of extracted medicine orders.

    Orders are kept in Streamlit's session state under the "orders" key so
    they persist across script reruns within one user session.
    """

    def __init__(self):
        # Lazily create the backing list on first use in this session.
        st.session_state.setdefault("orders", [])

    def add_order(self, order: Dict):
        """
        Append one order to the queue.
        Order dict should contain: {'medicine': str, 'quantity': str, 'dosage': str}
        """
        st.session_state["orders"].append(order)

    def get_all_orders(self) -> List[Dict]:
        """Return the live list of queued orders."""
        return st.session_state["orders"]

    def clear_queue(self):
        """Discard every queued order."""
        st.session_state["orders"] = []

    def get_grouped_orders(self, db: ManufacturerDB) -> Dict[str, List[Dict]]:
        """Group all current orders by manufacturer."""
        return db.get_orders_by_manufacturer(st.session_state["orders"])
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|