cracker0935 committed on
Commit
eb48929
·
verified ·
1 Parent(s): 34732f8

add Backend_files

Browse files
Files changed (6) hide show
  1. Dockerfile +23 -0
  2. best_alzheimer_model.pth +3 -0
  3. main.py +150 -0
  4. model_arch.py +73 -0
  5. preprocessing.py +87 -0
  6. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Python 3.9 base image
FROM python:3.9

# All application code lives under /app
WORKDIR /app

# Install dependencies in their own layer so source edits don't bust the
# cache; --no-cache-dir keeps the image slim.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Bring in the rest of the application source
COPY . .

# Hugging Face Spaces runs the container as a non-root user (uid 1000);
# make the model cache directory world-writable so that user can download
# into it at runtime.
RUN mkdir -p /app/model_cache && chmod 777 /app/model_cache

# HF Spaces expects the server to listen on port 7860
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
best_alzheimer_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f96c6a0d0eccc515b9e17534436941870915081cbed439c50021c3051dd4fb54
3
+ size 574490709
main.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, HTTPException
2
+ from pydantic import BaseModel
3
+ from contextlib import asynccontextmanager
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from transformers import AutoTokenizer
7
+ from typing import List
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ import os
10
+ import requests # <--- Added this
11
+
12
+ from model_arch import ResearchHybridModel
13
+ from preprocessing import ChaParser
14
+
15
+ CONFIG = {
16
+ 'model_name': 'microsoft/deberta-base',
17
+ 'max_seq_len': 64,
18
+ 'max_word_len': 40,
19
+ 'device': torch.device("cuda" if torch.cuda.is_available() else "cpu"),
20
+ 'threshold': 0.20,
21
+ # PASTE YOUR COPIED HUGGING FACE LINK BELOW
22
+ 'model_url': os.getenv('MODEL_URL')
23
+ }
24
+
25
+ ml_components = {}
26
+
27
+ @asynccontextmanager
28
+ async def lifespan(app: FastAPI):
29
+ print("Loading Model and Tokenizer...")
30
+ ml_components['tokenizer'] = AutoTokenizer.from_pretrained(CONFIG['model_name'])
31
+
32
+ # --- MODEL DOWNLOAD LOGIC START ---
33
+ model_path = "best_alzheimer_model.pth"
34
+
35
+ if not os.path.exists(model_path):
36
+ print(f"Model file not found. Downloading from Hugging Face...")
37
+ try:
38
+ response = requests.get(CONFIG['model_url'], stream=True)
39
+ response.raise_for_status()
40
+ with open(model_path, "wb") as f:
41
+ for chunk in response.iter_content(chunk_size=8192):
42
+ f.write(chunk)
43
+ print("Download complete.")
44
+ except Exception as e:
45
+ print(f"Error downloading model: {e}")
46
+ raise RuntimeError("Failed to download model file")
47
+ # --- MODEL DOWNLOAD LOGIC END ---
48
+
49
+ model = ResearchHybridModel(model_name=CONFIG['model_name'])
50
+
51
+ # Load state dict
52
+ state_dict = torch.load(model_path, map_location=CONFIG['device'])
53
+
54
+ if list(state_dict.keys())[0].startswith('module.'):
55
+ state_dict = {k[7:]: v for k, v in state_dict.items()}
56
+
57
+ model.load_state_dict(state_dict)
58
+ model.to(CONFIG['device'])
59
+ model.eval()
60
+ ml_components['model'] = model
61
+ print("Model Loaded Successfully.")
62
+ yield
63
+ ml_components.clear()
64
+
65
# Application instance; model loading/unloading is handled by `lifespan`.
app = FastAPI(lifespan=lifespan)

# Allow the deployed frontend to call this API from the browser.
_ALLOWED_ORIGINS = ["https://adtrack.onrender.com"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=_ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
75
+
76
class SentenceAttention(BaseModel):
    """One participant sentence with its attention weight (min-max
    normalized for display by the /predict/cha endpoint)."""
    # Cleaned sentence text shown to the frontend.
    sentence: str
    # Attention weight in [0, 1] after normalization.
    attention_score: float
79
+
80
class PredictionResponse(BaseModel):
    """Response body for the /predict/cha endpoint."""
    # Name of the uploaded .cha file.
    filename: str
    # "DEMENTIA" or "HEALTHY CONTROL".
    prediction: str
    # Model probability of the dementia class (softmax over 2 logits).
    confidence: float
    # True when confidence >= CONFIG threshold.
    is_dementia: bool
    # Per-sentence attention weights for frontend highlighting.
    attention_map: List[SentenceAttention]
86
+
87
@app.post("/predict/cha", response_model=PredictionResponse)
async def predict_cha_file(file: UploadFile = File(...)):
    """Classify an uploaded CHAT (.cha) transcript.

    Returns the predicted label, the dementia probability, and a
    per-sentence attention map (min-max normalized for display).

    Raises HTTP 400 for non-.cha uploads or files with no *PAR lines.
    """
    # UploadFile.filename may be None; also accept upper-case extensions
    # such as ".CHA" (previously rejected).
    if not file.filename or not file.filename.lower().endswith('.cha'):
        raise HTTPException(status_code=400, detail="Only .cha files are supported")

    contents = await file.read()
    lines = contents.splitlines()

    parser = ChaParser()
    sentences, features, _ = parser.parse(lines)

    if not sentences:
        raise HTTPException(status_code=400, detail="No *PAR lines found in file")

    # Keep only the most recent utterances when the transcript exceeds the
    # model's maximum sentence-sequence length.
    if len(sentences) > CONFIG['max_seq_len']:
        sentences = sentences[-CONFIG['max_seq_len']:]
        features = features[-CONFIG['max_seq_len']:]

    tokenizer = ml_components['tokenizer']
    model = ml_components['model']

    encoding = tokenizer(
        sentences,
        padding='max_length',
        truncation=True,
        max_length=CONFIG['max_word_len'],
        return_tensors='pt'
    )

    # Add the batch dimension (a batch of one document).
    ids = encoding['input_ids'].unsqueeze(0).to(CONFIG['device'])
    mask = encoding['attention_mask'].unsqueeze(0).to(CONFIG['device'])
    feats = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(CONFIG['device'])
    lengths = torch.tensor([len(sentences)])

    with torch.no_grad():
        logits, attn_weights_tensor = model(ids, mask, feats, lengths)
        prob = F.softmax(logits, dim=1)[:, 1].item()

    attn_weights = attn_weights_tensor.cpu().numpy().flatten()
    attn_weights = attn_weights[:len(sentences)]

    # Min-max normalize attention for frontend display.
    if len(attn_weights) > 0:
        w_min, w_max = attn_weights.min(), attn_weights.max()
        if w_max - w_min > 0:
            attn_weights = (attn_weights - w_min) / (w_max - w_min)

    prediction_label = "DEMENTIA" if prob >= CONFIG['threshold'] else "HEALTHY CONTROL"

    attention_map = [
        SentenceAttention(sentence=sent, attention_score=float(score))
        for sent, score in zip(sentences, attn_weights)
    ]

    return {
        "filename": file.filename,
        "prediction": prediction_label,
        "confidence": prob,
        "is_dementia": prob >= CONFIG['threshold'],
        "attention_map": attention_map
    }
147
+
148
@app.get("/health")
def health_check():
    """Simple liveness endpoint reporting the active compute device."""
    device_name = str(CONFIG['device'])
    return {"status": "active", "device": device_name}
model_arch.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from transformers import AutoModel
5
+
6
+ class GatedFeatureFusion(nn.Module):
7
+ def __init__(self, embed_dim, feature_dim):
8
+ super().__init__()
9
+ self.feat_proj = nn.Linear(feature_dim, embed_dim)
10
+ self.gate = nn.Sequential(
11
+ nn.Linear(embed_dim * 2, embed_dim),
12
+ nn.Sigmoid()
13
+ )
14
+ self.norm = nn.LayerNorm(embed_dim)
15
+
16
+ def forward(self, text_embeds, raw_features):
17
+ feat_embeds = F.relu(self.feat_proj(raw_features))
18
+ combined = torch.cat([text_embeds, feat_embeds], dim=2)
19
+ z = self.gate(combined)
20
+ fused = z * text_embeds + (1 - z) * feat_embeds
21
+ return self.norm(fused)
22
+
23
class ResearchHybridModel(nn.Module):
    """Hierarchical dementia classifier.

    Pipeline: transformer sentence encoder -> gated fusion with linguistic
    features -> BiLSTM over the sentence sequence -> additive attention
    pooling -> 2-way classifier.

    forward() returns (logits, attention_weights).
    """

    def __init__(self, model_name='microsoft/deberta-base', feature_dim=6):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        # Read the hidden size from the backbone config instead of assuming
        # 768, so alternative checkpoints (e.g. *-large) also work.
        self.bert_hidden = getattr(self.bert.config, 'hidden_size', 768)

        self.fusion = GatedFeatureFusion(self.bert_hidden, feature_dim)

        lstm_hidden = 256
        self.lstm = nn.LSTM(
            input_size=self.bert_hidden,
            hidden_size=lstm_hidden,
            num_layers=2,
            batch_first=True,
            bidirectional=True,
            dropout=0.3
        )

        lstm_out_dim = 2 * lstm_hidden  # bidirectional -> forward + backward
        self.attention = nn.Sequential(
            nn.Linear(lstm_out_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

        self.classifier = nn.Sequential(
            nn.Linear(lstm_out_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, 2)
        )

    def forward(self, input_ids, attention_mask, linguistic_features, lengths):
        """Classify a batch of documents.

        input_ids/attention_mask: (batch, seq, word) token tensors.
        linguistic_features: (batch, seq, feature_dim) float tensor.
        lengths: (batch,) number of real (non-padded) sentences per document.
        Returns (logits of shape (batch, 2), attention weights).
        """
        batch_size, seq_len, word_len = input_ids.shape

        # Encode every sentence independently: flatten (B, S, W) -> (B*S, W),
        # take the [CLS] position embedding, then restore (B, S, H).
        flat_input = input_ids.view(-1, word_len)
        flat_mask = attention_mask.view(-1, word_len)
        bert_out = self.bert(flat_input, attention_mask=flat_mask).last_hidden_state
        sent_embeds = bert_out[:, 0, :].view(batch_size, seq_len, -1)

        fused = self.fusion(sent_embeds, linguistic_features)

        # Pack so padded sentence slots do not influence the LSTM state.
        packed = torch.nn.utils.rnn.pack_padded_sequence(
            fused, lengths.cpu(), batch_first=True, enforce_sorted=False)
        packed_out, _ = self.lstm(packed)
        lstm_out, _ = torch.nn.utils.rnn.pad_packed_sequence(
            packed_out, batch_first=True, total_length=seq_len)

        # Mask padded positions before softmax so they get ~zero attention.
        attn_scores = self.attention(lstm_out)
        mask = (torch.arange(seq_len, device=input_ids.device)[None, :]
                < lengths.to(input_ids.device)[:, None]).float().unsqueeze(2)
        attn_scores = attn_scores.masked_fill(mask == 0, -1e9)
        attn_weights = F.softmax(attn_scores, dim=1)

        context = torch.sum(lstm_out * attn_weights, dim=1)
        # NOTE(review): .squeeze() also drops the batch dim when batch == 1;
        # callers that flatten the weights (as main.py does) are unaffected,
        # but batch-shape-sensitive callers should be aware.
        return self.classifier(context), attn_weights.squeeze()
preprocessing.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import numpy as np
3
+
4
class LiveFeatureExtractor:
    """Extract CHAT-annotation disfluency features from a single *PAR line."""

    def __init__(self):
        # CHAT markers counted as features: fillers "&-um", repetitions
        # "[/]"/"[//]", retracings "[//]", incomplete utterances "+...",
        # error codes "[* ...]", and pauses "(.)", "(..)", ...
        self.patterns = {
            'fillers': re.compile(r'&-([a-z]+)', re.IGNORECASE),
            'repetition': re.compile(r'\[/+\]'),
            'retracing': re.compile(r'\[//\]'),
            'incomplete': re.compile(r'\+[\./]+'),
            'errors': re.compile(r'\[\*.*?\]'),
            'pauses': re.compile(r'\(\.+\)')
        }

    @staticmethod
    def _content_words(raw_text):
        """Lower-cased word list with bracket annotations, filler markers
        and punctuation removed (shared by word count and TTR)."""
        text = re.sub(r'\[.*?\]', '', raw_text)
        text = re.sub(r'&-([a-z]+)', '', text)
        text = re.sub(r'[^\w\s]', '', text)
        return text.lower().split()

    def clean_for_bert(self, raw_text):
        """Return display/model text: speaker tag, timing codes and bracket
        annotations removed; pauses become a literal [PAUSE] token."""
        text = re.sub(r'^\*PAR:\s+', '', raw_text)
        text = re.sub(r'\x15\d+_\d+\x15', '', text)  # CLAN timing bullets
        text = re.sub(r'<|>', '', text)
        text = re.sub(r'\[.*?\]', '', text)
        text = re.sub(r'\(\.+\)', '[PAUSE]', text)
        text = text.replace('_', ' ')
        text = re.sub(r'\s+', ' ', text).strip()
        return text

    def get_features(self, raw_text):
        """Return raw marker counts plus 'word_count'."""
        stats = {k: len(p.findall(raw_text)) for k, p in self.patterns.items()}
        stats['word_count'] = len(self._content_words(raw_text))
        return stats

    def get_vector(self, raw_text, global_ttr_override=None):
        """Return the 6-dim feature vector
        [ttr, fillers, repetition, retracing, errors, pauses],
        with counts normalized by word count (clamped to >= 1)."""
        stats = self.get_features(raw_text)
        # Clamp to 1 so empty utterances don't divide by zero.
        n = stats['word_count'] if stats['word_count'] > 0 else 1

        if global_ttr_override is not None:
            ttr = global_ttr_override
        else:
            # Per-utterance type-token ratio; if there are no words the set
            # is empty and ttr is 0.
            words = self._content_words(raw_text)
            ttr = len(set(words)) / n

        return [
            ttr,
            stats['fillers'] / n,
            stats['repetition'] / n,
            stats['retracing'] / n,
            stats['errors'] / n,
            stats['pauses'] / n
        ]
55
+
56
class ChaParser:
    """Parse a CHAT (.cha) transcript into participant sentences + features."""

    def __init__(self):
        self.extractor = LiveFeatureExtractor()

    def parse(self, file_content_lines):
        """Parse transcript lines (str or bytes) from a .cha file.

        Returns (sentences, features, raw_lines) for every *PAR: line.
        The type-token ratio is computed globally over the whole session
        (per-sentence TTR is too noisy) and injected into each feature
        vector via global_ttr_override.
        """
        decoded = [
            line.decode('utf-8') if isinstance(line, bytes) else line
            for line in file_content_lines
        ]
        # Only participant (*PAR:) tiers are analyzed; other speakers and
        # dependent tiers (%mor, %gra, ...) are skipped. Collect once
        # instead of filtering the file twice.
        par_lines = [line for line in decoded if line.startswith('*PAR:')]

        session_words = []
        for line in par_lines:
            clean_line = re.sub(r'[^\w\s]', '', line.replace('*PAR:', ''))
            session_words.extend(clean_line.lower().split())

        total_words = len(session_words)
        global_ttr = len(set(session_words)) / total_words if total_words > 0 else 0.0

        sentences, features, raw_lines = [], [], []
        for line in par_lines:
            sentences.append(self.extractor.clean_for_bert(line))
            features.append(self.extractor.get_vector(line, global_ttr_override=global_ttr))
            raw_lines.append(line.strip())

        return sentences, features, raw_lines
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ torch
5
+ transformers
6
+ numpy
7
+ regex
8
+ requests
9
+ python-dotenv