#!/usr/bin/env python3
"""
Updated config that loads models from local repo storage
Falls back to HF Hub if local models not available
"""

import os
from pathlib import Path

# Models directory in repo: "<parent of this file's directory>/models".
MODELS_DIR = Path(__file__).parent.parent / "models"

# Sentiment is the only task this config resolves to a local path; the model
# weights and the tokenizer live in separate sub-directories.
_LOCAL_SENTIMENT_MODEL = MODELS_DIR / "sentiment" / "model"
_LOCAL_SENTIMENT_TOKENIZER = MODELS_DIR / "sentiment" / "tokenizer"

# HF Hub checkpoint used when the local sentiment artifacts are missing.
_HUB_SENTIMENT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"

# Try to use local models first, fallback to HF Hub IDs.
# Check the directories that are actually handed to the loader rather than
# just MODELS_DIR: an empty or partially populated "models/" folder must
# still trigger the Hub fallback instead of yielding non-existent paths.
LOCAL_MODELS_AVAILABLE = (
    _LOCAL_SENTIMENT_MODEL.is_dir() and _LOCAL_SENTIMENT_TOKENIZER.is_dir()
)

if LOCAL_MODELS_AVAILABLE:
    print(f"📁 Loading models from local repo: {MODELS_DIR}")
    # Use local paths
    SENTIMENT_MODEL = str(_LOCAL_SENTIMENT_MODEL)
    SENTIMENT_TOKENIZER = str(_LOCAL_SENTIMENT_TOKENIZER)
else:
    print("🌐 Local models not found, using HF Hub (will download on first use)")
    # Fall back to HF Hub. The tokenizer is taken from the same checkpoint as
    # the model so the two always match (a BERT tokenizer emits
    # token_type_ids, which DistilBERT models do not accept).
    SENTIMENT_MODEL = _HUB_SENTIMENT_MODEL
    SENTIMENT_TOKENIZER = _HUB_SENTIMENT_MODEL

# These models are always referenced by HF Hub ID, whether or not local
# models are available.
NER_MODEL = "dslim/bert-base-NER"  # Can add locally if needed
QA_MODEL = "deepset/roberta-base-squad2"  # Can add locally if needed
SUMMARIZATION_MODEL = "facebook/bart-large-cnn"  # Too large for repo
EMBEDDINGS_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # Can add locally if needed

# Task registry: maps each task key to its display name, a short description,
# the model identifier selected above, and example input(s) for that task.
TASKS = dict(
    sentiment=dict(
        name="Sentiment Analysis",
        description="Classify text sentiment (positive/negative/neutral)",
        model=SENTIMENT_MODEL,
        example="I absolutely love this product! It's amazing and works perfectly.",
    ),
    ner=dict(
        name="Named Entity Recognition",
        description="Identify and classify named entities (Person, Location, Organization)",
        model=NER_MODEL,
        example="Apple Inc. was founded by Steve Jobs in Cupertino, California.",
    ),
    # Question answering carries two example fields: a context and a question.
    qa=dict(
        name="Question Answering",
        description="Answer questions based on provided context",
        model=QA_MODEL,
        example_context="The Hugging Face Hub is a platform for sharing machine learning models, datasets, and demos.",
        example_question="What is the Hugging Face Hub?",
    ),
    summarization=dict(
        name="Text Summarization",
        description="Generate concise summaries of longer texts",
        model=SUMMARIZATION_MODEL,
        example="The Hugging Face transformers library provides state-of-the-art pre-trained models for natural language processing tasks. It supports PyTorch and TensorFlow, making it easy to use with either framework.",
    ),
    # Similarity carries a pair of example sentences to compare.
    similarity=dict(
        name="Semantic Similarity",
        description="Compare semantic similarity between two sentences",
        model=EMBEDDINGS_MODEL,
        example1="The cat is sleeping on the mat",
        example2="A feline is resting on the rug",
    ),
)

# Locations of the sample data, given as relative path strings.
SAMPLE_DATA_CSV: str = "data/sample_texts.csv"
DEMO_SAMPLES_DIR: str = "data/demo_samples"

# Planned length of each session, in minutes.
SESSION1_DURATION: int = 45
SESSION2_DURATION: int = 90