KShoichi committed on
Commit
c464c97
·
verified ·
1 Parent(s): bcd596c

Upload setup_competition.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. setup_competition.py +543 -0
setup_competition.py ADDED
@@ -0,0 +1,543 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Competition Setup Script - Automated setup for competition-ready hallucination detection system
4
+ """
5
+ import subprocess
6
+ import sys
7
+ import os
8
+ import logging
9
+ import json
10
+ from pathlib import Path
11
+ from typing import List, Dict, Any
12
+ import time
13
+
14
# Configure logging: the root logger emits INFO and above, one line per record
# formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by every setup step in this file.
logger = logging.getLogger(__name__)
20
+
21
class CompetitionSetup:
    """Automated setup for competition system.

    Runs a fixed sequence of setup steps (dependency install, model download,
    config/database/directory creation, validation, monitoring and deployment
    files), records the outcome of each step in ``setup_log`` and writes a
    JSON report to ``setup_report.json`` under ``project_root``.
    """

    def __init__(self):
        # Every generated file and directory is created relative to the
        # directory that contains this script.
        self.project_root = Path(__file__).parent
        # One dict per executed step: 'step', 'status' ('success'/'failed'),
        # 'duration', plus 'details' and/or 'error'.
        self.setup_log = []

    def setup_competition_system(self):
        """Complete setup for competition system.

        Runs every step in order. A failing step is logged and recorded in
        ``setup_log`` and the remaining steps still run (best effort). A step
        that reports ``all_validations_passed: False`` in its result is
        recorded as failed as well, instead of being counted as a success.
        The setup report is written at the end.

        Returns:
            True when no step was recorded as failed, otherwise False.
        """
        logger.info("🚀 Starting Competition System Setup...")

        steps = [
            ("Checking Python version", self.check_python_version),
            ("Installing competition requirements", self.install_requirements),
            ("Downloading NLP models", self.download_nlp_models),
            ("Setting up advanced configuration", self.setup_advanced_config),
            ("Initializing competition database", self.setup_database),
            ("Creating model directories", self.create_model_directories),
            ("Generating sample competition data", self.generate_competition_data),
            ("Running system validation", self.validate_system),
            ("Setting up monitoring", self.setup_monitoring),
            ("Creating deployment configs", self.create_deployment_configs),
        ]

        total_steps = len(steps)

        for i, (step_name, step_func) in enumerate(steps, 1):
            logger.info(f"[{i}/{total_steps}] {step_name}...")
            start_time = time.time()
            try:
                result = step_func()
            except Exception as e:
                # Top-level boundary: record the failure and keep going so
                # the report covers every step.
                self.setup_log.append({
                    'step': step_name,
                    'status': 'failed',
                    'duration': time.time() - start_time,
                    'error': str(e)
                })
                logger.error(f"❌ {step_name} failed: {e}")
                continue

            duration = time.time() - start_time

            # validate_system() reports problems through its return value
            # rather than by raising; do not log that as a success.
            if isinstance(result, dict) and result.get('all_validations_passed') is False:
                error = result.get('error', 'validation failed')
                self.setup_log.append({
                    'step': step_name,
                    'status': 'failed',
                    'duration': duration,
                    'error': error,
                    'details': result
                })
                logger.error(f"❌ {step_name} failed: {error}")
                continue

            self.setup_log.append({
                'step': step_name,
                'status': 'success',
                'duration': duration,
                'details': result
            })
            logger.info(f"✅ {step_name} completed ({duration:.2f}s)")

        # Generate setup report
        self.generate_setup_report()

        all_succeeded = all(entry['status'] == 'success' for entry in self.setup_log)
        if all_succeeded:
            logger.info("🎯 Competition system setup completed!")
        else:
            logger.warning("⚠️ Competition system setup finished with failed steps")
        return all_succeeded

    def check_python_version(self) -> Dict[str, Any]:
        """Check if Python version is compatible.

        Returns:
            Dict with the running 'python_version' string and 'compatible'.

        Raises:
            RuntimeError: if the interpreter is not Python 3 with minor >= 8.
        """
        version = sys.version_info

        if version.major != 3 or version.minor < 8:
            raise RuntimeError(f"Python 3.8+ required, found {version.major}.{version.minor}")

        return {
            'python_version': f"{version.major}.{version.minor}.{version.micro}",
            'compatible': True
        }

    def install_requirements(self) -> Dict[str, Any]:
        """Install competition requirements.

        Runs ``pip install -r requirements_competition.txt`` with the current
        interpreter.

        Returns:
            Dict with 'requirements_installed' and the last five lines of
            pip's stdout under 'output'.

        Raises:
            FileNotFoundError: if the requirements file does not exist.
            RuntimeError: if pip exits with a non-zero status.
        """
        requirements_file = self.project_root / "requirements_competition.txt"

        if not requirements_file.exists():
            raise FileNotFoundError(f"Requirements file not found: {requirements_file}")

        # Install requirements
        cmd = [sys.executable, "-m", "pip", "install", "-r", str(requirements_file)]
        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            raise RuntimeError(f"Failed to install requirements: {result.stderr}")

        return {
            'requirements_installed': True,
            'output': result.stdout.strip().split('\n')[-5:]  # Last 5 lines
        }

    def download_nlp_models(self) -> Dict[str, Any]:
        """Download required NLP models.

        Best effort: each model is attempted independently. A failed download
        is logged as a warning and omitted from the result; it never raises.

        Returns:
            Dict with 'models_downloaded' (list of "<source>:<name>" labels
            that were fetched successfully) and 'total' (its length).
        """
        models_to_download = [
            ("spacy", "en_core_web_sm"),
            ("nltk", "punkt"),
            ("nltk", "stopwords"),
            ("nltk", "vader_lexicon")
        ]
        fetchers = {
            "spacy": self._download_spacy_model,
            "nltk": self._download_nltk_package,
        }

        downloaded = []

        for source, name in models_to_download:
            label = f"{source}:{name}"
            try:
                succeeded = fetchers[source](name)
            except Exception as e:
                logger.warning(f"Failed to download {label}: {e}")
                continue

            if succeeded:
                downloaded.append(label)
            else:
                logger.warning(f"Failed to download {label}")

        return {
            'models_downloaded': downloaded,
            'total': len(downloaded)
        }

    @staticmethod
    def _download_spacy_model(name: str) -> bool:
        """Run ``python -m spacy download <name>``; True when it exits with 0."""
        cmd = [sys.executable, "-m", "spacy", "download", name]
        result = subprocess.run(cmd, capture_output=True, text=True)
        return result.returncode == 0

    @staticmethod
    def _download_nltk_package(name: str) -> bool:
        """Fetch one NLTK data package; True when nltk reports success."""
        import nltk
        # nltk.download() signals failure through its return value.
        return bool(nltk.download(name, quiet=True))

    def setup_advanced_config(self) -> Dict[str, Any]:
        """Setup advanced configuration for competition.

        Writes the configuration below to ``competition_config.json`` under
        ``project_root``, overwriting any existing file.

        Returns:
            Dict with the written 'config_file' path and the top-level
            'config_sections' names.
        """
        config = {
            "competition": {
                "enabled": True,
                "advanced_detection": True,
                "ensemble_methods": True,
                "real_time_analytics": True
            },
            "model": {
                "primary_model": "google/flan-t5-base",
                "competition_model": "competition_model",
                "ensemble_weights": {
                    "neural_consistency": 0.25,
                    "semantic_similarity": 0.20,
                    "fact_verification": 0.20,
                    "linguistic_analysis": 0.15,
                    "statistical_anomaly": 0.10,
                    "domain_expertise": 0.10
                }
            },
            "performance": {
                "max_concurrent_requests": 10,
                "cache_size": 1000,
                "gpu_memory_fraction": 0.8,
                "enable_model_parallel": True
            },
            "monitoring": {
                "enable_metrics": True,
                "log_level": "INFO",
                "performance_tracking": True,
                "error_tracking": True
            },
            "security": {
                "rate_limiting": True,
                "input_validation": True,
                "output_sanitization": True
            }
        }

        config_file = self.project_root / "competition_config.json"
        with open(config_file, 'w', encoding='utf-8') as f:
            json.dump(config, f, indent=2)

        return {
            "config_file": str(config_file),
            "config_sections": list(config.keys())
        }

    def setup_database(self) -> Dict[str, Any]:
        """Initialize competition database.

        Creates ``app/database/`` (including a missing ``app/`` parent) and
        creates the tables and indexes below in the SQLite file
        ``app/competition.db``. All statements use ``IF NOT EXISTS``, so
        re-running against an existing database is harmless.

        NOTE(review): the database file is created in ``app/``, not in the
        ``app/database/`` directory that is created here — confirm intended.

        Returns:
            Dict with 'database_path' and the list of 'tables_created'.

        Raises:
            RuntimeError: wrapping any error raised during setup.
        """
        import sqlite3

        # Create advanced tables SQL
        sql_script = '''
        CREATE TABLE IF NOT EXISTS competition_predictions (
            id TEXT PRIMARY KEY,
            prompt TEXT NOT NULL,
            response TEXT NOT NULL,
            question TEXT NOT NULL,
            is_hallucination BOOLEAN NOT NULL,
            confidence_score REAL NOT NULL,
            risk_level TEXT NOT NULL,
            detection_methods TEXT NOT NULL,
            processing_time REAL NOT NULL,
            model_version TEXT NOT NULL,
            explanation TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );

        CREATE TABLE IF NOT EXISTS analytics_events (
            id TEXT PRIMARY KEY,
            event_type TEXT NOT NULL,
            event_data TEXT NOT NULL,
            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );

        CREATE TABLE IF NOT EXISTS performance_metrics (
            id TEXT PRIMARY KEY,
            metric_name TEXT NOT NULL,
            metric_value REAL NOT NULL,
            metric_metadata TEXT,
            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );

        CREATE INDEX IF NOT EXISTS idx_predictions_created_at ON competition_predictions(created_at);
        CREATE INDEX IF NOT EXISTS idx_analytics_timestamp ON analytics_events(timestamp);
        CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON performance_metrics(timestamp);
        '''

        try:
            # Create database directory; parents=True because `app/` may not
            # exist yet on a fresh checkout.
            db_dir = self.project_root / "app" / "database"
            db_dir.mkdir(parents=True, exist_ok=True)

            # Execute SQL
            db_path = self.project_root / "app" / "competition.db"
            conn = sqlite3.connect(db_path)
            try:
                conn.executescript(sql_script)
            finally:
                # Always release the file handle, even if the script fails.
                conn.close()

            return {
                "database_path": str(db_path),
                "tables_created": ["competition_predictions", "analytics_events", "performance_metrics"]
            }

        except Exception as e:
            raise RuntimeError(f"Database setup failed: {e}") from e

    def create_model_directories(self) -> Dict[str, Any]:
        """Create directories for competition models.

        Existing directories are left untouched.

        Returns:
            Dict with 'directories_created' (paths as strings, including ones
            that already existed) and 'total'.
        """
        directories = [
            "competition_model",
            "ensemble_models",
            "model_cache",
            "training_checkpoints",
            "evaluation_results"
        ]

        created_dirs = []
        for dir_name in directories:
            dir_path = self.project_root / dir_name
            dir_path.mkdir(parents=True, exist_ok=True)
            created_dirs.append(str(dir_path))

        return {
            "directories_created": created_dirs,
            "total": len(created_dirs)
        }

    def generate_competition_data(self) -> Dict[str, Any]:
        """Generate advanced training data for competition.

        Writes three copies of each base example to
        ``competition_training_data.csv``. Each row gets an id of the form
        ``<category>_<n>``, where ``n`` counts rows within the category so
        ids stay unique even when several base examples share a category.

        Returns:
            Dict with 'training_file', 'total_examples' and the sorted list
            of distinct 'categories'.
        """
        competition_data = []

        # Advanced hallucination examples
        examples = [
            # Typo-based hallucinations
            {
                "prompt": "iPhone 15 Pro specifications",
                "response": "The ipon 15 Pro features the A17 Pro chip",
                "question": "What chip does iPhone 15 Pro have?",
                "is_hallucination": True,
                "category": "typo"
            },
            # Specification errors
            {
                "prompt": "Tesla Model 3 performance specs",
                "response": "Tesla Model 3 accelerates 0-60 mph in 0.5 seconds",
                "question": "What is the 0-60 time?",
                "is_hallucination": True,
                "category": "impossible_spec"
            },
            # Specification errors (storage)
            {
                "prompt": "MacBook Pro M3 storage options",
                "response": "The MacBook Pro M3 comes with 256TB of storage",
                "question": "How much storage does it have?",
                "is_hallucination": True,
                "category": "impossible_spec"
            },
            # Factual accuracy
            {
                "prompt": "iPhone 15 camera specifications",
                "response": "iPhone 15 has a 48MP main camera",
                "question": "What is the camera resolution?",
                "is_hallucination": False,
                "category": "factual"
            },
            # Context contradictions
            {
                "prompt": "Android phone running iOS",
                "response": "This Samsung Galaxy runs iOS 17 perfectly",
                "question": "What operating system does it run?",
                "is_hallucination": True,
                "category": "logical_contradiction"
            }
        ]

        # Generate variations (currently plain copies that differ only in id)
        rows_per_category = {}
        for base_example in examples:
            category = base_example['category']
            for _ in range(3):  # 3 variations each
                sequence = rows_per_category.get(category, 0) + 1
                rows_per_category[category] = sequence
                example = base_example.copy()
                example['id'] = f"{category}_{sequence}"
                competition_data.append(example)

        # Save to CSV
        import pandas as pd
        df = pd.DataFrame(competition_data)

        output_file = self.project_root / "competition_training_data.csv"
        df.to_csv(output_file, index=False)

        return {
            "training_file": str(output_file),
            "total_examples": len(competition_data),
            "categories": sorted(rows_per_category)
        }

    def validate_system(self) -> Dict[str, Any]:
        """Validate that the competition system is working.

        Imports torch, transformers and fastapi, queries CUDA availability
        and loads the ``google/flan-t5-small`` tokenizer and model.

        Returns:
            On success: 'all_validations_passed' True plus the collected
            'validations' (name, detail) pairs. On any exception:
            'all_validations_passed' False, the 'error' text and the
            validations collected before the failure. Never raises.
        """
        validations = []

        try:
            # Test imports
            import torch
            validations.append(("torch", torch.__version__))

            import transformers
            validations.append(("transformers", transformers.__version__))

            import fastapi
            validations.append(("fastapi", fastapi.__version__))

            # Test CUDA availability
            cuda_available = torch.cuda.is_available()
            validations.append(("cuda", f"Available: {cuda_available}"))

            # Test model loading (basic); only the fact that loading works
            # matters, so the loaded objects are not kept.
            from transformers import T5Tokenizer, T5ForConditionalGeneration
            T5Tokenizer.from_pretrained("google/flan-t5-small")  # Use small for test
            T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
            validations.append(("model_loading", "Success"))

            return {
                "all_validations_passed": True,
                "validations": validations
            }

        except Exception as e:
            return {
                "all_validations_passed": False,
                "error": str(e),
                "validations": validations
            }

    def setup_monitoring(self) -> Dict[str, Any]:
        """Setup monitoring and logging.

        Writes ``monitoring_config.json`` and creates the ``logs`` directory
        under ``project_root``.

        Returns:
            Dict with the 'monitoring_config' file path and 'log_directory'.
        """
        # Create monitoring configuration
        monitoring_config = {
            "metrics": {
                "enabled": True,
                "port": 8090,
                "path": "/metrics"
            },
            "logging": {
                "level": "INFO",
                "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                "handlers": {
                    "file": {
                        "filename": "competition.log",
                        "max_bytes": 10485760,  # 10MB
                        "backup_count": 5
                    },
                    "console": {
                        "enabled": True
                    }
                }
            },
            "alerts": {
                "high_latency_threshold": 5.0,
                "error_rate_threshold": 0.05,
                "memory_usage_threshold": 0.9
            }
        }

        # Save monitoring config
        monitoring_file = self.project_root / "monitoring_config.json"
        with open(monitoring_file, 'w', encoding='utf-8') as f:
            json.dump(monitoring_config, f, indent=2)

        # Create log directory
        log_dir = self.project_root / "logs"
        log_dir.mkdir(parents=True, exist_ok=True)

        return {
            "monitoring_config": str(monitoring_file),
            "log_directory": str(log_dir)
        }

    def create_deployment_configs(self) -> Dict[str, Any]:
        """Create deployment configurations.

        Writes ``Dockerfile`` and ``docker-compose.yml`` under
        ``project_root``, overwriting existing files of the same name.

        Returns:
            Dict with the written 'dockerfile' and 'docker_compose' paths.
        """
        # Docker configuration. The literal content starts at column 0
        # because its whitespace is written to the file verbatim.
        dockerfile_content = '''
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \\
    gcc \\
    g++ \\
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install
COPY requirements_competition.txt .
RUN pip install --no-cache-dir -r requirements_competition.txt

# Copy application
COPY . .

# Install spacy model
RUN python -m spacy download en_core_web_sm

# Expose port
EXPOSE 8000

# Run application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
'''

        # Docker Compose configuration (YAML: the indentation is significant)
        docker_compose_content = '''
version: '3.8'

services:
  hallucination-detector:
    build: .
    ports:
      - "8000:8000"
    volumes:
      - ./logs:/app/logs
      - ./models:/app/models
    environment:
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data

  monitoring:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana

volumes:
  redis_data:
  grafana_data:
'''

        # Save files
        dockerfile_path = self.project_root / "Dockerfile"
        with open(dockerfile_path, 'w', encoding='utf-8') as f:
            f.write(dockerfile_content)

        compose_path = self.project_root / "docker-compose.yml"
        with open(compose_path, 'w', encoding='utf-8') as f:
            f.write(docker_compose_content)

        return {
            "dockerfile": str(dockerfile_path),
            "docker_compose": str(compose_path)
        }

    def generate_setup_report(self):
        """Generate comprehensive setup report.

        Writes ``setup_report.json`` (all entries of ``setup_log`` plus
        success/failure counts) under ``project_root`` and prints a summary.
        ``setup_completed`` in the report is True only when no step failed;
        the success banner and next-step hints are printed only in that case.
        """
        successful = sum(1 for s in self.setup_log if s['status'] == 'success')
        failed_steps = [s['step'] for s in self.setup_log if s['status'] == 'failed']

        report = {
            "setup_completed": not failed_steps,
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "steps": self.setup_log,
            "summary": {
                "total_steps": len(self.setup_log),
                "successful": successful,
                "failed": len(failed_steps)
            }
        }

        report_file = self.project_root / "setup_report.json"
        with open(report_file, 'w', encoding='utf-8') as f:
            json.dump(report, f, indent=2)

        logger.info(f"📊 Setup report saved to: {report_file}")

        # Print summary
        print("\n" + "="*60)
        if failed_steps:
            print("⚠️ COMPETITION SYSTEM SETUP FINISHED WITH ERRORS")
        else:
            print("🎯 COMPETITION SYSTEM SETUP COMPLETE")
        print("="*60)
        print(f"✅ Successful steps: {report['summary']['successful']}")
        print(f"❌ Failed steps: {report['summary']['failed']}")
        print(f"📊 Setup report: {report_file}")

        if failed_steps:
            # Do not claim the system is ready when something went wrong.
            print("\nFailed steps:")
            for step_name in failed_steps:
                print(f"- {step_name}")
            print("\nFix the failed steps (see the setup report) and re-run this script.")
            return

        print("\n🚀 Your competition-ready system is now available!")
        print("\nNext steps:")
        print("1. Run training: python -m app.model.competition_training")
        print("2. Start server: uvicorn app.main:app --reload")
        print("3. Access competition API: http://localhost:8000/competition/")
        print("4. View analytics: http://localhost:8000/competition/analytics")
539
+
540
+
541
if __name__ == "__main__":
    setup = CompetitionSetup()
    setup.setup_competition_system()
    # Exit non-zero when any step was recorded as failed, so shells and CI
    # pipelines can detect an incomplete setup instead of always seeing 0.
    if any(entry.get('status') == 'failed' for entry in setup.setup_log):
        sys.exit(1)