Chirapath committed on
Commit
c46080e
·
verified ·
1 Parent(s): 6ec5288

Upload 13 files

Browse files

Add application files

Files changed (13) hide show
  1. .env +63 -0
  2. .gitattributes +35 -35
  3. Developer.md +1904 -0
  4. README.md +12 -14
  5. USER.md +459 -0
  6. ai_summary.py +796 -0
  7. app.py +1661 -0
  8. backend.py +1472 -0
  9. env_template.sh +62 -0
  10. file_processors.py +442 -0
  11. image_extraction.py +417 -0
  12. implementation_guide.txt +300 -0
  13. requirements.txt +40 -0
.env ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
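+ # Azure Speech Services Configuration (SECURITY: do not commit real credentials — rotate any keys stored in this file and supply them via untracked secrets or environment variables)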
2
+ AZURE_SPEECH_KEY=8wLwSSfcB0Z35xFx9HavbzBBvVwN4zalR42HeVig2NKe81AkbwFPJQQJ99BGACYeBjFXJ3w3AAAYACOGnAXF
3
+ AZURE_SPEECH_KEY_ENDPOINT=https://eastus.api.cognitive.microsoft.com/
4
+ AZURE_REGION=eastus
5
+
6
+ # Azure Blob Storage Configuration
7
+ AZURE_BLOB_CONNECTION=DefaultEndpointsProtocol=https;AccountName=speechtotextservice01;AccountKey=GAFbqMBvIHkRXLIx9173jFVb7W96lQ02t7bGgKwq6LbpU2gqaUeU+pWAKcbdn38rQYfKnOFVy5ar+AStxXjAJA==;EndpointSuffix=core.windows.net
8
+ AZURE_CONTAINER=history
9
+ AZURE_CONTAINER_CHAT=response-chat
10
+ AZURE_BLOB_SAS_TOKEN=sp=racwdl&st=2025-07-08T15:31:37Z&se=2030-07-08T23:31:37Z&sv=2024-11-04&sr=c&sig=FF8Nh6eOvx08ouMEV6u7tWxn9viD7OFJXmAbiO2BeK8%3D
11
+
12
+ # Computer Vision Services Configuration (NEW)
13
+ COMPUTER_VISION_ENDPOINT=https://image-process-256808.cognitiveservices.azure.com/
14
+ COMPUTER_VISION_KEY=64PKopBssiMAEHVtqRySNxv5HRWsza7mN27KgDoXqaOcgIv5Z3AFJQQJ99BHACYeBjFXJ3w3AAAFACOG8rdi
15
+ COMPUTER_VISION_REGION=eastus
16
+
17
+ # AI Agents Configuration (NEW)
18
+ AI_PROJECT_ENDPOINT=https://aiservicetesting001.services.ai.azure.com/api/projects/aiagentdeplyomentproject
19
+ AI_PROJECT_KEY=9qYUFJwW1qDvF05Qz8RPuL0r1JbO123WSKMdRcnh76jO2VGB4Z6oJQQJ99BCAC77bzfXJ3w3AAAAACOGlbzd
20
+ AI_AGENT_ID=asst_8isTjrGPs8M0d1RhkNONDtHK
21
+
22
+ # Azure OpenAI Configuration
23
+ AZURE_OPENAI_ENDPOINT=https://openaiservice2568.openai.azure.com/
24
+ AZURE_OPENAI_KEY=8CZSXFphWviu1KBpweiUntRKrJgYR2hApSUT76f5MlBsSjuvKulnJQQJ99BCACYeBjFXJ3w3AAABACOGc2vU
25
+ AZURE_OPENAI_DEPLOYMENT=gpt-4.1-mini
26
+ AZURE_OPENAI_API_VERSION=2024-12-01-preview
27
+
28
+ # API Configuration
29
+ API_VERSION=v3.2
30
+
31
+ # Allowed Languages Configuration
32
+ ALLOWED_LANGS={"en-US": "English (US)", "en-GB": "English (UK)", "es-ES": "Spanish", "fr-FR": "French", "de-DE": "German", "it-IT": "Italian", "pt-BR": "Portuguese (Brazil)", "zh-CN": "Chinese (Simplified)", "ja-JP": "Japanese", "ko-KR": "Korean", "ru-RU": "Russian", "ar-SA": "Arabic", "hi-IN": "Hindi", "th-TH": "Thai", "vi-VN": "Vietnamese", "nl-NL": "Dutch", "sv-SE": "Swedish", "da-DK": "Danish", "no-NO": "Norwegian", "fi-FI": "Finnish", "pl-PL": "Polish", "cs-CZ": "Czech", "hu-HU": "Hungarian", "ro-RO": "Romanian", "bg-BG": "Bulgarian", "hr-HR": "Croatian", "sk-SK": "Slovak", "sl-SI": "Slovenian", "et-EE": "Estonian", "lv-LV": "Latvian", "lt-LT": "Lithuanian", "uk-UA": "Ukrainian", "el-GR": "Greek", "tr-TR": "Turkish", "he-IL": "Hebrew", "fa-IR": "Persian", "ur-PK": "Urdu", "bn-BD": "Bengali", "ta-IN": "Tamil", "te-IN": "Telugu", "ml-IN": "Malayalam", "kn-IN": "Kannada", "gu-IN": "Gujarati", "pa-IN": "Punjabi", "mr-IN": "Marathi", "ne-NP": "Nepali", "si-LK": "Sinhala", "my-MM": "Myanmar", "km-KH": "Khmer", "lo-LA": "Lao", "ka-GE": "Georgian", "am-ET": "Amharic", "sw-TZ": "Swahili", "zu-ZA": "Zulu", "af-ZA": "Afrikaans", "is-IS": "Icelandic", "mt-MT": "Maltese", "cy-GB": "Welsh", "ga-IE": "Irish", "eu-ES": "Basque", "ca-ES": "Catalan", "gl-ES": "Galician", "pt-PT": "Portuguese (Portugal)", "fr-CA": "French (Canada)", "en-AU": "English (Australia)", "en-IN": "English (India)", "en-CA": "English (Canada)", "en-NZ": "English (New Zealand)", "en-ZA": "English (South Africa)", "es-MX": "Spanish (Mexico)", "es-AR": "Spanish (Argentina)", "es-CO": "Spanish (Colombia)", "es-CL": "Spanish (Chile)", "es-PE": "Spanish (Peru)", "es-VE": "Spanish (Venezuela)", "es-EC": "Spanish (Ecuador)", "es-GT": "Spanish (Guatemala)", "es-CR": "Spanish (Costa Rica)", "es-PA": "Spanish (Panama)", "es-DO": "Spanish (Dominican Republic)", "es-PR": "Spanish (Puerto Rico)", "es-UY": "Spanish (Uruguay)", "es-PY": "Spanish (Paraguay)", "es-BO": "Spanish (Bolivia)", "es-SV": "Spanish (El 
Salvador)", "es-HN": "Spanish (Honduras)", "es-NI": "Spanish (Nicaragua)", "zh-TW": "Chinese (Traditional)", "zh-HK": "Chinese (Hong Kong)"}
33
+
34
+ # Application Settings
35
+ DEBUG=False
36
+ UPLOAD_MAX_SIZE_MB=500
37
+ MAX_CONCURRENT_JOBS=5
38
+ DATABASE_PATH=database/transcriptions.db
39
+ CLEANUP_OLDER_THAN_DAYS=30
40
+
41
+ # # Security Settings (Optional - for enhanced security)
42
+ # SECRET_KEY=your_secret_key_for_sessions
43
+ # ENCRYPTION_KEY=your_encryption_key_for_sensitive_data
44
+
45
+ # Logging Settings
46
+ LOG_LEVEL=INFO
47
+ LOG_FILE=app.log
48
+
49
+ # Performance Settings
50
+ FRAME_EXTRACTION_MAX_FRAMES=50
51
+ FRAME_SIMILARITY_THRESHOLD=0.85
52
+ MIN_TIME_BETWEEN_FRAMES=2.0
53
+
54
+ # File Processing Settings
55
+ SUPPORTED_VIDEO_FORMATS=mp4,mov,avi,mkv,webm,flv,3gp,wmv
56
+ SUPPORTED_AUDIO_FORMATS=wav,mp3,ogg,opus,flac,wma,aac,m4a,amr,speex
57
+ SUPPORTED_DOCUMENT_FORMATS=pdf,docx,doc,pptx,ppt,xlsx,xls,csv,txt,json
58
+ SUPPORTED_IMAGE_FORMATS=jpg,jpeg,png,bmp,gif,tiff,webp
59
+
60
+ # Database Settings
61
+ DATABASE_BACKUP_INTERVAL=30
62
+ DATABASE_LOCATION=database/transcriptions.db
63
+ TEMP_FILES_CLEANUP_HOURS=24
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Developer.md ADDED
@@ -0,0 +1,1904 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🛠️ Azure Speech Transcription - Developer Guide
2
+
3
+ ## 📋 Table of Contents
4
+
5
+ - [System Architecture](#-system-architecture)
6
+ - [Development Environment](#-development-environment)
7
+ - [Deployment Guide](#-deployment-guide)
8
+ - [API Documentation](#-api-documentation)
9
+ - [Database Schema](#-database-schema)
10
+ - [Security Implementation](#-security-implementation)
11
+ - [Monitoring & Maintenance](#-monitoring--maintenance)
12
+ - [Contributing Guidelines](#-contributing-guidelines)
13
+ - [Advanced Configuration](#-advanced-configuration)
14
+ - [Troubleshooting](#-troubleshooting)
15
+
16
+ ---
17
+
18
+ ## 🏗️ System Architecture
19
+
20
+ ### Overview
21
+
22
+ The Azure Speech Transcription service is built with a modern, secure architecture focusing on user privacy, PDPA compliance, and scalability.
23
+
24
+ ```
25
+ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
26
+ │ Frontend UI │ │ Backend API │ │ Azure Services │
27
+ │ (Gradio) │◄──►│ (Python) │◄──►│ Speech & Blob │
28
+ └─────────────────┘ └─────────────────┘ └─────────────────┘
29
+ │ │ │
30
+ │ │ │
31
+ ▼ ▼ ▼
32
+ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
33
+ │ User Session │ │ SQLite Database │ │ User Storage │
34
+ │ Management │ │ (Metadata) │ │ (Isolated) │
35
+ └─────────────────┘ └─────────────────┘ └─────────────────┘
36
+ ```
37
+
38
+ ### Core Components
39
+
40
+ #### 1. Frontend Layer (`gradio_app.py`)
41
+ - **Technology**: Gradio with custom CSS
42
+ - **Purpose**: User interface and session management
43
+ - **Features**: Authentication, file upload, real-time status, history management
44
+
45
+ #### 2. Backend Layer (`app_core.py`)
46
+ - **Technology**: Python with threading and async processing
47
+ - **Purpose**: Business logic, authentication, and Azure integration
48
+ - **Features**: User management, transcription processing, PDPA compliance
49
+
50
+ #### 3. Data Layer
51
+ - **Database**: SQLite with Azure Blob backup
52
+ - **Storage**: Azure Blob Storage with user separation
53
+ - **Security**: User-isolated folders and encrypted connections
54
+
55
+ #### 4. External Services
56
+ - **Azure Speech Services**: Transcription processing
57
+ - **Azure Blob Storage**: File and database storage
58
+ - **FFmpeg**: Audio/video conversion
59
+
60
+ ### Data Flow
61
+
62
+ ```
63
+ 1. User uploads file → 2. Authentication check → 3. File validation
64
+ ↓ ↓ ↓
65
+ 8. Download results ← 7. Store transcript ← 6. Process with Azure
66
+ ↑ ↑ ↑
67
+ 9. Update UI status ← 4. Save to user folder ← 5. Background processing
68
+ ```
69
+
70
+ ---
71
+
72
+ ## 💻 Development Environment
73
+
74
+ ### Prerequisites
75
+
76
+ - **Python**: 3.8 or higher
77
+ - **Azure Account**: With Speech Services and Blob Storage
78
+ - **FFmpeg**: For audio/video processing
79
+ - **Git**: For version control
80
+
81
+ ### Environment Setup
82
+
83
+ #### 1. Clone Repository
84
+ ```bash
85
+ git clone <repository-url>
86
+ cd azure-speech-transcription
87
+ ```
88
+
89
+ #### 2. Virtual Environment
90
+ ```bash
91
+ # Create virtual environment
92
+ python -m venv venv
93
+
94
+ # Activate (Windows)
95
+ venv\Scripts\activate
96
+
97
+ # Activate (macOS/Linux)
98
+ source venv/bin/activate
99
+ ```
100
+
101
+ #### 3. Install Dependencies
102
+ ```bash
103
+ pip install -r requirements.txt
104
+ ```
105
+
106
+ #### 4. Environment Configuration
107
+ ```bash
108
+ # Copy environment template
109
+ cp .env.example .env
110
+
111
+ # Edit with your Azure credentials
112
+ nano .env
113
+ ```
114
+
115
+ #### 5. Install FFmpeg
116
+
117
+ **Windows (Chocolatey):**
118
+ ```bash
119
+ choco install ffmpeg
120
+ ```
121
+
122
+ **macOS (Homebrew):**
123
+ ```bash
124
+ brew install ffmpeg
125
+ ```
126
+
127
+ **Ubuntu/Debian:**
128
+ ```bash
129
+ sudo apt update
130
+ sudo apt install ffmpeg
131
+ ```
132
+
133
+ #### 6. Verify Installation
134
+ ```bash
135
+ python -c "
136
+ import gradio as gr
137
+ from azure.storage.blob import BlobServiceClient
138
+ import subprocess
139
+ print('Gradio:', gr.__version__)
140
+ print('FFmpeg:', subprocess.run(['ffmpeg', '-version'], capture_output=True).returncode == 0)
141
+ print('Azure Blob:', 'OK')
142
+ "
143
+ ```
144
+
145
+ ### Development Server
146
+
147
+ ```bash
148
+ # Start development server
149
+ python gradio_app.py
150
+
151
+ # Server will be available at:
152
+ # http://localhost:7860
153
+ ```
154
+
155
+ ### Development Tools
156
+
157
+ #### Recommended IDE Setup
158
+ - **VS Code**: With Python, Azure, and Git extensions
159
+ - **PyCharm**: Professional edition with Azure toolkit
160
+ - **Vim/Emacs**: With appropriate Python plugins
161
+
162
+ #### Useful Extensions
163
+ ```json
164
+ {
165
+ "recommendations": [
166
+ "ms-python.python",
167
+ "ms-vscode.azure-cli",
168
+ "ms-azuretools.azure-cli-tools",
169
+ "ms-python.black-formatter",
170
+ "ms-python.flake8"
171
+ ]
172
+ }
173
+ ```
174
+
175
+ #### Code Quality Tools
176
+ ```bash
177
+ # Install development tools
178
+ pip install black flake8 pytest mypy
179
+
180
+ # Format code
181
+ black .
182
+
183
+ # Lint code
184
+ flake8 .
185
+
186
+ # Type checking
187
+ mypy app_core.py gradio_app.py
188
+ ```
189
+
190
+ ---
191
+
192
+ ## 🚀 Deployment Guide
193
+
194
+ ### Production Deployment Options
195
+
196
+ #### Option 1: Traditional Server Deployment
197
+
198
+ **1. Server Preparation**
199
+ ```bash
200
+ # Update system
201
+ sudo apt update && sudo apt upgrade -y
202
+
203
+ # Install Python and dependencies
204
+ sudo apt install python3 python3-pip python3-venv nginx ffmpeg -y
205
+
206
+ # Create application user
207
+ sudo useradd -m -s /bin/bash transcription
208
+ sudo su - transcription
209
+ ```
210
+
211
+ **2. Application Setup**
212
+ ```bash
213
+ # Clone repository
214
+ git clone <repository-url> /home/transcription/app
215
+ cd /home/transcription/app
216
+
217
+ # Setup virtual environment
218
+ python3 -m venv venv
219
+ source venv/bin/activate
220
+ pip install -r requirements.txt
221
+
222
+ # Configure environment
223
+ cp .env.example .env
224
+ # Edit .env with production values
225
+ ```
226
+
227
+ **3. Systemd Service**
228
+ ```ini
229
+ # /etc/systemd/system/transcription.service
230
+ [Unit]
231
+ Description=Azure Speech Transcription Service
232
+ After=network.target
233
+
234
+ [Service]
235
+ Type=simple
236
+ User=transcription
237
+ Group=transcription
238
+ WorkingDirectory=/home/transcription/app
239
+ Environment=PATH=/home/transcription/app/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
240
+ ExecStart=/home/transcription/app/venv/bin/python gradio_app.py
241
+ Restart=always
242
+ RestartSec=10
243
+
244
+ [Install]
245
+ WantedBy=multi-user.target
246
+ ```
247
+
248
+ **4. Nginx Configuration**
249
+ ```nginx
250
+ # /etc/nginx/sites-available/transcription
251
+ server {
252
+ listen 80;
253
+ server_name your-domain.com;
254
+ client_max_body_size 500M;
255
+
256
+ location / {
257
+ proxy_pass http://127.0.0.1:7860;
258
+ proxy_set_header Host $host;
259
+ proxy_set_header X-Real-IP $remote_addr;
260
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
261
+ proxy_set_header X-Forwarded-Proto $scheme;
262
+ proxy_read_timeout 300s;
263
+ proxy_connect_timeout 75s;
264
+ }
265
+ }
266
+ ```
267
+
268
+ **5. SSL Certificate**
269
+ ```bash
270
+ # Install Certbot
271
+ sudo apt install certbot python3-certbot-nginx -y
272
+
273
+ # Get SSL certificate
274
+ sudo certbot --nginx -d your-domain.com
275
+
276
+ # Verify auto-renewal
277
+ sudo certbot renew --dry-run
278
+ ```
279
+
280
+ **6. Start Services**
281
+ ```bash
282
+ # Enable and start application
283
+ sudo systemctl enable transcription
284
+ sudo systemctl start transcription
285
+
286
+ # Enable and restart nginx
287
+ sudo systemctl enable nginx
288
+ sudo systemctl restart nginx
289
+
290
+ # Check status
291
+ sudo systemctl status transcription
292
+ sudo systemctl status nginx
293
+ ```
294
+
295
+ #### Option 2: Docker Deployment
296
+
297
+ **1. Dockerfile**
298
+ ```dockerfile
299
+ FROM python:3.9-slim
300
+
301
+ # Install system dependencies
302
+ RUN apt-get update && apt-get install -y \
303
+ ffmpeg \
304
+ && rm -rf /var/lib/apt/lists/*
305
+
306
+ # Set working directory
307
+ WORKDIR /app
308
+
309
+ # Copy requirements and install Python dependencies
310
+ COPY requirements.txt .
311
+ RUN pip install --no-cache-dir -r requirements.txt
312
+
313
+ # Copy application code
314
+ COPY . .
315
+
316
+ # Create necessary directories
317
+ RUN mkdir -p uploads database temp
318
+
319
+ # Expose port
320
+ EXPOSE 7860
321
+
322
+ # Run application
323
+ CMD ["python", "gradio_app.py"]
324
+ ```
325
+
326
+ **2. Docker Compose**
327
+ ```yaml
328
+ # docker-compose.yml
329
+ version: '3.8'
330
+
331
+ services:
332
+ transcription:
333
+ build: .
334
+ ports:
335
+ - "7860:7860"
336
+ environment:
337
+ - AZURE_SPEECH_KEY=${AZURE_SPEECH_KEY}
338
+ - AZURE_SPEECH_KEY_ENDPOINT=${AZURE_SPEECH_KEY_ENDPOINT}
339
+ - AZURE_REGION=${AZURE_REGION}
340
+ - AZURE_BLOB_CONNECTION=${AZURE_BLOB_CONNECTION}
341
+ - AZURE_CONTAINER=${AZURE_CONTAINER}
342
+ - AZURE_BLOB_SAS_TOKEN=${AZURE_BLOB_SAS_TOKEN}
343
+ - ALLOWED_LANGS=${ALLOWED_LANGS}
344
+ volumes:
345
+ - ./uploads:/app/uploads
346
+ - ./database:/app/database
347
+ - ./temp:/app/temp
348
+ restart: unless-stopped
349
+
350
+ nginx:
351
+ image: nginx:alpine
352
+ ports:
353
+ - "80:80"
354
+ - "443:443"
355
+ volumes:
356
+ - ./nginx.conf:/etc/nginx/nginx.conf
357
+ - ./ssl:/etc/ssl/certs
358
+ depends_on:
359
+ - transcription
360
+ restart: unless-stopped
361
+ ```
362
+
363
+ **3. Deploy with Docker**
364
+ ```bash
365
+ # Build and start
366
+ docker-compose up -d
367
+
368
+ # View logs
369
+ docker-compose logs -f transcription
370
+
371
+ # Update application
372
+ git pull
373
+ docker-compose build transcription
374
+ docker-compose up -d transcription
375
+ ```
376
+
377
+ #### Option 3: Cloud Deployment (Azure Container Instances)
378
+
379
+ **1. Create Container Registry**
380
+ ```bash
381
+ # Create ACR
382
+ az acr create --resource-group myResourceGroup \
383
+ --name myregistry --sku Basic
384
+
385
+ # Login to ACR
386
+ az acr login --name myregistry
387
+
388
+ # Build and push image
389
+ docker build -t myregistry.azurecr.io/transcription:latest .
390
+ docker push myregistry.azurecr.io/transcription:latest
391
+ ```
392
+
393
+ **2. Deploy Container Instance**
394
+ ```bash
395
+ # Create container instance
396
+ az container create \
397
+ --resource-group myResourceGroup \
398
+ --name transcription-app \
399
+ --image myregistry.azurecr.io/transcription:latest \
400
+ --cpu 2 --memory 4 \
401
+ --port 7860 \
402
+ --environment-variables \
403
+ AZURE_SPEECH_KEY=$AZURE_SPEECH_KEY \
404
+ AZURE_SPEECH_KEY_ENDPOINT=$AZURE_SPEECH_KEY_ENDPOINT \
405
+ AZURE_REGION=$AZURE_REGION \
406
+ AZURE_BLOB_CONNECTION="$AZURE_BLOB_CONNECTION" \
407
+ AZURE_CONTAINER=$AZURE_CONTAINER \
408
+ AZURE_BLOB_SAS_TOKEN="$AZURE_BLOB_SAS_TOKEN"
409
+ ```
410
+
411
+ ---
412
+
413
+ ## 📡 API Documentation
414
+
415
+ ### Core Classes and Methods
416
+
417
+ #### TranscriptionManager Class
418
+
419
+ **Purpose**: Main service class handling all transcription operations
420
+
421
+ ```python
422
+ class TranscriptionManager:
423
+ def __init__(self)
424
+
425
+ # User Authentication
426
+ def register_user(email: str, username: str, password: str,
427
+ gdpr_consent: bool, data_retention_agreed: bool,
428
+ marketing_consent: bool) -> Tuple[bool, str, Optional[str]]
429
+
430
+ def login_user(login: str, password: str) -> Tuple[bool, str, Optional[User]]
431
+
432
+ # Transcription Operations
433
+ def submit_transcription(file_bytes: bytes, original_filename: str,
434
+ user_id: str, language: str,
435
+ settings: Dict) -> str
436
+
437
+ def get_job_status(job_id: str) -> Optional[TranscriptionJob]
438
+
439
+ # Data Management
440
+ def get_user_history(user_id: str, limit: int) -> List[TranscriptionJob]
441
+ def get_user_stats(user_id: str) -> Dict
442
+ def export_user_data(user_id: str) -> Dict
443
+ def delete_user_account(user_id: str) -> bool
444
+ ```
445
+
446
+ #### DatabaseManager Class
447
+
448
+ **Purpose**: Handle database operations and Azure blob synchronization
449
+
450
+ ```python
451
+ class DatabaseManager:
452
+ def __init__(db_path: str = None)
453
+
454
+ # User Operations
455
+ def create_user(...) -> Tuple[bool, str, Optional[str]]
456
+ def authenticate_user(login: str, password: str) -> Tuple[bool, str, Optional[User]]
457
+ def get_user_by_id(user_id: str) -> Optional[User]
458
+
459
+ # Job Operations
460
+ def save_job(job: TranscriptionJob)
461
+ def get_job(job_id: str) -> Optional[TranscriptionJob]
462
+ def get_user_jobs(user_id: str, limit: int) -> List[TranscriptionJob]
463
+ def get_pending_jobs() -> List[TranscriptionJob]
464
+ ```
465
+
466
+ #### AuthManager Class
467
+
468
+ **Purpose**: Authentication utilities and validation
469
+
470
+ ```python
471
+ class AuthManager:
472
+ @staticmethod
473
+ def hash_password(password: str) -> str
474
+ def verify_password(password: str, password_hash: str) -> bool
475
+ def validate_email(email: str) -> bool
476
+ def validate_username(username: str) -> bool
477
+ def validate_password(password: str) -> Tuple[bool, str]
478
+ ```
479
+
480
+ ### Data Models
481
+
482
+ #### User Model
483
+ ```python
484
+ @dataclass
485
+ class User:
486
+ user_id: str
487
+ email: str
488
+ username: str
489
+ password_hash: str
490
+ created_at: str
491
+ last_login: Optional[str] = None
492
+ is_active: bool = True
493
+ gdpr_consent: bool = False
494
+ data_retention_agreed: bool = False
495
+ marketing_consent: bool = False
496
+ ```
497
+
498
+ #### TranscriptionJob Model
499
+ ```python
500
+ @dataclass
501
+ class TranscriptionJob:
502
+ job_id: str
503
+ user_id: str
504
+ original_filename: str
505
+ audio_url: str
506
+ language: str
507
+ status: str # pending, processing, completed, failed
508
+ created_at: str
509
+ completed_at: Optional[str] = None
510
+ transcript_text: Optional[str] = None
511
+ transcript_url: Optional[str] = None
512
+ error_message: Optional[str] = None
513
+ azure_trans_id: Optional[str] = None
514
+ settings: Optional[Dict] = None
515
+ ```
516
+
517
+ ### Configuration Parameters
518
+
519
+ #### Environment Variables
520
+ ```python
521
+ # Required
522
+ AZURE_SPEECH_KEY: str
523
+ AZURE_SPEECH_KEY_ENDPOINT: str
524
+ AZURE_REGION: str
525
+ AZURE_BLOB_CONNECTION: str
526
+ AZURE_CONTAINER: str
527
+ AZURE_BLOB_SAS_TOKEN: str
528
+
529
+ # Optional
530
+ ALLOWED_LANGS: str # JSON string
531
+ API_VERSION: str = "v3.2"
532
+ PASSWORD_SALT: str = "default_salt"
533
+ MAX_FILE_SIZE_MB: int = 500
534
+ ```
535
+
536
+ #### Transcription Settings
537
+ ```python
538
+ settings = {
539
+ 'audio_format': str, # wav, mp3, etc.
540
+ 'diarization_enabled': bool, # Speaker identification
541
+ 'speakers': int, # Max speakers (1-10)
542
+ 'profanity': str, # masked, removed, raw
543
+ 'punctuation': str, # automatic, dictated, none
544
+ 'timestamps': bool, # Include timestamps
545
+ 'lexical': bool, # Include lexical forms
546
+ 'language_id_enabled': bool, # Auto language detection
547
+ 'candidate_locales': List[str] # Language candidates
548
+ }
549
+ ```
550
+
551
+ ---
552
+
553
+ ## 🗄️ Database Schema
554
+
555
+ ### SQLite Database Structure
556
+
557
+ #### Users Table
558
+ ```sql
559
+ CREATE TABLE users (
560
+ user_id TEXT PRIMARY KEY,
561
+ email TEXT UNIQUE NOT NULL,
562
+ username TEXT UNIQUE NOT NULL,
563
+ password_hash TEXT NOT NULL,
564
+ created_at TEXT NOT NULL,
565
+ last_login TEXT,
566
+ is_active BOOLEAN DEFAULT 1,
567
+ gdpr_consent BOOLEAN DEFAULT 0,
568
+ data_retention_agreed BOOLEAN DEFAULT 0,
569
+ marketing_consent BOOLEAN DEFAULT 0
570
+ );
571
+
572
+ -- Indexes
573
+ CREATE INDEX idx_users_email ON users(email);
574
+ CREATE INDEX idx_users_username ON users(username);
575
+ ```
576
+
577
+ #### Transcriptions Table
578
+ ```sql
579
+ CREATE TABLE transcriptions (
580
+ job_id TEXT PRIMARY KEY,
581
+ user_id TEXT NOT NULL,
582
+ original_filename TEXT NOT NULL,
583
+ audio_url TEXT,
584
+ language TEXT NOT NULL,
585
+ status TEXT NOT NULL,
586
+ created_at TEXT NOT NULL,
587
+ completed_at TEXT,
588
+ transcript_text TEXT,
589
+ transcript_url TEXT,
590
+ error_message TEXT,
591
+ azure_trans_id TEXT,
592
+ settings TEXT,
593
+ FOREIGN KEY (user_id) REFERENCES users (user_id)
594
+ );
595
+
596
+ -- Indexes
597
+ CREATE INDEX idx_transcriptions_user_id ON transcriptions(user_id);
598
+ CREATE INDEX idx_transcriptions_status ON transcriptions(status);
599
+ CREATE INDEX idx_transcriptions_created_at ON transcriptions(created_at DESC);
600
+ CREATE INDEX idx_transcriptions_user_created ON transcriptions(user_id, created_at DESC);
601
+ ```
602
+
603
+ ### Azure Blob Storage Structure
604
+
605
+ ```
606
+ Container: {AZURE_CONTAINER}/
607
+ ├── shared/
608
+ │ └── database/
609
+ │ └── transcriptions.db # Shared database backup
610
+ ├── users/
611
+ │ ├── {user-id-1}/
612
+ │ │ ├── audio/ # Processed audio files
613
+ │ │ │ ├── {job-id-1}.wav
614
+ │ │ │ └── {job-id-2}.wav
615
+ │ │ ├── transcripts/ # Transcript files
616
+ │ │ │ ├── {job-id-1}.txt
617
+ │ │ │ └── {job-id-2}.txt
618
+ │ │ └── originals/ # Original uploaded files
619
+ │ │ ├── {job-id-1}_{filename}.mp4
620
+ │ │ └── {job-id-2}_{filename}.wav
621
+ │ └── {user-id-2}/
622
+ │ ├── audio/
623
+ │ ├── transcripts/
624
+ │ └── originals/
625
+ ```
626
+
627
+ ### Database Operations
628
+
629
+ #### User Management Queries
630
+ ```sql
631
+ -- Create user
632
+ INSERT INTO users (user_id, email, username, password_hash, created_at,
633
+ gdpr_consent, data_retention_agreed, marketing_consent)
634
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?);
635
+
636
+ -- Authenticate user
637
+ SELECT * FROM users
638
+ WHERE (email = ? OR username = ?) AND is_active = 1;
639
+
640
+ -- Update last login
641
+ UPDATE users SET last_login = ? WHERE user_id = ?;
642
+
643
+ -- Get user stats
644
+ SELECT status, COUNT(*) FROM transcriptions
645
+ WHERE user_id = ? GROUP BY status;
646
+ ```
647
+
648
+ #### Job Management Queries
649
+ ```sql
650
+ -- Create job
651
+ INSERT INTO transcriptions (job_id, user_id, original_filename, language,
652
+ status, created_at, settings)
653
+ VALUES (?, ?, ?, ?, 'pending', ?, ?);
654
+
655
+ -- Update job status
656
+ UPDATE transcriptions
657
+ SET status = ?, completed_at = ?, transcript_text = ?, transcript_url = ?
658
+ WHERE job_id = ?;
659
+
660
+ -- Get user jobs
661
+ SELECT * FROM transcriptions
662
+ WHERE user_id = ?
663
+ ORDER BY created_at DESC LIMIT ?;
664
+
665
+ -- Get pending jobs for background processor
666
+ SELECT * FROM transcriptions
667
+ WHERE status IN ('pending', 'processing');
668
+ ```
669
+
670
+ ---
671
+
672
+ ## 🔒 Security Implementation
673
+
674
+ ### Authentication Security
675
+
676
+ #### Password Security
677
+ ```python
678
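+ # Password hashing: one SHA-256 pass with a single application-wide salt (PASSWORD_SALT). Set PASSWORD_SALT explicitly (the "default_salt" fallback is public); for production prefer a per-user random salt with a slow KDF such as hashlib.pbkdf2_hmac or hashlib.scrypt.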
679
+ def hash_password(password: str) -> str:
680
+ salt = os.environ.get("PASSWORD_SALT", "default_salt")
681
+ return hashlib.sha256((password + salt).encode()).hexdigest()
682
+
683
+ # Password validation
684
+ def validate_password(password: str) -> Tuple[bool, str]:
685
+ if len(password) < 8:
686
+ return False, "Password must be at least 8 characters"
687
+ if not re.search(r'[A-Z]', password):
688
+ return False, "Password must contain uppercase letter"
689
+ if not re.search(r'[a-z]', password):
690
+ return False, "Password must contain lowercase letter"
691
+ if not re.search(r'\d', password):
692
+ return False, "Password must contain number"
693
+ return True, "Valid"
694
+ ```
695
+
696
+ #### Session Management
697
+ ```python
698
+ # User session state
699
+ session_state = {
700
+ 'user_id': str,
701
+ 'username': str,
702
+ 'logged_in_at': datetime,
703
+ 'last_activity': datetime
704
+ }
705
+
706
+ # Session validation
707
+ def validate_session(session_state: dict) -> bool:
708
+ if not session_state or 'user_id' not in session_state:
709
+ return False
710
+
711
+ # Check session timeout (if implemented)
712
+ last_activity = session_state.get('last_activity')
713
+ if last_activity:
714
+ timeout = timedelta(hours=24) # 24-hour sessions
715
+ if datetime.now() - last_activity > timeout:
716
+ return False
717
+
718
+ return True
719
+ ```
720
+
721
+ ### Data Security
722
+
723
+ #### Access Control
724
+ ```python
725
+ # User data access verification
726
+ def verify_user_access(job_id: str, user_id: str) -> bool:
727
+ job = get_job(job_id)
728
+ return job and job.user_id == user_id
729
+
730
+ # File path security
731
+ def get_user_blob_path(user_id: str, blob_type: str, filename: str) -> str:
732
+ # Ensure user can only access their own folder
733
+ safe_filename = os.path.basename(filename) # Prevent path traversal
734
+ return f"users/{user_id}/{blob_type}/{safe_filename}"
735
+ ```
736
+
737
+ #### Data Encryption
738
+ ```python
739
+ # Azure Blob Storage encryption (configured at Azure level)
740
+ # - Encryption at rest: Enabled by default
741
+ # - Encryption in transit: HTTPS enforced
742
+ # - Customer-managed keys: Optional enhancement
743
+
744
+ # Database encryption (for sensitive fields)
745
+ from cryptography.fernet import Fernet
746
+
747
+ def encrypt_sensitive_data(data: str, key: bytes) -> str:
748
+ f = Fernet(key)
749
+ return f.encrypt(data.encode()).decode()
750
+
751
+ def decrypt_sensitive_data(encrypted_data: str, key: bytes) -> str:
752
+ f = Fernet(key)
753
+ return f.decrypt(encrypted_data.encode()).decode()
754
+ ```
755
+
756
+ ### Azure Security
757
+
758
+ #### Blob Storage Security
759
+ ```python
760
+ # SAS token permissions: for least privilege, grant only what each operation needs (all four shown here)
761
+ sas_permissions = BlobSasPermissions(
762
+ read=True,
763
+ write=True,
764
+ delete=True,
765
+ list=True
766
+ )
767
+
768
+ # IP restrictions (optional)
769
+ sas_ip_range = "192.168.1.0/24" # Restrict to specific IP range
770
+
771
+ # Time-limited tokens
772
+ sas_expiry = datetime.utcnow() + timedelta(hours=1)
773
+ ```
774
+
775
+ #### Speech Service Security
776
+ ```python
777
+ # Secure API calls
778
+ headers = {
779
+ "Ocp-Apim-Subscription-Key": AZURE_SPEECH_KEY,
780
+ "Content-Type": "application/json"
781
+ }
782
+
783
+ # Request timeout and TLS certificate verification (retry logic is not shown here)
784
+ response = requests.post(
785
+ url,
786
+ headers=headers,
787
+ json=body,
788
+ timeout=30,
789
+ verify=True # Verify SSL certificates
790
+ )
791
+ ```
792
+
793
+ ### Input Validation
794
+
795
+ #### File Upload Security
796
+ ```python
797
+ def validate_uploaded_file(file_path: str, max_size: int = 500 * 1024 * 1024) -> Tuple[bool, str]:
798
+ try:
799
+ # Check file exists
800
+ if not os.path.exists(file_path):
801
+ return False, "File not found"
802
+
803
+ # Check file size
804
+ file_size = os.path.getsize(file_path)
805
+ if file_size > max_size:
806
+ return False, f"File too large: {file_size / 1024 / 1024:.1f}MB"
807
+
808
+ # Check file type by content (not just extension)
809
+ import magic
810
+ mime_type = magic.from_file(file_path, mime=True)
811
+ allowed_types = ['audio/', 'video/']
812
+ if not any(mime_type.startswith(t) for t in allowed_types):
813
+ return False, f"Invalid file type: {mime_type}"
814
+
815
+ return True, "Valid"
816
+
817
+ except Exception as e:
818
+ return False, f"Validation error: {str(e)}"
819
+ ```
820
+
821
+ #### SQL Injection Prevention
822
+ ```python
823
+ # Use parameterized queries (already implemented)
824
+ cursor.execute(
825
+ "SELECT * FROM users WHERE email = ? AND password_hash = ?",
826
+ (email, password_hash)
827
+ )
828
+
829
+ # Input sanitization
830
+ def sanitize_input(user_input: str) -> str:
831
+     # Escape HTML special characters (they are escaped, not removed)
832
+ import html
833
+ sanitized = html.escape(user_input)
834
+ # Limit length
835
+ return sanitized[:1000]
836
+ ```
837
+
838
+ ---
839
+
840
+ ## 📊 Monitoring & Maintenance
841
+
842
+ ### Application Monitoring
843
+
844
+ #### Health Checks
845
+ ```python
846
+ def health_check() -> Dict[str, Any]:
847
+ """System health check endpoint"""
848
+ try:
849
+ # Database check
850
+ db_status = check_database_connection()
851
+
852
+ # Azure services check
853
+ blob_status = check_blob_storage()
854
+ speech_status = check_speech_service()
855
+
856
+ # FFmpeg check
857
+ ffmpeg_status = check_ffmpeg_installation()
858
+
859
+ # Disk space check
860
+ disk_status = check_disk_space()
861
+
862
+ return {
863
+ 'status': 'healthy' if all([db_status, blob_status, speech_status, ffmpeg_status]) else 'unhealthy',
864
+ 'timestamp': datetime.now().isoformat(),
865
+ 'services': {
866
+ 'database': db_status,
867
+ 'blob_storage': blob_status,
868
+ 'speech_service': speech_status,
869
+ 'ffmpeg': ffmpeg_status,
870
+ 'disk_space': disk_status
871
+ }
872
+ }
873
+
874
+ except Exception as e:
875
+ return {
876
+ 'status': 'error',
877
+ 'timestamp': datetime.now().isoformat(),
878
+ 'error': str(e)
879
+ }
880
+
881
+ def check_database_connection() -> bool:
882
+ try:
883
+ with transcription_manager.db.get_connection() as conn:
884
+ conn.execute("SELECT 1").fetchone()
885
+ return True
886
+ except:
887
+ return False
888
+
889
+ def check_blob_storage() -> bool:
890
+ try:
891
+ client = BlobServiceClient.from_connection_string(AZURE_BLOB_CONNECTION)
892
+ client.list_containers(max_results=1)
893
+ return True
894
+ except:
895
+ return False
896
+ ```
897
+
898
+ #### Logging Configuration
899
+ ```python
900
+ import logging
901
+ from logging.handlers import RotatingFileHandler
902
+
903
+ def setup_logging():
904
+ """Configure application logging"""
905
+
906
+ # Create formatter
907
+ formatter = logging.Formatter(
908
+ '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
909
+ )
910
+
911
+ # Console handler
912
+ console_handler = logging.StreamHandler()
913
+ console_handler.setFormatter(formatter)
914
+ console_handler.setLevel(logging.INFO)
915
+
916
+ # File handler with rotation
917
+ file_handler = RotatingFileHandler(
918
+ 'logs/transcription.log',
919
+ maxBytes=10*1024*1024, # 10MB
920
+ backupCount=5
921
+ )
922
+ file_handler.setFormatter(formatter)
923
+ file_handler.setLevel(logging.DEBUG)
924
+
925
+ # Configure root logger
926
+ logger = logging.getLogger()
927
+ logger.setLevel(logging.DEBUG)
928
+ logger.addHandler(console_handler)
929
+ logger.addHandler(file_handler)
930
+
931
+ # Separate logger for sensitive operations
932
+ auth_logger = logging.getLogger('auth')
933
+ auth_handler = RotatingFileHandler(
934
+ 'logs/auth.log',
935
+ maxBytes=5*1024*1024, # 5MB
936
+ backupCount=10
937
+ )
938
+ auth_handler.setFormatter(formatter)
939
+ auth_logger.addHandler(auth_handler)
940
+ auth_logger.setLevel(logging.INFO)
941
+ ```
942
+
943
+ #### Performance Monitoring
944
+ ```python
945
+ import time
946
+ from functools import wraps
947
+
948
+ def monitor_performance(func):
949
+ """Decorator to monitor function performance"""
950
+ @wraps(func)
951
+ def wrapper(*args, **kwargs):
952
+ start_time = time.time()
953
+ try:
954
+ result = func(*args, **kwargs)
955
+ duration = time.time() - start_time
956
+ logging.info(f"{func.__name__} completed in {duration:.2f}s")
957
+ return result
958
+ except Exception as e:
959
+ duration = time.time() - start_time
960
+ logging.error(f"{func.__name__} failed after {duration:.2f}s: {str(e)}")
961
+ raise
962
+ return wrapper
963
+
964
+ # Usage
965
+ @monitor_performance
966
+ def submit_transcription(self, file_bytes, filename, user_id, language, settings):
967
+ # Implementation here
968
+ pass
969
+ ```
970
+
971
+ ### Database Maintenance
972
+
973
+ #### Backup Strategy
974
+ ```python
975
+ def backup_database():
976
+ """Backup database to Azure Blob Storage"""
977
+ try:
978
+ # Create timestamped backup
979
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
980
+ backup_name = f"shared/backups/transcriptions_backup_{timestamp}.db"
981
+
982
+ # Upload current database
983
+ blob_client = blob_service.get_blob_client(
984
+ container=AZURE_CONTAINER,
985
+ blob=backup_name
986
+ )
987
+
988
+ with open(db_path, "rb") as data:
989
+ blob_client.upload_blob(data)
990
+
991
+ logging.info(f"Database backup created: {backup_name}")
992
+
993
+ # Clean old backups (keep last 30 days)
994
+ cleanup_old_backups()
995
+
996
+ except Exception as e:
997
+ logging.error(f"Database backup failed: {str(e)}")
998
+
999
+ def cleanup_old_backups():
1000
+ """Remove backups older than 30 days"""
1001
+ try:
1002
+ cutoff_date = datetime.now() - timedelta(days=30)
1003
+ container_client = blob_service.get_container_client(AZURE_CONTAINER)
1004
+
1005
+ for blob in container_client.list_blobs(name_starts_with="shared/backups/"):
1006
+ if blob.last_modified < cutoff_date:
1007
+ blob_service.delete_blob(AZURE_CONTAINER, blob.name)
1008
+ logging.info(f"Deleted old backup: {blob.name}")
1009
+
1010
+ except Exception as e:
1011
+ logging.error(f"Backup cleanup failed: {str(e)}")
1012
+ ```
1013
+
1014
+ #### Database Optimization
1015
+ ```python
1016
+ def optimize_database():
1017
+ """Optimize database performance"""
1018
+ try:
1019
+ with transcription_manager.db.get_connection() as conn:
1020
+ # Analyze tables
1021
+ conn.execute("ANALYZE")
1022
+
1023
+ # Vacuum database (compact)
1024
+ conn.execute("VACUUM")
1025
+
1026
+ # Update statistics
1027
+ conn.execute("PRAGMA optimize")
1028
+
1029
+ logging.info("Database optimization completed")
1030
+
1031
+ except Exception as e:
1032
+ logging.error(f"Database optimization failed: {str(e)}")
1033
+
1034
+ # Schedule maintenance (weekly optimization, daily backup at 02:00)
1035
+ import schedule
1036
+
1037
+ schedule.every().week.do(optimize_database)
1038
+ schedule.every().day.at("02:00").do(backup_database)
1039
+ ```
1040
+
1041
+ ### Resource Management
1042
+
1043
+ #### Cleanup Tasks
1044
+ ```python
1045
+ def cleanup_temporary_files():
1046
+ """Clean up temporary files older than 24 hours"""
1047
+ try:
1048
+ cutoff_time = time.time() - (24 * 60 * 60) # 24 hours ago
1049
+ temp_dirs = ['uploads', 'temp']
1050
+
1051
+ for temp_dir in temp_dirs:
1052
+ if os.path.exists(temp_dir):
1053
+ for filename in os.listdir(temp_dir):
1054
+ filepath = os.path.join(temp_dir, filename)
1055
+ if os.path.isfile(filepath) and os.path.getmtime(filepath) < cutoff_time:
1056
+ os.remove(filepath)
1057
+ logging.info(f"Cleaned up temporary file: {filepath}")
1058
+
1059
+ except Exception as e:
1060
+ logging.error(f"Temporary file cleanup failed: {str(e)}")
1061
+
1062
+ def monitor_disk_space():
1063
+ """Monitor and alert on disk space"""
1064
+ try:
1065
+ import shutil
1066
+ total, used, free = shutil.disk_usage("/")
1067
+
1068
+ # Convert to GB
1069
+ free_gb = free // (1024**3)
1070
+ total_gb = total // (1024**3)
1071
+ usage_percent = (used / total) * 100
1072
+
1073
+ if usage_percent > 85:
1074
+ logging.warning(f"High disk usage: {usage_percent:.1f}% ({free_gb}GB free)")
1075
+
1076
+ if free_gb < 5:
1077
+ logging.critical(f"Low disk space: {free_gb}GB remaining")
1078
+
1079
+ except Exception as e:
1080
+ logging.error(f"Disk space monitoring failed: {str(e)}")
1081
+ ```
1082
+
1083
+ ### Monitoring Alerts
1084
+
1085
+ #### Email Alerts (Optional)
1086
+ ```python
1087
+ import smtplib
1088
+ from email.mime.text import MIMEText
1089
+
1090
+ def send_alert(subject: str, message: str):
1091
+ """Send email alert for critical issues"""
1092
+ try:
1093
+ smtp_server = os.environ.get("SMTP_SERVER")
1094
+ smtp_port = int(os.environ.get("SMTP_PORT", "587"))
1095
+ smtp_user = os.environ.get("SMTP_USER")
1096
+ smtp_pass = os.environ.get("SMTP_PASS")
1097
+ alert_email = os.environ.get("ALERT_EMAIL")
1098
+
1099
+ if not all([smtp_server, smtp_user, smtp_pass, alert_email]):
1100
+ return # Email not configured
1101
+
1102
+ msg = MIMEText(message)
1103
+ msg['Subject'] = f"[Transcription Service] {subject}"
1104
+ msg['From'] = smtp_user
1105
+ msg['To'] = alert_email
1106
+
1107
+ with smtplib.SMTP(smtp_server, smtp_port) as server:
1108
+ server.starttls()
1109
+ server.login(smtp_user, smtp_pass)
1110
+ server.send_message(msg)
1111
+
1112
+ except Exception as e:
1113
+ logging.error(f"Failed to send alert: {str(e)}")
1114
+ ```
1115
+
1116
+ ---
1117
+
1118
+ ## 🤝 Contributing Guidelines
1119
+
1120
+ ### Development Workflow
1121
+
1122
+ #### 1. Setup Development Environment
1123
+ ```bash
1124
+ # Fork the repository on GitHub, then clone your fork
1125
+ git clone https://github.com/your-username/azure-speech-transcription.git
1126
+ cd azure-speech-transcription
1127
+
1128
+ # Create feature branch
1129
+ git checkout -b feature/your-feature-name
1130
+
1131
+ # Setup environment
1132
+ python -m venv venv
1133
+ source venv/bin/activate # or venv\Scripts\activate on Windows
1134
+ pip install -r requirements.txt
1135
+ pip install -r requirements-dev.txt # Development dependencies
1136
+ ```
1137
+
1138
+ #### 2. Code Quality Standards
1139
+
1140
+ **Python Style Guide**
1141
+ - Follow PEP 8 style guidelines
1142
+ - Use type hints for function parameters and return values
1143
+ - Maximum line length: 88 characters (Black formatter)
1144
+ - Use meaningful variable and function names
1145
+
1146
+ **Code Formatting**
1147
+ ```bash
1148
+ # Install development tools
1149
+ pip install black flake8 mypy pytest
1150
+
1151
+ # Format code
1152
+ black .
1153
+
1154
+ # Check style
1155
+ flake8 .
1156
+
1157
+ # Type checking
1158
+ mypy app_core.py gradio_app.py
1159
+
1160
+ # Run tests
1161
+ pytest tests/
1162
+ ```
1163
+
1164
+ **Documentation Standards**
1165
+ - All functions must have docstrings
1166
+ - Include type hints
1167
+ - Document complex logic with inline comments
1168
+ - Update README.md for new features
1169
+
1170
+ ```python
1171
+ def submit_transcription(
1172
+ self,
1173
+ file_bytes: bytes,
1174
+ original_filename: str,
1175
+ user_id: str,
1176
+ language: str,
1177
+ settings: Dict[str, Any]
1178
+ ) -> str:
1179
+ """
1180
+ Submit a new transcription job for processing.
1181
+
1182
+ Args:
1183
+ file_bytes: Raw bytes of the audio/video file
1184
+ original_filename: Original name of the uploaded file
1185
+ user_id: ID of the authenticated user
1186
+ language: Language code for transcription (e.g., 'en-US')
1187
+ settings: Transcription configuration options
1188
+
1189
+ Returns:
1190
+ str: Unique job ID for tracking transcription progress
1191
+
1192
+ Raises:
1193
+ ValueError: If user_id is invalid or file is too large
1194
+ ConnectionError: If Azure services are unavailable
1195
+ """
1196
+ ```
1197
+
1198
+ #### 3. Testing Requirements
1199
+
1200
+ **Unit Tests**
1201
+ ```python
1202
+ import pytest
1203
+ from unittest.mock import Mock, patch
1204
+ from app_core import TranscriptionManager, AuthManager
1205
+
1206
+ class TestAuthManager:
1207
+ def test_password_hashing(self):
1208
+ password = "TestPassword123"
1209
+ hashed = AuthManager.hash_password(password)
1210
+
1211
+ assert hashed != password
1212
+ assert AuthManager.verify_password(password, hashed)
1213
+ assert not AuthManager.verify_password("wrong", hashed)
1214
+
1215
+ def test_email_validation(self):
1216
+ assert AuthManager.validate_email("test@example.com")
1217
+ assert not AuthManager.validate_email("invalid-email")
1218
+ assert not AuthManager.validate_email("")
1219
+
1220
+ class TestTranscriptionManager:
1221
+ @patch('app_core.BlobServiceClient')
1222
+ def test_submit_transcription(self, mock_blob):
1223
+ manager = TranscriptionManager()
1224
+
1225
+ job_id = manager.submit_transcription(
1226
+ b"fake audio data",
1227
+ "test.wav",
1228
+ "user123",
1229
+ "en-US",
1230
+ {"audio_format": "wav"}
1231
+ )
1232
+
1233
+ assert isinstance(job_id, str)
1234
+ assert len(job_id) == 36 # UUID length
1235
+ ```
1236
+
1237
+ **Integration Tests**
1238
+ ```python
1239
+ class TestIntegration:
1240
+ def test_full_transcription_workflow(self):
1241
+ # Test complete workflow from upload to download
1242
+ pass
1243
+
1244
+ def test_user_registration_and_login(self):
1245
+ # Test complete auth workflow
1246
+ pass
1247
+ ```
1248
+
1249
+ #### 4. Commit Guidelines
1250
+
1251
+ **Commit Message Format**
1252
+ ```
1253
+ type(scope): brief description
1254
+
1255
+ Detailed explanation of changes if needed
1256
+
1257
+ - List specific changes
1258
+ - Include any breaking changes
1259
+ - Reference issue numbers
1260
+
1261
+ Closes #123
1262
+ ```
1263
+
1264
+ **Commit Types**
1265
+ - `feat`: New feature
1266
+ - `fix`: Bug fix
1267
+ - `docs`: Documentation changes
1268
+ - `style`: Code style changes (formatting, etc.)
1269
+ - `refactor`: Code refactoring
1270
+ - `test`: Adding or updating tests
1271
+ - `chore`: Maintenance tasks
1272
+
1273
+ **Example Commits**
1274
+ ```bash
1275
+ git commit -m "feat(auth): add password strength validation
1276
+
1277
+ - Implement password complexity requirements
1278
+ - Add client-side validation feedback
1279
+ - Update registration form UI
1280
+
1281
+ Closes #45"
1282
+
1283
+ git commit -m "fix(transcription): handle Azure service timeouts
1284
+
1285
+ - Add retry logic for failed API calls
1286
+ - Improve error messages for users
1287
+ - Log detailed error information
1288
+
1289
+ Fixes #67"
1290
+ ```
1291
+
1292
+ #### 5. Pull Request Process
1293
+
1294
+ **PR Checklist**
1295
+ - [ ] Code follows style guidelines
1296
+ - [ ] All tests pass
1297
+ - [ ] Documentation updated
1298
+ - [ ] Security considerations reviewed
1299
+ - [ ] Performance impact assessed
1300
+ - [ ] Breaking changes documented
1301
+
1302
+ **PR Template**
1303
+ ```markdown
1304
+ ## Description
1305
+ Brief description of changes
1306
+
1307
+ ## Type of Change
1308
+ - [ ] Bug fix
1309
+ - [ ] New feature
1310
+ - [ ] Breaking change
1311
+ - [ ] Documentation update
1312
+
1313
+ ## Testing
1314
+ - [ ] Unit tests added/updated
1315
+ - [ ] Integration tests pass
1316
+ - [ ] Manual testing completed
1317
+
1318
+ ## Security
1319
+ - [ ] No sensitive data exposed
1320
+ - [ ] Input validation implemented
1321
+ - [ ] Access controls maintained
1322
+
1323
+ ## Performance
1324
+ - [ ] No performance degradation
1325
+ - [ ] Database queries optimized
1326
+ - [ ] Resource usage considered
1327
+ ```
1328
+
1329
+ ### Feature Development
1330
+
1331
+ #### Adding New Languages
1332
+ ```python
1333
+ # 1. Update environment configuration
1334
+ ALLOWED_LANGS = {
1335
+ "en-US": "English (United States)",
1336
+ "es-ES": "Spanish (Spain)",
1337
+ "new-LANG": "New Language Name"
1338
+ }
1339
+
1340
+ # 2. Test language support
1341
+ def test_new_language():
1342
+ # Verify Azure Speech Services supports the language
1343
+ # Test transcription accuracy
1344
+ # Update documentation
1345
+ ```
1346
+
1347
+ #### Adding New Audio Formats
1348
+ ```python
1349
+ # 1. Update supported formats list
1350
+ AUDIO_FORMATS = [
1351
+ "wav", "mp3", "ogg", "opus", "flac",
1352
+ "new_format" # Add new format
1353
+ ]
1354
+
1355
+ # 2. Update FFmpeg conversion logic
1356
+ def _convert_to_audio(self, input_path, output_path, audio_format="wav"):
1357
+ if audio_format == "new_format":
1358
+ # Add specific conversion parameters
1359
+ cmd = ["ffmpeg", "-i", input_path, "-codec", "new_codec", output_path]
1360
+ ```
1361
+
1362
+ #### Adding New Features
1363
+ ```python
1364
+ # 1. Database schema updates
1365
+ def upgrade_database_schema():
1366
+ with self.get_connection() as conn:
1367
+ conn.execute("""
1368
+ ALTER TABLE transcriptions
1369
+ ADD COLUMN new_feature_data TEXT
1370
+ """)
1371
+
1372
+ # 2. API endpoint updates
1373
+ def new_feature_endpoint(user_id: str, feature_data: Dict) -> Dict:
1374
+ # Implement new feature logic
1375
+ pass
1376
+
1377
+ # 3. UI updates
1378
+ def add_new_feature_ui():
1379
+ new_feature_input = gr.Textbox(label="New Feature")
1380
+ new_feature_button = gr.Button("Use New Feature")
1381
+ ```
1382
+
1383
+ ---
1384
+
1385
+ ## ⚙️ Advanced Configuration
1386
+
1387
+ ### Performance Optimization
1388
+
1389
+ #### Concurrent Processing
1390
+ ```python
1391
+ # Adjust worker thread pool size based on server capacity
1392
+ class TranscriptionManager:
1393
+ def __init__(self, max_workers: int = None):
1394
+ if max_workers is None:
1395
+ # Auto-detect based on CPU cores
1396
+ import multiprocessing
1397
+ max_workers = min(multiprocessing.cpu_count(), 10)
1398
+
1399
+ self.executor = ThreadPoolExecutor(max_workers=max_workers)
1400
+
1401
+ # Configure based on server specs
1402
+ # Small server: max_workers=2-4
1403
+ # Medium server: max_workers=5-8
1404
+ # Large server: max_workers=10+
1405
+ ```
1406
+
1407
+ #### Database Optimization
1408
+ ```python
1409
+ # SQLite performance tuning
1410
+ def configure_database_performance(db_path: str):
1411
+ with sqlite3.connect(db_path) as conn:
1412
+ # Enable WAL mode for better concurrency
1413
+ conn.execute("PRAGMA journal_mode=WAL")
1414
+
1415
+         # Increase cache size (a positive value is a page count; use a negative value for KiB)
1416
+ conn.execute("PRAGMA cache_size=10000")
1417
+
1418
+ # Optimize synchronization
1419
+ conn.execute("PRAGMA synchronous=NORMAL")
1420
+
1421
+ # Enable foreign keys
1422
+ conn.execute("PRAGMA foreign_keys=ON")
1423
+ ```
1424
+
1425
+ #### Memory Management
1426
+ ```python
1427
+ # Large file handling
1428
+ def process_large_file(file_path: str):
1429
+ """Process large files in chunks to manage memory"""
1430
+ chunk_size = 64 * 1024 * 1024 # 64MB chunks
1431
+
1432
+ with open(file_path, 'rb') as f:
1433
+ while chunk := f.read(chunk_size):
1434
+ # Process chunk
1435
+ yield chunk
1436
+
1437
+ # Garbage collection for long-running processes
1438
+ import gc
1439
+
1440
+ def cleanup_memory():
1441
+ """Force garbage collection"""
1442
+ gc.collect()
1443
+
1444
+ # Schedule periodic cleanup
1445
+ schedule.every(30).minutes.do(cleanup_memory)
1446
+ ```
1447
+
1448
+ ### Security Hardening
1449
+
1450
+ #### Rate Limiting
1451
+ ```python
1452
+ from collections import defaultdict
1453
+ from time import time
1454
+
1455
+ class RateLimiter:
1456
+ def __init__(self, max_requests: int = 100, window: int = 3600):
1457
+ self.max_requests = max_requests
1458
+ self.window = window
1459
+ self.requests = defaultdict(list)
1460
+
1461
+ def is_allowed(self, user_id: str) -> bool:
1462
+ now = time()
1463
+ user_requests = self.requests[user_id]
1464
+
1465
+ # Clean old requests
1466
+ user_requests[:] = [req_time for req_time in user_requests
1467
+ if now - req_time < self.window]
1468
+
1469
+ # Check limit
1470
+ if len(user_requests) >= self.max_requests:
1471
+ return False
1472
+
1473
+ user_requests.append(now)
1474
+ return True
1475
+
1476
+ # Usage in endpoints
1477
+ rate_limiter = RateLimiter(max_requests=50, window=3600) # 50 per hour
1478
+
1479
+ def submit_transcription(self, user_id: str, ...):
1480
+ if not rate_limiter.is_allowed(user_id):
1481
+ raise Exception("Rate limit exceeded")
1482
+ ```
1483
+
1484
+ #### Input Sanitization
1485
+ ```python
1486
+ import bleach
1487
+ import re
1488
+
1489
+ def sanitize_filename(filename: str) -> str:
1490
+ """Sanitize uploaded filename"""
1491
+ # Remove path traversal attempts
1492
+ filename = os.path.basename(filename)
1493
+
1494
+ # Remove dangerous characters
1495
+ filename = re.sub(r'[<>:"/\\|?*]', '_', filename)
1496
+
1497
+ # Limit length
1498
+ if len(filename) > 255:
1499
+ name, ext = os.path.splitext(filename)
1500
+ filename = name[:250] + ext
1501
+
1502
+ return filename
1503
+
1504
+ def sanitize_user_input(text: str) -> str:
1505
+ """Sanitize user text input"""
1506
+ # Remove HTML tags
1507
+ text = bleach.clean(text, tags=[], strip=True)
1508
+
1509
+ # Limit length
1510
+ text = text[:1000]
1511
+
1512
+ return text.strip()
1513
+ ```
1514
+
1515
+ #### Audit Logging
1516
+ ```python
1517
+ class AuditLogger:
1518
+ def __init__(self):
1519
+ self.logger = logging.getLogger('audit')
1520
+
1521
+ def log_user_action(self, user_id: str, action: str, details: Dict = None):
1522
+ """Log user actions for security auditing"""
1523
+ audit_entry = {
1524
+ 'timestamp': datetime.now().isoformat(),
1525
+ 'user_id': user_id,
1526
+ 'action': action,
1527
+ 'details': details or {},
1528
+ 'ip_address': self._get_client_ip(),
1529
+ 'user_agent': self._get_user_agent()
1530
+ }
1531
+
1532
+ self.logger.info(json.dumps(audit_entry))
1533
+
1534
+ def _get_client_ip(self) -> str:
1535
+ # Implementation depends on deployment setup
1536
+ return "unknown"
1537
+
1538
+ def _get_user_agent(self) -> str:
1539
+ # Implementation depends on deployment setup
1540
+ return "unknown"
1541
+
1542
+ # Usage
1543
+ audit = AuditLogger()
1544
+ audit.log_user_action(user_id, "login", {"success": True})
1545
+ audit.log_user_action(user_id, "transcription_submit", {"filename": filename})
1546
+ ```
1547
+
1548
+ ### Custom Extensions
1549
+
1550
+ #### Plugin Architecture
1551
+ ```python
1552
+ class TranscriptionPlugin:
1553
+ """Base class for transcription plugins"""
1554
+
1555
+ def pre_process(self, file_bytes: bytes, settings: Dict) -> bytes:
1556
+ """Pre-process audio before transcription"""
1557
+ return file_bytes
1558
+
1559
+ def post_process(self, transcript: str, settings: Dict) -> str:
1560
+ """Post-process transcript text"""
1561
+ return transcript
1562
+
1563
+ def get_name(self) -> str:
1564
+ """Return plugin name"""
1565
+ raise NotImplementedError
1566
+
1567
+ class NoiseReductionPlugin(TranscriptionPlugin):
1568
+ def get_name(self) -> str:
1569
+ return "noise_reduction"
1570
+
1571
+ def pre_process(self, file_bytes: bytes, settings: Dict) -> bytes:
1572
+ # Implement noise reduction using audio processing library
1573
+ # This is a placeholder - actual implementation would use
1574
+ # libraries like librosa, scipy, or pydub
1575
+ return file_bytes
1576
+
1577
+ class LanguageDetectionPlugin(TranscriptionPlugin):
1578
+ def get_name(self) -> str:
1579
+ return "language_detection"
1580
+
1581
+ def pre_process(self, file_bytes: bytes, settings: Dict) -> bytes:
1582
+ # Detect language and update settings
1583
+ detected_language = self._detect_language(file_bytes)
1584
+ settings['detected_language'] = detected_language
1585
+ return file_bytes
1586
+
1587
+ # Plugin manager
1588
+ class PluginManager:
1589
+ def __init__(self):
1590
+ self.plugins: List[TranscriptionPlugin] = []
1591
+
1592
+ def register_plugin(self, plugin: TranscriptionPlugin):
1593
+ self.plugins.append(plugin)
1594
+
1595
+ def apply_pre_processing(self, file_bytes: bytes, settings: Dict) -> bytes:
1596
+ for plugin in self.plugins:
1597
+ file_bytes = plugin.pre_process(file_bytes, settings)
1598
+ return file_bytes
1599
+
1600
+ def apply_post_processing(self, transcript: str, settings: Dict) -> str:
1601
+ for plugin in self.plugins:
1602
+ transcript = plugin.post_process(transcript, settings)
1603
+ return transcript
1604
+ ```
1605
+
1606
+ ---
1607
+
1608
+ ## 🔧 Troubleshooting
1609
+
1610
+ ### Common Development Issues
1611
+
1612
+ #### Environment Setup Problems
1613
+
1614
+ **Issue**: Azure connection fails
1615
+ ```bash
1616
+ # Check environment variables
1617
+ python -c "
1618
+ import os
1619
+ print('AZURE_SPEECH_KEY:', bool(os.getenv('AZURE_SPEECH_KEY')))
1620
+ print('AZURE_BLOB_CONNECTION:', bool(os.getenv('AZURE_BLOB_CONNECTION')))
1621
+ "
1622
+
1623
+ # Test Azure connection
1624
+ python -c "
1625
+ from azure.storage.blob import BlobServiceClient
1626
+ client = BlobServiceClient.from_connection_string('$AZURE_BLOB_CONNECTION')
1627
+ print('Containers:', list(client.list_containers()))
1628
+ "
1629
+ ```
1630
+
1631
+ **Issue**: FFmpeg not found
1632
+ ```bash
1633
+ # Check FFmpeg installation
1634
+ ffmpeg -version
1635
+
1636
+ # Install FFmpeg (Ubuntu/Debian)
1637
+ sudo apt update && sudo apt install ffmpeg
1638
+
1639
+ # Install FFmpeg (Windows with Chocolatey)
1640
+ choco install ffmpeg
1641
+
1642
+ # Install FFmpeg (macOS with Homebrew)
1643
+ brew install ffmpeg
1644
+ ```
1645
+
1646
+ **Issue**: Database initialization fails
1647
+ ```python
1648
+ # Check database permissions
1649
+ import os
1650
+ db_dir = "database"
1651
+ if not os.path.exists(db_dir):
1652
+ os.makedirs(db_dir)
1653
+ print(f"Created directory: {db_dir}")
1654
+
1655
+ # Test database creation
1656
+ import sqlite3
1657
+ conn = sqlite3.connect("database/test.db")
1658
+ conn.execute("CREATE TABLE test (id INTEGER)")
1659
+ conn.close()
1660
+ print("Database test successful")
1661
+ ```
1662
+
1663
+ #### Runtime Issues
1664
+
1665
+ **Issue**: Memory errors with large files
1666
+ ```python
1667
+ # Monitor memory usage
1668
+ import psutil
1669
+
1670
+ def check_memory():
1671
+ memory = psutil.virtual_memory()
1672
+ print(f"Memory usage: {memory.percent}%")
1673
+ print(f"Available: {memory.available / 1024**3:.1f}GB")
1674
+
1675
+ # Implement file chunking for large uploads
1676
+ def process_large_file_in_chunks(file_path: str, chunk_size: int = 64*1024*1024):
1677
+ with open(file_path, 'rb') as f:
1678
+ while chunk := f.read(chunk_size):
1679
+ yield chunk
1680
+ ```
1681
+
1682
+ **Issue**: Transcription jobs stuck
1683
+ ```python
1684
+ # Check pending jobs
1685
+ def diagnose_stuck_jobs():
1686
+ pending_jobs = transcription_manager.db.get_pending_jobs()
1687
+ print(f"Pending jobs: {len(pending_jobs)}")
1688
+
1689
+ for job in pending_jobs:
1690
+ duration = datetime.now() - datetime.fromisoformat(job.created_at)
1691
+ print(f"Job {job.job_id}: {job.status} for {duration}")
1692
+
1693
+ if duration.total_seconds() > 3600: # 1 hour
1694
+ print(f"⚠️ Job {job.job_id} may be stuck")
1695
+
1696
+ # Reset stuck jobs
1697
+ def reset_stuck_jobs():
1698
+ with transcription_manager.db.get_connection() as conn:
1699
+ conn.execute("""
1700
+ UPDATE transcriptions
1701
+ SET status = 'pending', azure_trans_id = NULL
1702
+ WHERE status = 'processing'
1703
+ AND created_at < datetime('now', '-1 hour')
1704
+ """)
1705
+ ```
1706
+
1707
+ **Issue**: Azure API errors
1708
+ ```python
1709
+ # Test Azure Speech Service
1710
+ def test_azure_speech():
1711
+ try:
1712
+ url = f"{AZURE_SPEECH_KEY_ENDPOINT}/speechtotext/v3.2/transcriptions"
1713
+ headers = {"Ocp-Apim-Subscription-Key": AZURE_SPEECH_KEY}
1714
+
1715
+ response = requests.get(url, headers=headers)
1716
+ print(f"Status: {response.status_code}")
1717
+ print(f"Response: {response.text[:200]}")
1718
+
1719
+ except Exception as e:
1720
+ print(f"Azure Speech test failed: {e}")
1721
+
1722
+ # Check Azure service status
1723
+ def check_azure_status():
1724
+ # Check Azure status page
1725
+ status_url = "https://status.azure.com/en-us/status"
1726
+ print(f"Check Azure status: {status_url}")
1727
+ ```
1728
+
1729
+ ### Debugging Tools
1730
+
1731
+ #### Debug Mode Configuration
1732
+ ```python
1733
+ # Enable debug mode
1734
+ DEBUG = os.environ.get("DEBUG", "false").lower() == "true"
1735
+
1736
+ if DEBUG:
1737
+ logging.basicConfig(level=logging.DEBUG)
1738
+
1739
+ # Enable Gradio debug mode
1740
+ demo.launch(debug=True, show_error=True)
1741
+ ```
1742
+
1743
+ #### Performance Profiling
1744
+ ```python
1745
+ import cProfile
1746
+ import pstats
1747
+
1748
def profile_function(func):
    """Decorator that profiles each call of *func* and prints the top 10 entries.

    Every call gets its own ``cProfile.Profile`` so the printed statistics
    describe that call only. The profiler is always disabled again, even when
    *func* raises, and the wrapper keeps the name and docstring of *func*.

    Returns:
        The wrapped callable; it returns whatever *func* returns.
    """
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        profiler = cProfile.Profile()
        profiler.enable()
        try:
            result = func(*args, **kwargs)
        finally:
            # Never leave the profiler running: an exception in func must not
            # leak an active profiler into the rest of the program.
            profiler.disable()

        # Print stats
        stats = pstats.Stats(profiler)
        stats.sort_stats('cumulative')
        stats.print_stats(10)  # Top 10 functions

        return result

    return wrapper
1765
+
1766
+ # Usage
1767
+ @profile_function
1768
+ def submit_transcription(self, ...):
1769
+ # Function implementation
1770
+ pass
1771
+ ```
1772
+
1773
+ #### Log Analysis
1774
+ ```python
1775
def analyze_logs(log_file: str = "logs/transcription.log",
                 slow_threshold_seconds: float = 30):
    """Scan an application log for errors, warnings and slow operations.

    A line is classified by the first rule that matches: it contains
    ``ERROR``, it contains ``WARNING``, or it reports ``completed in <N>s``
    with N above *slow_threshold_seconds*.

    Args:
        log_file: Path of the log file to read.
        slow_threshold_seconds: Durations above this many seconds are
            reported as performance issues.

    Returns:
        Dict with the keys ``errors``, ``warnings`` and ``performance_issues``,
        each holding at most the last 10 matching lines (stripped).

    Raises:
        OSError: If *log_file* cannot be opened.
    """
    import re

    # Accept whole seconds ("45s") as well as fractional ones ("45.2s").
    duration_pattern = re.compile(r'completed in (\d+(?:\.\d+)?)s')

    errors = []
    warnings = []
    performance_issues = []

    # Decode explicitly so one undecodable byte cannot abort the analysis.
    with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            if 'ERROR' in line:
                errors.append(line.strip())
            elif 'WARNING' in line:
                warnings.append(line.strip())
            elif 'completed in' in line:
                # Extract timing information
                match = duration_pattern.search(line)
                if match and float(match.group(1)) > slow_threshold_seconds:
                    performance_issues.append(line.strip())

    print(f"Errors: {len(errors)}")
    print(f"Warnings: {len(warnings)}")
    print(f"Performance issues: {len(performance_issues)}")

    return {
        'errors': errors[-10:],  # Last 10 errors
        'warnings': warnings[-10:],  # Last 10 warnings
        'performance_issues': performance_issues[-10:]
    }
1804
+ ```
1805
+
1806
+ ### Production Troubleshooting
1807
+
1808
+ #### Service Health Check
1809
+ ```bash
1810
+ #!/bin/bash
1811
+ # health_check.sh
1812
+
1813
+ echo "=== System Health Check ==="
1814
+
1815
+ # Check service status
1816
+ systemctl is-active transcription
1817
+ systemctl is-active nginx
1818
+
1819
+ # Check disk space
1820
+ df -h
1821
+
1822
+ # Check memory usage
1823
+ free -h
1824
+
1825
+ # Check CPU usage
1826
+ top -b -n1 | grep "Cpu(s)"
1827
+
1828
+ # Check logs for errors
1829
+ tail -n 50 /home/transcription/app/logs/transcription.log | grep ERROR
1830
+
1831
+ # Check Azure connectivity
1832
+ curl -s -o /dev/null -w "%{http_code}" https://azure.microsoft.com/
1833
+
1834
+ echo "=== Health Check Complete ==="
1835
+ ```
1836
+
1837
+ #### Database Recovery
1838
+ ```python
1839
def recover_database():
    """Download the newest database backup from Azure Blob Storage.

    Looks under ``shared/backups/`` in ``AZURE_CONTAINER``, picks the blob with
    the latest ``last_modified`` value and writes it to
    ``database/transcriptions_restored.db``. Progress and failures are printed;
    no exception is propagated.
    """
    try:
        container = blob_service.get_container_client(AZURE_CONTAINER)

        # Collect every available backup with its modification time.
        candidates = [
            {'name': item.name, 'modified': item.last_modified}
            for item in container.list_blobs(name_starts_with="shared/backups/")
        ]

        if not candidates:
            print("No backups found")
            return

        # Newest first: the most recently modified backup wins.
        latest_backup = max(candidates, key=lambda entry: entry['modified'])['name']
        print(f"Restoring from: {latest_backup}")

        source = blob_service.get_blob_client(
            container=AZURE_CONTAINER,
            blob=latest_backup
        )

        # Write next to the live database; the restored copy is not swapped in here.
        with open("database/transcriptions_restored.db", "wb") as target:
            payload = source.download_blob().readall()
            target.write(payload)

        print("Database restored successfully")
        print("Restart the application to use restored database")

    except Exception as e:
        print(f"Database recovery failed: {str(e)}")
1877
+ ```
1878
+
1879
+ ---
1880
+
1881
+ ## 📚 Additional Resources
1882
+
1883
+ ### Documentation Links
1884
+ - [Azure Speech Services Documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/)
1885
+ - [Azure Blob Storage Documentation](https://docs.microsoft.com/en-us/azure/storage/blobs/)
1886
+ - [Gradio Documentation](https://gradio.app/docs/)
1887
+ - [SQLite Documentation](https://www.sqlite.org/docs.html)
1888
+ - [FFmpeg Documentation](https://ffmpeg.org/documentation.html)
1889
+
1890
+ ### Useful Tools
1891
+ - **Azure Storage Explorer**: GUI for managing blob storage
1892
+ - **DB Browser for SQLite**: Visual database management
1893
+ - **Postman**: API testing and development
1894
+ - **Azure CLI**: Command-line Azure management
1895
+ - **Visual Studio Code**: Recommended IDE with Azure extensions
1896
+
1897
+ ### Community Resources
1898
+ - [Azure Speech Services Community](https://docs.microsoft.com/en-us/answers/topics/azure-speech-services.html)
1899
+ - [Gradio Community](https://github.com/gradio-app/gradio/discussions)
1900
+ - [Python Audio Processing Libraries](https://github.com/topics/audio-processing)
1901
+
1902
+ ---
1903
+
1904
+ **This developer guide provides comprehensive information for setting up, developing, deploying, and maintaining the Azure Speech Transcription service. For additional help, refer to the linked documentation and community resources.** 🚀
README.md CHANGED
@@ -1,14 +1,12 @@
1
- ---
2
- title: Azure Powered AI Summary
3
- emoji: 🌍
4
- colorFrom: pink
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 5.44.1
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- short_description: Azure Powered AI Summary with GPT 4.1 mini
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Azure Transcript Service
3
+ emoji: 🔥
4
+ colorFrom: blue
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 5.35.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
USER.md ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🎙️ Azure Speech Transcription - User Guide
2
+
3
+ ## 📋 Table of Contents
4
+
5
+ - [Getting Started](#-getting-started)
6
+ - [Account Management](#-account-management)
7
+ - [Using Transcription Services](#-using-transcription-services)
8
+ - [Managing Your History](#-managing-your-history)
9
+ - [Privacy & Data Control](#-privacy--data-control)
10
+ - [Troubleshooting](#-troubleshooting)
11
+ - [FAQ](#-faq)
12
+
13
+ ---
14
+
15
+ ## 🚀 Getting Started
16
+
17
+ ### What is Azure Speech Transcription?
18
+
19
+ Azure Speech Transcription is a secure, PDPA-compliant service that converts your audio and video files into accurate text transcripts with speaker identification and precise timestamps. Your data is stored securely in your own private folder, ensuring complete privacy and compliance with data protection regulations.
20
+
21
+ ### Key Features
22
+
23
+ - 🎯 **High Accuracy**: Powered by Microsoft Azure Speech Services
24
+ - 🗣️ **Speaker Identification**: Automatically identifies different speakers
25
+ - ⏰ **Precise Timestamps**: Every sentence includes exact timing (HH:MM:SS)
26
+ - 🔒 **Privacy Compliant**: GDPR, PDPA, and data protection compliant
27
+ - 📁 **Multiple Formats**: Supports audio and video files
28
+ - 🌍 **Multi-Language**: Support for multiple languages
29
+ - 📊 **Personal Dashboard**: Track your usage and history
30
+
31
+ ---
32
+
33
+ ## 🔐 Account Management
34
+
35
+ ### Creating Your Account
36
+
37
+ 1. **Go to Registration Tab**
38
+ - Open the application in your web browser
39
+ - Click on the "📝 Register" tab
40
+
41
+ 2. **Fill Your Information**
42
+ - **Email**: Enter a valid email address
43
+ - **Username**: Choose 3-30 characters (letters, numbers, underscore only)
44
+ - **Password**: Create a strong password (minimum 8 characters with uppercase, lowercase, and numbers)
45
+ - **Confirm Password**: Re-enter your password
46
+
47
+ 3. **Privacy Consents** (the first two are required)
48
+ - ✅ **GDPR Consent**: Required for account creation
49
+ - ✅ **Data Retention**: Required for service functionality
50
+ - ☐ **Marketing**: Optional - receive updates and news
51
+
52
+ 4. **Create Account**
53
+ - Click "📝 Create Account"
54
+ - You'll see a success message
55
+ - Click "🔑 Go to Login" to proceed
56
+
57
+ ### Logging In
58
+
59
+ 1. **Go to Login Tab**
60
+ - Click on the "🔑 Login" tab
61
+
62
+ 2. **Enter Credentials**
63
+ - **Email or Username**: Enter either your email or username
64
+ - **Password**: Enter your password
65
+
66
+ 3. **Access Your Account**
67
+ - Click "🔑 Login"
68
+ - You'll be taken to your personal dashboard
69
+
70
+ ### Password Requirements
71
+
72
+ Your password must include:
73
+ - ✅ At least 8 characters
74
+ - ✅ One uppercase letter (A-Z)
75
+ - ✅ One lowercase letter (a-z)
76
+ - ✅ One number (0-9)
77
+
78
+ **Strong Password Examples:**
79
+ - `MySecure123!`
80
+ - `Transcribe2024#`
81
+ - `AudioFiles456$`
82
+
83
+ ---
84
+
85
+ ## 🎙️ Using Transcription Services
86
+
87
+ ### Supported File Formats
88
+
89
+ #### Audio Formats
90
+ - **WAV** (recommended for fastest processing)
91
+ - **MP3**, **OGG**, **OPUS**, **FLAC**
92
+ - **WMA**, **AAC**, **M4A**, **AMR**
93
+ - **WebM**, **Speex**
94
+
95
+ #### Video Formats
96
+ - **MP4**, **MOV**, **AVI**, **MKV**
97
+ - **WMV**, **FLV**, **3GP**
98
+
99
+ ### File Size Limits
100
+ - **Maximum**: 500MB per file
101
+ - **Recommended**: Under 100MB for faster processing
102
+
103
+ ### Step-by-Step Transcription
104
+
105
+ 1. **Upload Your File**
106
+ - Click "Browse" under "Audio or Video File"
107
+ - Select your file from your computer
108
+ - Wait for upload confirmation
109
+
110
+ 2. **Configure Settings**
111
+
112
+ **Basic Settings:**
113
+ - **Language**: Choose the primary language spoken
114
+ - **Output Format**: Select audio format (WAV recommended)
115
+
116
+ **Advanced Settings:**
117
+ - **Speaker Identification**: Enable to identify different speakers
118
+ - **Max Speakers**: Set expected number of speakers (1-10)
119
+ - **Timestamps**: Include precise timing information
120
+ - **Profanity Filter**: Choose how to handle profanity
121
+ - `Masked`: Replace with ***
122
+ - `Removed`: Remove completely
123
+ - `Raw`: Keep original
124
+ - **Punctuation**: Automatic punctuation insertion
125
+ - **Lexical Form**: Include alternative word forms
126
+
127
+ 3. **Start Transcription**
128
+ - Click "🚀 Start Transcription"
129
+ - Processing begins automatically
130
+ - Status updates every 10 seconds
131
+
132
+ 4. **Monitor Progress**
133
+ - **⏳ Queued**: Waiting to start
134
+ - **🔄 Processing**: Converting and analyzing
135
+ - **✅ Done**: Transcription complete
136
+
137
+ 5. **Download Results**
138
+ - View transcript in the text area
139
+ - Download the transcript file
140
+ - Access from your history anytime
141
+
142
+ ### Understanding Your Transcript
143
+
144
+ Your transcript includes:
145
+ - **Timestamps**: `[00:02:15]` (hours:minutes:seconds)
146
+ - **Speaker Labels**: `Speaker 0:`, `Speaker 1:`, etc.
147
+ - **Formatted Text**: Proper punctuation and capitalization
148
+
149
+ **Example Output:**
150
+ ```
151
+ [00:00:12] Speaker 0: Welcome to today's meeting. Let's start with the agenda.
152
+
153
+ [00:00:18] Speaker 1: Thank you. First item is the quarterly review.
154
+
155
+ [00:00:25] Speaker 0: Perfect. Let me share the presentation now.
156
+ ```
157
+
158
+ ---
159
+
160
+ ## 📚 Managing Your History
161
+
162
+ ### Viewing Your Transcriptions
163
+
164
+ 1. **Go to History Tab**
165
+ - Click "📚 My History" tab
166
+ - Your transcriptions appear automatically
167
+
168
+ 2. **Understanding the Table**
169
+ - **Date**: When transcription was created
170
+ - **Filename**: Original file name
171
+ - **Language**: Language used for transcription
172
+ - **Status**: Current status (Done, Processing, etc.)
173
+ - **Duration**: How long processing took
174
+ - **Job ID**: Unique identifier
175
+ - **Download**: Availability status
176
+
177
+ 3. **View Options**
178
+ - **Recent 20**: Default view of recent transcriptions
179
+ - **Show All**: Check box to see all your transcriptions
180
+
181
+ ### Downloading Transcripts
182
+
183
+ **Method 1: From Results**
184
+ - Complete transcription → Download button appears
185
+ - Click to download immediately
186
+
187
+ **Method 2: From History**
188
+ - Go to "📚 My History" tab
189
+ - Click "🔄 Refresh My History & Downloads"
190
+ - Download files appear below the table
191
+ - Click any available download link
192
+
193
+ **Method 3: From Downloads Section**
194
+ - Scroll to "📥 Download Your Completed Transcripts"
195
+ - Available transcripts show as file download buttons
196
+ - Click to download specific transcripts
197
+
198
+ ### Personal Statistics
199
+
200
+ Your dashboard shows:
201
+ - **Total Jobs**: All transcriptions you've created
202
+ - **Completed**: Successfully finished transcriptions
203
+ - **Processing**: Currently in progress
204
+ - **Pending**: Waiting to start
205
+ - **Failed**: Transcriptions that encountered errors
206
+ - **Last 7 Days**: Recent activity
207
+
208
+ ---
209
+
210
+ ## 🔒 Privacy & Data Control
211
+
212
+ ### GDPR & Data Rights
213
+
214
+ You have complete control over your data:
215
+
216
+ #### 📊 Data Export
217
+ 1. Go to "🔒 Privacy & Data" tab
218
+ 2. Click "📦 Export My Data"
219
+ 3. Download complete data archive (JSON format)
220
+
221
+ **What's Included:**
222
+ - Account information
223
+ - All transcription history
224
+ - Usage statistics
225
+ - Privacy preferences
226
+
227
+ #### 📧 Marketing Preferences
228
+ 1. Go to "🔒 Privacy & Data" tab
229
+ 2. Update "Marketing Consent" checkbox
230
+ 3. Click "✅ Update Consent"
231
+
232
+ #### 🗑️ Account Deletion
233
+ 1. Go to "🔒 Privacy & Data" tab
234
+ 2. Type "DELETE MY ACCOUNT" in confirmation field
235
+ 3. Click "🗑️ Delete My Account"
236
+
237
+ **⚠️ Warning**: This permanently deletes:
238
+ - Your account and profile
239
+ - All transcription files
240
+ - Usage history and statistics
241
+ - Data stored in Azure
242
+
243
+ ### Data Security
244
+
245
+ Your data is protected by:
246
+ - **Encryption**: All data encrypted in transit and at rest
247
+ - **Isolation**: Your files stored in private user folder
248
+ - **Access Control**: Only you can access your data
249
+ - **Compliance**: Meets GDPR, PDPA, and other privacy regulations
250
+ - **Audit Trail**: Complete logging for security
251
+
252
+ ### Where Your Data is Stored
253
+
254
+ - **User Folder**: `users/{your-user-id}/`
255
+ - `audio/`: Processed audio files
256
+ - `transcripts/`: Text transcriptions
257
+ - `originals/`: Original uploaded files
258
+ - **Location**: Secure Azure Blob Storage
259
+ - **Retention**: Until you delete your account
260
+
261
+ ---
262
+
263
+ ## 🛠️ Troubleshooting
264
+
265
+ ### Common Issues
266
+
267
+ #### Authentication Problems
268
+
269
+ **Problem**: Can't log in
270
+ - ✅ Check username/email spelling
271
+ - ✅ Verify password (case-sensitive)
272
+ - ✅ Ensure caps lock is off
273
+ - ✅ Try password reset if available
274
+
275
+ **Problem**: Registration fails
276
+ - ✅ Check email format (must be valid email)
277
+ - ✅ Username requirements (3-30 chars, alphanumeric + underscore)
278
+ - ✅ Password requirements (8+ chars, mixed case, numbers)
279
+ - ✅ Required consents must be checked
280
+
281
+ #### File Upload Issues
282
+
283
+ **Problem**: File won't upload
284
+ - ✅ Check file size (max 500MB)
285
+ - ✅ Verify file format is supported
286
+ - ✅ Ensure stable internet connection
287
+ - ✅ Try smaller file first
288
+
289
+ **Problem**: Unsupported format error
290
+ - ✅ Convert to supported format (WAV, MP3, MP4)
291
+ - ✅ Check file isn't corrupted
292
+ - ✅ Try different file
293
+
294
+ #### Processing Issues
295
+
296
+ **Problem**: Transcription stuck in "Processing"
297
+ - ✅ Wait - large files take time
298
+ - ✅ Check auto-refresh is active
299
+ - ✅ Refresh browser if needed
300
+ - ✅ Check Azure service status
301
+
302
+ **Problem**: Transcription failed
303
+ - ✅ Check error message for details
304
+ - ✅ Verify audio quality is good
305
+ - ✅ Try different audio format
306
+ - ✅ Ensure speakers are clearly audible
307
+
308
+ #### Results Issues
309
+
310
+ **Problem**: Poor transcription quality
311
+ - ✅ Use clear, high-quality audio
312
+ - ✅ Minimize background noise
313
+ - ✅ Ensure speakers speak clearly
314
+ - ✅ Select correct language
315
+ - ✅ Try WAV format for best results
316
+
317
+ **Problem**: Speakers not identified correctly
318
+ - ✅ Enable "Speaker Identification"
319
+ - ✅ Set correct number of speakers
320
+ - ✅ Ensure speakers have distinct voices
321
+ - ✅ Minimize speaker overlap
322
+
323
+ ### Performance Tips
324
+
325
+ #### For Best Results
326
+ - **Audio Quality**: Use high-quality recordings
327
+ - **File Format**: WAV files process fastest
328
+ - **Speaker Separation**: Clear pauses between speakers
329
+ - **Background Noise**: Minimize environmental noise
330
+ - **Language Selection**: Choose correct primary language
331
+
332
+ #### For Faster Processing
333
+ - **File Size**: Smaller files process faster
334
+ - **Format**: WAV > MP3 > other formats
335
+ - **Settings**: Disable unused features
336
+ - **Timing**: Process during off-peak hours
337
+
338
+ ---
339
+
340
+ ## ❓ FAQ
341
+
342
+ ### General Questions
343
+
344
+ **Q: Is my data secure?**
345
+ A: Yes, your data is stored in encrypted, user-separated Azure Blob Storage with enterprise-grade security.
346
+
347
+ **Q: Can others see my transcriptions?**
348
+ A: No, your data is completely private. Only you can access your transcriptions.
349
+
350
+ **Q: How long are transcriptions stored?**
351
+ A: Indefinitely, until you delete your account or individual transcriptions.
352
+
353
+ **Q: Is there a usage limit?**
354
+ A: Check with your administrator for any usage limits or quotas.
355
+
356
+ ### Technical Questions
357
+
358
+ **Q: What languages are supported?**
359
+ A: Multiple languages including English, Thai, Chinese, Japanese, Korean, Spanish, French, German, and others.
360
+
361
+ **Q: How accurate are the transcriptions?**
362
+ A: Very high accuracy using Microsoft Azure Speech Services, typically 85-95% depending on audio quality.
363
+
364
+ **Q: Can I edit transcriptions?**
365
+ A: You can copy and edit the text after download, but the original transcript is preserved.
366
+
367
+ **Q: Do you store my original files?**
368
+ A: Yes, originals are stored in your private folder and can be downloaded anytime.
369
+
370
+ ### Privacy Questions
371
+
372
+ **Q: Can I export all my data?**
373
+ A: Yes, use the "Export My Data" feature to download everything in JSON format.
374
+
375
+ **Q: How do I delete my account?**
376
+ A: Go to Privacy & Data tab and follow the account deletion process.
377
+
378
+ **Q: What happens to my data if I delete my account?**
379
+ A: All data is permanently deleted from Azure storage within 24 hours.
380
+
381
+ **Q: Do you use my data for training?**
382
+ A: No, your data is never used for training or shared with third parties.
383
+
384
+ ### Billing Questions
385
+
386
+ **Q: How much does it cost?**
387
+ A: Contact your administrator for pricing information.
388
+
389
+ **Q: Are there free tiers available?**
390
+ A: Depends on your organization's setup and Azure subscription.
391
+
392
+ ---
393
+
394
+ ## 📞 Getting Help
395
+
396
+ ### Support Resources
397
+
398
+ 1. **This User Guide**: Comprehensive information for all features
399
+ 2. **Error Messages**: Pay attention to specific error descriptions
400
+ 3. **System Status**: Check if Azure services are operational
401
+ 4. **Administrator**: Contact your system administrator for account issues
402
+
403
+ ### Reporting Issues
404
+
405
+ When reporting problems, include:
406
+ - **What you were trying to do**
407
+ - **What happened instead**
408
+ - **Error messages (exact text)**
409
+ - **File type and size**
410
+ - **Browser and operating system**
411
+ - **Steps to reproduce the issue**
412
+
413
+ ---
414
+
415
+ ## 🎯 Tips for Success
416
+
417
+ ### Getting the Best Transcriptions
418
+
419
+ 1. **Audio Quality Matters**
420
+ - Use good microphones
421
+ - Record in quiet environments
422
+ - Ensure clear speech
423
+ - Avoid speaker overlap
424
+
425
+ 2. **File Preparation**
426
+ - Convert to WAV for best results
427
+ - Trim unnecessary silence
428
+ - Normalize audio levels
429
+ - Remove background music if possible
430
+
431
+ 3. **Settings Optimization**
432
+ - Choose correct language
433
+ - Set appropriate speaker count
434
+ - Enable relevant features only
435
+ - Use appropriate profanity filter
436
+
437
+ 4. **Workflow Efficiency**
438
+ - Process multiple files in batches
439
+ - Use consistent naming conventions
440
+ - Download transcripts promptly
441
+ - Keep originals as backups
442
+
443
+ ### Privacy Best Practices
444
+
445
+ 1. **Account Security**
446
+ - Use strong, unique passwords
447
+ - Log out when finished
448
+ - Don't share login credentials
449
+ - Review account regularly
450
+
451
+ 2. **Data Management**
452
+ - Export data periodically
453
+ - Delete unnecessary transcriptions
454
+ - Review privacy settings
455
+ - Understand data retention
456
+
457
+ ---
458
+
459
+ **Welcome to Azure Speech Transcription! We hope this guide helps you make the most of our service. For the best experience, keep your audio quality high and your data organized.** 🎉
ai_summary.py ADDED
@@ -0,0 +1,796 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import uuid
4
+ import time
5
+ import base64
6
+ from datetime import datetime
7
+ from typing import Dict, List, Optional, Tuple
8
+ from dataclasses import dataclass, asdict
9
+ import tempfile
10
+ import shutil
11
+ from dotenv import load_dotenv
12
+ import tiktoken
13
+ import requests
14
+
15
+ from azure.cognitiveservices.vision.computervision import ComputerVisionClient
16
+ from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
17
+ from msrest.authentication import CognitiveServicesCredentials
18
+
19
+ from file_processors import FileProcessor
20
+ from image_extraction import VideoFrameExtractor
21
+
22
+ # Load Environment
23
+ load_dotenv()
24
+
25
@dataclass
class SummaryJob:
    """Record describing one AI summary job and its results.

    The first seven fields are required; the remaining ones default to
    ``None``.
    """

    # Identity and request
    job_id: str
    user_id: str
    original_files: List[str]
    summary_type: str
    user_prompt: str

    # Lifecycle
    status: str
    created_at: str
    completed_at: Optional[str] = None

    # Outputs
    summary_text: Optional[str] = None
    processed_files: Optional[Dict] = None
    extracted_images: Optional[List[str]] = None
    transcript_text: Optional[str] = None
    error_message: Optional[str] = None

    # Options supplied with the request
    settings: Optional[Dict] = None
41
+
42
class TokenManager:
    """Count tokens and trim content so prompts stay within a GPT model's limits.

    Token counts come from a ``tiktoken`` encoder selected from the model
    name. When encoding fails, counts fall back to an estimate of about four
    characters per token.
    """

    # Appended wherever content has been cut.
    _TRUNCATION_MARKER = "[Content truncated due to length]"
    # Limits applied to image analysis entries.
    _MAX_IMAGES = 10
    _MAX_IMAGE_DESCRIPTION_TOKENS = 200
    _MAX_IMAGE_TEXT_TOKENS = 300

    def __init__(self, model_name: str = "gpt-4o-mini"):
        """Select the encoder and the token budgets for *model_name*."""
        try:
            # Map common model names to encodings
            model_encoding_map = {
                "gpt-4o": "o200k_base",
                "gpt-4o-mini": "o200k_base",
                "gpt-4": "cl100k_base",
                "gpt-4-turbo": "cl100k_base",
                "gpt-35-turbo": "cl100k_base"
            }

            encoding_name = model_encoding_map.get(model_name, "cl100k_base")
            self.encoder = tiktoken.get_encoding(encoding_name)
        except Exception as e:
            print(f"Warning: Could not load tokenizer for {model_name}, using fallback: {e}")
            self.encoder = tiktoken.get_encoding("cl100k_base")

        # Token limits based on model
        if "gpt-4o" in model_name:
            self.max_input_tokens = 120000  # 128k context window
        else:
            self.max_input_tokens = 100000  # Conservative limit

        self.max_transcript_tokens = 80000
        self.max_document_tokens = 30000
        self.max_image_analysis_tokens = 10000

    def count_tokens(self, text: str) -> int:
        """Return the number of tokens in *text* (0 for empty or ``None``)."""
        if not text:
            return 0
        try:
            return len(self.encoder.encode(text))
        except Exception:
            # Fallback estimation: ~4 characters per token
            return len(text) // 4

    def _truncate_by_chars(self, text: str, current_tokens: int, max_tokens: int) -> str:
        """Cut one oversized run of text using its average characters per token."""
        if max_tokens <= 0 or current_tokens <= 0:
            # Nothing fits; a negative budget must not slice from the end.
            return self._TRUNCATION_MARKER
        chars_per_token = len(text) / current_tokens
        # 0.9 leaves a margin because the ratio is only an average.
        target_chars = int(max_tokens * chars_per_token * 0.9)
        return text[:target_chars] + "\n" + self._TRUNCATION_MARKER

    def truncate_text(self, text: str, max_tokens: int) -> str:
        """Truncate *text* so it fits in roughly *max_tokens* tokens.

        Text within the budget is returned unchanged. Multi-line text is cut
        at a line boundary; a single line (or a first line that alone exceeds
        the budget) is cut by character estimate. Truncated results end with
        a ``[Content truncated due to length]`` marker line.
        """
        if not text:
            return text

        total_tokens = self.count_tokens(text)
        if total_tokens <= max_tokens:
            return text

        lines = text.split('\n')
        if len(lines) == 1:
            # Single line - truncate by character estimation
            return self._truncate_by_chars(text, total_tokens, max_tokens)

        # Multi-line - truncate by lines
        kept_lines = []
        used_tokens = 0

        for line in lines:
            line_tokens = self.count_tokens(line + '\n')
            if used_tokens + line_tokens > max_tokens:
                if not kept_lines:
                    # The first line alone is over budget: keep its head
                    # instead of returning nothing but the marker.
                    return self._truncate_by_chars(
                        line, self.count_tokens(line), max_tokens
                    )
                kept_lines.append(self._TRUNCATION_MARKER)
                break
            kept_lines.append(line)
            used_tokens += line_tokens

        return '\n'.join(kept_lines)

    def _fit_group(self, items: List[Dict], budget: int, label: str) -> None:
        """Truncate the ``content`` of *items* in place to share *budget* tokens."""
        if not items:
            return

        combined = "".join((item.get('content') or '') + "\n\n" for item in items)
        used_tokens = self.count_tokens(combined)
        if used_tokens <= budget:
            return

        print(f"{label} too long ({used_tokens} tokens), truncating to {budget}")
        tokens_per_item = budget // len(items)
        for item in items:
            content = item.get('content')
            # Entries without content have nothing to truncate.
            if content:
                item['content'] = self.truncate_text(content, tokens_per_item)

    def optimize_content_for_tokens(self, transcripts: List[Dict], documents: List[Dict],
                                    image_insights: List[Dict], user_prompt: str) -> Tuple[List[Dict], List[Dict], List[Dict]]:
        """Trim transcripts, documents and image insights to their token budgets.

        The dictionaries inside the lists are modified in place. When a group
        is over budget, the budget is split evenly between its entries.
        *user_prompt* is accepted for interface compatibility and not used.

        Returns:
            ``(transcripts, documents, image_insights)``; ``image_insights``
            is a shortened list when more than 10 images were supplied.
        """
        # Truncate transcripts
        self._fit_group(transcripts, self.max_transcript_tokens, "Transcripts")

        # Truncate documents
        self._fit_group(documents, self.max_document_tokens, "Documents")

        # Truncate image analysis
        image_insights = image_insights or []
        if len(image_insights) > self._MAX_IMAGES:
            print(f"Too many images ({len(image_insights)}), limiting to 10")
            image_insights = image_insights[:self._MAX_IMAGES]

        for img in image_insights:
            analysis = img.get('analysis')
            if not isinstance(analysis, dict):
                continue
            desc = analysis.get('description', '')
            text = analysis.get('extracted_text', '')
            if desc:
                analysis['description'] = self.truncate_text(
                    desc, self._MAX_IMAGE_DESCRIPTION_TOKENS
                )
            if text:
                analysis['extracted_text'] = self.truncate_text(
                    text, self._MAX_IMAGE_TEXT_TOKENS
                )

        return transcripts, documents, image_insights
157
+
158
+ class AISummaryManager:
159
+ """AI-powered conference summarization using Azure OpenAI with enhanced backend integration"""
160
+
161
def __init__(self):
    """Read configuration from the environment and set up helper services.

    Azure OpenAI and Computer Vision settings come from environment
    variables. Validation and client creation happen in ``_init_services``,
    which runs last.
    """
    env = os.environ.get

    # Azure OpenAI Configuration
    self.azure_openai_endpoint = env("AZURE_OPENAI_ENDPOINT")
    self.azure_openai_key = env("AZURE_OPENAI_KEY")
    self.azure_openai_deployment = env("AZURE_OPENAI_DEPLOYMENT", "gpt-4o-mini")
    self.azure_openai_api_version = env("AZURE_OPENAI_API_VERSION", "2024-08-01-preview")

    # Computer Vision Configuration
    self.cv_endpoint = env("COMPUTER_VISION_ENDPOINT")
    self.cv_key = env("COMPUTER_VISION_KEY")

    # Local helpers; the Computer Vision client is created in _init_services.
    self.cv_client = None
    self.file_processor = FileProcessor()
    self.frame_extractor = VideoFrameExtractor()
    # The deployment name doubles as the model name for token counting.
    self.token_manager = TokenManager(self.azure_openai_deployment)

    # Backend integration (stays None when the backend cannot be imported)
    self.backend_manager = None
    self._init_backend_integration()

    self._init_services()
183
+
184
+ def _init_backend_integration(self):
185
+ """Initialize integration with the enhanced backend"""
186
+ try:
187
+ # Import the transcription manager from backend
188
+ from backend import transcription_manager
189
+ self.backend_manager = transcription_manager
190
+ print("Backend integration initialized successfully")
191
+ except ImportError as e:
192
+ print(f"Warning: Could not initialize backend integration: {e}")
193
+ self.backend_manager = None
194
+
195
+ def _init_services(self):
196
+ """Initialize services with validation"""
197
+ # Validate Azure OpenAI configuration
198
+ if not all([self.azure_openai_endpoint, self.azure_openai_key, self.azure_openai_deployment]):
199
+ print("ERROR: Missing Azure OpenAI configuration")
200
+ print("Required environment variables:")
201
+ print("- AZURE_OPENAI_ENDPOINT")
202
+ print("- AZURE_OPENAI_KEY")
203
+ print("- AZURE_OPENAI_DEPLOYMENT")
204
+ raise ValueError("Azure OpenAI configuration incomplete")
205
+
206
+ # Validate endpoint format
207
+ if not self.azure_openai_endpoint.startswith("https://"):
208
+ raise ValueError("AZURE_OPENAI_ENDPOINT must be a valid HTTPS URL")
209
+
210
+ # Remove trailing slash from endpoint
211
+ self.azure_openai_endpoint = self.azure_openai_endpoint.rstrip('/')
212
+
213
+ print(f"Azure OpenAI initialized: {self.azure_openai_deployment} at {self.azure_openai_endpoint}")
214
+
215
+ # Test Azure OpenAI connection
216
+ try:
217
+ self._test_azure_openai_connection()
218
+ except Exception as e:
219
+ print(f"WARNING: Azure OpenAI connection test failed: {e}")
220
+
221
+ # Initialize Computer Vision Client
222
+ if self.cv_key and self.cv_endpoint:
223
+ try:
224
+ self.cv_client = ComputerVisionClient(
225
+ self.cv_endpoint,
226
+ CognitiveServicesCredentials(self.cv_key)
227
+ )
228
+ print("Computer Vision Client initialized")
229
+ except Exception as e:
230
+ print(f"WARNING: Computer Vision initialization failed: {e}")
231
+ else:
232
+ print("Computer Vision key/endpoint not found - image processing disabled")
233
+
234
def _test_azure_openai_connection(self):
    """Send a five-token chat completion to check the deployment responds.

    Raises:
        requests.exceptions.RequestException: re-raised when the HTTP call
            itself fails.
        Exception: when the service answers with a status other than 200.
    """
    url = (
        f"{self.azure_openai_endpoint}/openai/deployments/"
        f"{self.azure_openai_deployment}/chat/completions"
        f"?api-version={self.azure_openai_api_version}"
    )
    request_headers = {
        "Content-Type": "application/json",
        "api-key": self.azure_openai_key
    }
    probe_payload = {
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 5,
        "temperature": 0
    }

    try:
        response = requests.post(url, headers=request_headers, json=probe_payload, timeout=10)
        if response.status_code != 200:
            print(f"Azure OpenAI connection test failed: {response.status_code} - {response.text}")
            raise Exception(f"Connection test failed: {response.status_code}")
        print("Azure OpenAI connection test: SUCCESS")
    except requests.exceptions.RequestException as request_error:
        print(f"Azure OpenAI connection test error: {request_error}")
        raise
259
+
260
def submit_summary_job(
    self,
    user_id: str,
    summary_type: str,
    user_prompt: str,
    files: List = None,
    transcript_job_ids: List[str] = None,
    settings: Dict = None
) -> str:
    """Create a pending summary job, persist it and start processing in the background.

    Args:
        user_id: Identifier of the submitting user.
        summary_type: Summary type stored on the job.
        user_prompt: Free-text instructions stored on the job.
        files: Optional uploaded files; each is recorded by its ``name``
            attribute when present, otherwise by ``str(file)``.
        transcript_job_ids: Optional ids of earlier transcription jobs to include.
        settings: Optional settings dict; an empty dict is stored when omitted.

    Returns:
        The generated job id (a UUID4 string).
    """
    job_id = str(uuid.uuid4())

    original_files = []
    if files:
        original_files.extend([f.name if hasattr(f, 'name') else str(f) for f in files])
    if transcript_job_ids:
        original_files.extend([f"transcript_{tid[:8]}..." for tid in transcript_job_ids])

    print(f"[{user_id[:8]}...] New AI summary job: {summary_type}")
    print(f"User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")

    job = SummaryJob(
        job_id=job_id,
        user_id=user_id,
        original_files=original_files,
        summary_type=summary_type,
        user_prompt=user_prompt,
        status="pending",
        created_at=datetime.now().isoformat(),
        settings=settings or {}
    )

    # Save job to backend database
    if self.backend_manager:
        self.backend_manager.save_summary_job(job)
    else:
        self._save_summary_job_fallback(job)

    # Start background processing
    from concurrent.futures import ThreadPoolExecutor
    executor = ThreadPoolExecutor(max_workers=1)
    try:
        executor.submit(self._process_summary_job, job_id, files, transcript_job_ids)
    finally:
        # One executor is created per job. Shut it down without waiting so the
        # worker thread is released once the submitted task finishes, instead
        # of depending on garbage collection of the executor object.
        executor.shutdown(wait=False)

    return job_id
304
+
305
+ def _save_summary_job_fallback(self, job: SummaryJob):
306
+ """Fallback method to save summary job if backend is not available"""
307
+ try:
308
+ # Create a simple local storage fallback
309
+ os.makedirs("temp/summary_jobs", exist_ok=True)
310
+ job_file = f"temp/summary_jobs/{job.job_id}.json"
311
+
312
+ with open(job_file, 'w', encoding='utf-8') as f:
313
+ json.dump(asdict(job), f, ensure_ascii=False, indent=2)
314
+
315
+ print(f"Job saved to fallback storage: {job_file}")
316
+ except Exception as e:
317
+ print(f"Error saving job to fallback storage: {e}")
318
+
319
+ def _process_summary_job(self, job_id: str, files: List = None, transcript_job_ids: List[str] = None):
320
+ """Process AI summary job using Azure OpenAI with enhanced error handling and chat storage"""
321
+ job = None
322
+ try:
323
+ job = self.get_summary_status(job_id)
324
+ if not job:
325
+ print(f"Job {job_id[:8]}... not found")
326
+ return
327
+
328
+ print(f"[{job.user_id[:8]}...] Processing AI summary job: {job_id[:8]}...")
329
+
330
+ job.status = "processing"
331
+ self._update_job_status(job)
332
+
333
+ # Process all input sources
334
+ processed_content = {
335
+ 'transcripts': [],
336
+ 'documents': [],
337
+ 'images': [],
338
+ 'extracted_frames': []
339
+ }
340
+
341
+ # Process existing transcripts
342
+ if transcript_job_ids:
343
+ processed_content['transcripts'] = self._get_existing_transcripts(transcript_job_ids, job.user_id)
344
+ print(f"Processed {len(processed_content['transcripts'])} existing transcripts")
345
+
346
+ # Process uploaded files
347
+ if files:
348
+ for i, file in enumerate(files):
349
+ print(f"Processing file {i+1}/{len(files)}: {getattr(file, 'name', 'unknown')}")
350
+ file_content = self._process_uploaded_file(file, job.user_id)
351
+ if file_content:
352
+ if file_content['type'] == 'video':
353
+ frames = self._extract_significant_frames(file_content['path'])
354
+ processed_content['extracted_frames'].extend(frames)
355
+ print(f"Extracted {len(frames)} frames from video")
356
+ elif file_content['type'] == 'document':
357
+ processed_content['documents'].append(file_content)
358
+ elif file_content['type'] == 'image':
359
+ processed_content['images'].append(file_content)
360
+
361
+ # Analyze images with Computer Vision
362
+ image_insights = []
363
+ all_images = processed_content['images'] + processed_content['extracted_frames']
364
+
365
+ print(f"Analyzing {len(all_images)} images...")
366
+ for image_info in all_images:
367
+ analysis = self._analyze_image_content(image_info['path'])
368
+ if analysis:
369
+ image_insights.append({
370
+ 'source': image_info['filename'],
371
+ 'analysis': analysis
372
+ })
373
+
374
+ print(f"Analysis complete: {len(processed_content['transcripts'])} transcripts, {len(processed_content['documents'])} documents, {len(image_insights)} images")
375
+
376
+ # Optimize content for token limits
377
+ optimized_transcripts, optimized_documents, optimized_images = self.token_manager.optimize_content_for_tokens(
378
+ processed_content['transcripts'],
379
+ processed_content['documents'],
380
+ image_insights,
381
+ job.user_prompt
382
+ )
383
+
384
+ output_language = job.settings.get('output_language', 'English') if job.settings else 'English'
385
+
386
+ # Generate AI summary using Azure OpenAI
387
+ summary_result = self._generate_ai_summary_with_openai(
388
+ transcripts=optimized_transcripts,
389
+ documents=optimized_documents,
390
+ image_insights=optimized_images,
391
+ user_prompt=job.user_prompt,
392
+ output_language=output_language
393
+ )
394
+
395
+ # Store response to chat container using backend integration
396
+ if self.backend_manager and hasattr(self.backend_manager.db, '_store_chat_response'):
397
+ try:
398
+ chat_url = self.backend_manager.db._store_chat_response(job_id, summary_result, job.user_id)
399
+ print(f"💬 Chat response stored successfully: {chat_url}")
400
+ except Exception as e:
401
+ print(f"⚠️ Warning: Could not store chat response: {e}")
402
+
403
+ # Update job with results
404
+ job.status = "completed"
405
+ job.summary_text = summary_result
406
+ job.completed_at = datetime.now().isoformat()
407
+ job.processed_files = {
408
+ 'transcript_count': len(processed_content['transcripts']),
409
+ 'document_count': len(processed_content['documents']),
410
+ 'image_count': len(all_images),
411
+ 'extracted_frames': len(processed_content['extracted_frames'])
412
+ }
413
+ job.extracted_images = [img['filename'] for img in processed_content['extracted_frames']]
414
+
415
+ self._update_job_status(job)
416
+
417
+ print(f"[{job.user_id[:8]}...] AI summary completed: {job_id[:8]}...")
418
+
419
+ except Exception as e:
420
+ print(f"AI summary processing failed: {e}")
421
+ if job:
422
+ job.status = "failed"
423
+ job.error_message = str(e)
424
+ job.completed_at = datetime.now().isoformat()
425
+ self._update_job_status(job)
426
+
427
+ def _update_job_status(self, job: SummaryJob):
428
+ """Update job status in backend or fallback storage"""
429
+ if self.backend_manager:
430
+ self.backend_manager.save_summary_job(job)
431
+ else:
432
+ self._save_summary_job_fallback(job)
433
+
434
+ def _generate_ai_summary_with_openai(
435
+ self,
436
+ transcripts: List[Dict],
437
+ documents: List[Dict],
438
+ image_insights: List[Dict],
439
+ user_prompt: str,
440
+ output_language: str = "English"
441
+ ) -> str:
442
+ """Generate AI summary using Azure OpenAI with proper error handling"""
443
+ try:
444
+ print("Generating AI summary with Azure OpenAI...")
445
+
446
+ # Prepare the context
447
+ context = self._prepare_text_content(transcripts, documents, image_insights, user_prompt, output_language)
448
+
449
+ # Check final token count
450
+ final_tokens = self.token_manager.count_tokens(context)
451
+ print(f"Final input tokens: {final_tokens} / {self.token_manager.max_input_tokens}")
452
+
453
+ if final_tokens > self.token_manager.max_input_tokens:
454
+ print("Content too long, applying emergency truncation")
455
+ context = self.token_manager.truncate_text(context, self.token_manager.max_input_tokens)
456
+
457
+ # Prepare the Azure OpenAI API request with correct URL format
458
+ url = f"{self.azure_openai_endpoint}/openai/deployments/{self.azure_openai_deployment}/chat/completions?api-version={self.azure_openai_api_version}"
459
+
460
+ headers = {
461
+ "Content-Type": "application/json",
462
+ "api-key": self.azure_openai_key
463
+ }
464
+
465
+ # Adjust max tokens based on model
466
+ max_completion_tokens = 4000
467
+ if "gpt-4o" in self.azure_openai_deployment:
468
+ max_completion_tokens = 8000
469
+
470
+ data = {
471
+ "messages": [
472
+ {
473
+ "role": "system",
474
+ "content": f"You are an expert conference analyst. Create comprehensive summaries in {output_language}. Focus on key insights, decisions, action items, and important discussions. Integrate information from all sources provided."
475
+ },
476
+ {
477
+ "role": "user",
478
+ "content": context
479
+ }
480
+ ],
481
+ "max_tokens": max_completion_tokens,
482
+ "temperature": 0.2,
483
+ "top_p": 0.3,
484
+ "frequency_penalty": 0,
485
+ "presence_penalty": 0
486
+ }
487
+
488
+ print(f"Making API request to: {url}")
489
+ print(f"Using model: {self.azure_openai_deployment}")
490
+
491
+ # Make the API request with retries
492
+ max_retries = 3
493
+ for attempt in range(max_retries):
494
+ try:
495
+ response = requests.post(
496
+ url,
497
+ headers=headers,
498
+ json=data,
499
+ timeout=300 # 5 minute timeout
500
+ )
501
+
502
+ print(f"API Response Status: {response.status_code}")
503
+
504
+ if response.status_code == 200:
505
+ break
506
+ elif response.status_code == 429:
507
+ # Rate limit - wait and retry
508
+ wait_time = 2 ** attempt
509
+ print(f"Rate limited, waiting {wait_time} seconds...")
510
+ time.sleep(wait_time)
511
+ continue
512
+ else:
513
+ error_msg = f"Azure OpenAI API error: {response.status_code} - {response.text}"
514
+ print(error_msg)
515
+ if attempt == max_retries - 1:
516
+ raise Exception(error_msg)
517
+ time.sleep(1)
518
+ continue
519
+
520
+ except requests.exceptions.Timeout:
521
+ if attempt == max_retries - 1:
522
+ raise Exception("Azure OpenAI request timed out after multiple retries")
523
+ print(f"Request timeout, retrying... (attempt {attempt + 1})")
524
+ time.sleep(2)
525
+ continue
526
+ except requests.exceptions.RequestException as e:
527
+ if attempt == max_retries - 1:
528
+ raise Exception(f"Azure OpenAI request failed: {str(e)}")
529
+ print(f"Request error, retrying... (attempt {attempt + 1}): {e}")
530
+ time.sleep(2)
531
+ continue
532
+
533
+ try:
534
+ result = response.json()
535
+ except json.JSONDecodeError as e:
536
+ raise Exception(f"Invalid JSON response from Azure OpenAI: {str(e)}")
537
+
538
+ # Extract the AI response
539
+ if 'choices' in result and len(result['choices']) > 0:
540
+ choice = result['choices'][0]
541
+
542
+ if 'message' in choice and 'content' in choice['message']:
543
+ ai_response = choice['message']['content']
544
+
545
+ # Check for completion reason
546
+ finish_reason = choice.get('finish_reason', '')
547
+ if finish_reason == 'content_filter':
548
+ raise Exception("Content was filtered by Azure OpenAI safety systems")
549
+ elif finish_reason == 'length':
550
+ print("Response was truncated due to length limit")
551
+ ai_response += "\n\n[Response was truncated due to length limit]"
552
+
553
+ print(f"AI summary generated successfully in {output_language}")
554
+ return ai_response
555
+ else:
556
+ raise Exception(f"Unexpected response format: {result}")
557
+ else:
558
+ raise Exception(f"No response generated from Azure OpenAI: {result}")
559
+
560
+ except Exception as e:
561
+ error_msg = f"Azure OpenAI generation failed: {str(e)}"
562
+ print(error_msg)
563
+ raise Exception(error_msg)
564
+
565
+ def _prepare_text_content(self, transcripts, documents, image_insights, user_prompt, output_language="English"):
566
+ """Prepare comprehensive text content for AI analysis"""
567
+ context_parts = [
568
+ "# Conference Summary Request",
569
+ f"**User Instructions:** {user_prompt}",
570
+ f"**Output Language:** {output_language}",
571
+ "",
572
+ "## Content to Analyze:",
573
+ ""
574
+ ]
575
+
576
+ # Add transcript content
577
+ if transcripts:
578
+ context_parts.append("### Transcripts:")
579
+ for i, transcript in enumerate(transcripts, 1):
580
+ context_parts.append(f"**Source {i}: {transcript['source']}**")
581
+ context_parts.append(transcript['content'])
582
+ context_parts.append("")
583
+
584
+ # Add document content
585
+ if documents:
586
+ context_parts.append("### Documents:")
587
+ for i, doc in enumerate(documents, 1):
588
+ context_parts.append(f"**Document {i}: {doc['filename']}**")
589
+ context_parts.append(doc['content'])
590
+ context_parts.append("")
591
+
592
+ # Add image analysis
593
+ if image_insights:
594
+ context_parts.append("### Visual Content:")
595
+ for i, img in enumerate(image_insights, 1):
596
+ context_parts.append(f"**Image {i}: {img['source']}**")
597
+ analysis = img['analysis']
598
+ context_parts.append(f"Description: {analysis.get('description', 'N/A')}")
599
+ if analysis.get('extracted_text'):
600
+ context_parts.append(f"Text: {analysis['extracted_text']}")
601
+ context_parts.append("")
602
+
603
+ context_parts.extend([
604
+ "## Instructions:",
605
+ f"Create a comprehensive conference summary in {output_language}.",
606
+ "Follow the user's specific instructions for format and focus areas.",
607
+ "Integrate information from all sources (transcripts, documents, visual content).",
608
+ "Highlight key insights, decisions, action items, and important discussions.",
609
+ "Provide actionable recommendations based on the content.",
610
+ ])
611
+
612
+ return "\n".join(context_parts)
613
+
614
+ def _process_uploaded_file(self, file, user_id: str) -> Optional[Dict]:
615
+ """Process uploaded file"""
616
+ try:
617
+ if hasattr(file, 'name'):
618
+ file_path = file.name
619
+ filename = os.path.basename(file_path)
620
+ elif isinstance(file, str):
621
+ file_path = file
622
+ filename = os.path.basename(file_path)
623
+ else:
624
+ return None
625
+
626
+ if not os.path.exists(file_path):
627
+ return None
628
+
629
+ file_size = os.path.getsize(file_path)
630
+ if file_size > 500 * 1024 * 1024: # 500MB limit
631
+ return None
632
+
633
+ ext = filename.split('.')[-1].lower() if '.' in filename else ''
634
+
635
+ if ext in ['mp4', 'mov', 'avi', 'mkv', 'webm', 'flv', '3gp', 'wmv']:
636
+ return {
637
+ 'type': 'video',
638
+ 'filename': filename,
639
+ 'path': file_path,
640
+ 'extension': ext,
641
+ 'size': file_size
642
+ }
643
+ elif ext in ['jpg', 'jpeg', 'png', 'bmp', 'gif', 'tiff', 'webp']:
644
+ return {
645
+ 'type': 'image',
646
+ 'filename': filename,
647
+ 'path': file_path,
648
+ 'extension': ext,
649
+ 'size': file_size
650
+ }
651
+ elif ext in ['pdf', 'docx', 'doc', 'pptx', 'ppt', 'xlsx', 'xls', 'txt', 'json', 'csv']:
652
+ content = self.file_processor.process_file(file_path, ext)
653
+ if content:
654
+ return {
655
+ 'type': 'document',
656
+ 'filename': filename,
657
+ 'path': file_path,
658
+ 'extension': ext,
659
+ 'content': content,
660
+ 'size': file_size
661
+ }
662
+
663
+ return None
664
+
665
+ except Exception as e:
666
+ print(f"Error processing file: {e}")
667
+ return None
668
+
669
+ def _extract_significant_frames(self, video_path: str) -> List[Dict]:
670
+ """Extract significant frames from video"""
671
+ try:
672
+ frames = self.frame_extractor.extract_frames(video_path)
673
+ return frames if frames else []
674
+ except Exception as e:
675
+ print(f"Frame extraction failed: {e}")
676
+ return []
677
+
678
+ def _analyze_image_content(self, image_path: str) -> Optional[Dict]:
679
+ """Analyze image content with Computer Vision"""
680
+ if not self.cv_client:
681
+ return None
682
+
683
+ try:
684
+ with open(image_path, 'rb') as image_stream:
685
+ # OCR
686
+ ocr_result = self.cv_client.read_in_stream(image_stream, raw=True)
687
+ operation_id = ocr_result.headers["Operation-Location"].split("/")[-1]
688
+
689
+ # Wait for completion
690
+ timeout = 30
691
+ start_time = time.time()
692
+ while True:
693
+ if time.time() - start_time > timeout:
694
+ break
695
+
696
+ read_result = self.cv_client.get_read_result(operation_id)
697
+ if read_result.status not in ['notStarted', 'running']:
698
+ break
699
+ time.sleep(1)
700
+
701
+ # Extract text
702
+ extracted_text = ""
703
+ if read_result.status == OperationStatusCodes.succeeded:
704
+ for text_result in read_result.analyze_result.read_results:
705
+ for line in text_result.lines:
706
+ extracted_text += line.text + "\n"
707
+
708
+ # Get description
709
+ image_stream.seek(0)
710
+ description_result = self.cv_client.describe_image_in_stream(
711
+ image_stream,
712
+ max_candidates=3,
713
+ language='en'
714
+ )
715
+
716
+ return {
717
+ 'extracted_text': extracted_text.strip(),
718
+ 'description': description_result.captions[0].text if description_result.captions else "",
719
+ 'confidence': description_result.captions[0].confidence if description_result.captions else 0
720
+ }
721
+
722
+ except Exception as e:
723
+ print(f"Image analysis failed: {e}")
724
+ return None
725
+
726
+ def _get_existing_transcripts(self, transcript_job_ids: List[str], user_id: str) -> List[Dict]:
727
+ """Get existing transcripts using backend integration"""
728
+ transcripts = []
729
+
730
+ if self.backend_manager:
731
+ for job_id in transcript_job_ids:
732
+ try:
733
+ job = self.backend_manager.get_job_status(job_id)
734
+ if job and job.user_id == user_id and job.transcript_text:
735
+ transcripts.append({
736
+ 'source': f"Previous transcript: {job.original_filename}",
737
+ 'content': job.transcript_text
738
+ })
739
+ except Exception as e:
740
+ print(f"Error getting transcript {job_id[:8]}...: {e}")
741
+ else:
742
+ print("Backend manager not available, cannot retrieve existing transcripts")
743
+
744
+ return transcripts
745
+
746
def get_summary_status(self, job_id: str) -> Optional[SummaryJob]:
    """Look up a summary job by id.

    With a backend manager the lookup is delegated to it. Otherwise the job
    is read from ``temp/summary_jobs/<job_id>.json``.

    Returns:
        The job, or ``None`` when the fallback file is missing or cannot be
        loaded.
    """
    if self.backend_manager:
        return self.backend_manager.get_summary_job(job_id)

    # Fallback to local storage
    try:
        job_file = f"temp/summary_jobs/{job_id}.json"
        if not os.path.exists(job_file):
            return None
        with open(job_file, 'r', encoding='utf-8') as handle:
            stored_fields = json.load(handle)
        # Rebuild the SummaryJob from the stored fields.
        return SummaryJob(**stored_fields)
    except Exception as load_error:
        print(f"Error loading job from fallback storage: {load_error}")
        return None
764
+
765
def get_user_summary_history(self, user_id: str, limit: int = 20) -> List[SummaryJob]:
    """Return the user's summary jobs, newest first, capped at ``limit``.

    With a backend manager the query is delegated to it. Otherwise every
    ``.json`` file under ``temp/summary_jobs`` is read and the jobs whose
    ``user_id`` matches are kept. Unreadable files are reported and skipped.

    Returns:
        The matching jobs, or an empty list when the fallback scan fails.
    """
    if self.backend_manager:
        return self.backend_manager.get_user_summary_history(user_id, limit)

    # Fallback to local storage
    try:
        matches = []
        jobs_dir = "temp/summary_jobs"
        if os.path.exists(jobs_dir):
            for entry_name in os.listdir(jobs_dir):
                if not entry_name.endswith('.json'):
                    continue
                entry_path = os.path.join(jobs_dir, entry_name)
                try:
                    with open(entry_path, 'r', encoding='utf-8') as handle:
                        stored_fields = json.load(handle)

                    if stored_fields.get('user_id') == user_id:
                        matches.append(SummaryJob(**stored_fields))
                except Exception as entry_error:
                    print(f"Error loading job file {entry_name}: {entry_error}")

        # Newest first, by the ISO creation timestamp.
        newest_first = sorted(matches, key=lambda item: item.created_at, reverse=True)
        return newest_first[:limit]
    except Exception as scan_error:
        print(f"Error getting user history from fallback storage: {scan_error}")

    return []
794
+
795
+ # Global AI summary manager instance with enhanced backend integration
796
+ ai_summary_manager = AISummaryManager()
app.py ADDED
@@ -0,0 +1,1661 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import time
3
+ import json
4
+ import os
5
+ import subprocess
6
+ from datetime import datetime, timedelta
7
+ from typing import List, Tuple, Optional
8
+ from backend import (
9
+ ALLOWED_LANGS, AUDIO_FORMATS, transcription_manager,
10
+ allowed_file, User
11
+ )
12
+
13
+ # Import new AI Summary components
14
+ try:
15
+ from ai_summary import ai_summary_manager, SummaryJob
16
+ from file_processors import FileProcessor
17
+ from image_extraction import VideoFrameExtractor
18
+ AI_FEATURES_AVAILABLE = True
19
+ print("✅ AI Summary features loaded successfully")
20
+ except ImportError as e:
21
+ AI_FEATURES_AVAILABLE = False
22
+ print(f"⚠️ AI Summary features not available: {e}")
23
+ ai_summary_manager = None
24
+
25
def format_status(status):
    """Return the display label for a job status, or the status itself if it is not a known one."""
    labels = dict((
        ('pending', '⏳ Queued'),
        ('processing', '🔄 Processing'),
        ('completed', '✅ Done'),
        ('failed', '❌ Failed'),
    ))
    try:
        return labels[status]
    except KeyError:
        return status
34
+
35
def format_processing_time(created_at, completed_at=None):
    """Format the time between two ISO-8601 timestamps as a short duration.

    Args:
        created_at: ISO-8601 string for when the job was created.
        completed_at: ISO-8601 string for when it finished; when falsy the
            current local time is used instead.

    Returns:
        ``"<s>s"`` below one minute, ``"<m>m <s>s"`` below one hour,
        ``"<h>h <m>m"`` otherwise, or ``"Unknown"`` when a timestamp is
        missing, malformed or cannot be subtracted from the other.
    """
    try:
        start_time = datetime.fromisoformat(created_at)
        end_time = datetime.fromisoformat(completed_at) if completed_at else datetime.now()
        total_seconds = int((end_time - start_time).total_seconds())
    except (TypeError, ValueError):
        # TypeError: a non-string input, or naive and aware datetimes mixed.
        # ValueError: a malformed ISO string. Anything else (for example
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return "Unknown"

    if total_seconds < 60:
        return f"{total_seconds}s"

    minutes, seconds = divmod(total_seconds, 60)
    if total_seconds < 3600:
        return f"{minutes}m {seconds}s"

    hours, minutes = divmod(minutes, 60)
    return f"{hours}h {minutes}m"
58
+
59
def get_user_stats_display(user: User):
    """Build the one-line statistics banner shown for a logged-in user.

    The banner always carries the username, transcript count, AI summary
    count and completed count. Processing, pending, failed and recent counts
    are appended only when greater than zero. Without a user a login prompt
    is returned; if fetching or formatting fails, the banner carries the
    error text instead.
    """
    if not user:
        return "👤 Please log in to view statistics"

    try:
        # Prefer the extended stats (which include AI summaries) when offered.
        if hasattr(transcription_manager, 'get_user_stats_extended'):
            fetch_stats = transcription_manager.get_user_stats_extended
        else:
            fetch_stats = transcription_manager.get_user_stats
        stats = fetch_stats(user.user_id)

        total = stats.get('total_jobs', 0)
        summary_total = stats.get('total_summary_jobs', 0)
        recent = stats.get('recent_jobs', 0)
        by_status = stats.get('by_status', {})

        completed = by_status.get('completed', 0)
        processing = by_status.get('processing', 0)
        pending = by_status.get('pending', 0)
        failed = by_status.get('failed', 0)

        segments = [
            f"👤 {user.username}",
            f"📊 Transcripts: {total}",
            f"🤖 AI Summaries: {summary_total}",
            f"✅ Completed: {completed}",
        ]
        optional_counts = (
            ("🔄 Processing", processing),
            ("⏳ Pending", pending),
            ("❌ Failed", failed),
            ("📅 Last 7 days", recent),
        )
        for label, count in optional_counts:
            if count > 0:
                segments.append(f"{label}: {count}")

        return " | ".join(segments)

    except Exception as stats_error:
        return f"👤 {user.username} | Stats error: {str(stats_error)}"
95
+
96
+ # Authentication Functions
97
def register_user(email, username, password, confirm_password, gdpr_consent, data_retention_consent, marketing_consent):
    """Validate the registration form and create the account through the backend.

    Returns:
        A ``(message, update)`` pair. The update makes its component visible
        only when registration succeeded.
    """
    try:
        print(f"📝 Registration attempt for: {username} ({email})")

        # Form checks, in the order their messages should win.
        form_checks = (
            (not email or not username or not password, "❌ All fields are required"),
            (password != confirm_password, "❌ Passwords do not match"),
            (not gdpr_consent, "❌ GDPR consent is required to create an account"),
            (not data_retention_consent, "❌ Data retention agreement is required"),
        )
        for check_failed, rejection in form_checks:
            if check_failed:
                return rejection, gr.update(visible=False)

        # Attempt registration
        success, message, user_id = transcription_manager.register_user(
            email, username, password, gdpr_consent, data_retention_consent, marketing_consent
        )

        print(f"📝 Registration result: success={success}, message={message}")

        if not success:
            print(f"❌ Registration failed: {message}")
            return f"❌ {message}", gr.update(visible=False)

        print(f"✅ User registered successfully: {username}")
        return f"✅ {message}! Please log in with your credentials.", gr.update(visible=True)

    except Exception as registration_error:
        print(f"❌ Registration error: {str(registration_error)}")
        return f"❌ Registration error: {str(registration_error)}", gr.update(visible=False)
132
+
133
def login_user(login, password):
    """Authenticate a user through the backend.

    Returns:
        A 5-tuple ``(message, user, update, update, stats_text)``. On success
        the user object is returned, the first update hides its component and
        the second shows its component. On failure the user is ``None`` and
        the visibilities are reversed.
    """
    logged_out_stats = "👤 Please log in to view your statistics..."

    def rejected(text):
        # Result shared by every unsuccessful outcome.
        return text, None, gr.update(visible=True), gr.update(visible=False), logged_out_stats

    try:
        print(f"🔑 Login attempt for: {login}")

        if not login or not password:
            return rejected("❌ Please enter both username/email and password")

        success, message, user = transcription_manager.login_user(login, password)
        print(f"🔑 Login result: success={success}, message={message}")

        if not (success and user):
            print(f"❌ Login failed: {message}")
            return rejected(f"❌ {message}")

        print(f"✅ User logged in successfully: {user.username}")
        stats_display = get_user_stats_display(user)
        return f"✅ Welcome back, {user.username}!", user, gr.update(visible=False), gr.update(visible=True), stats_display

    except Exception as login_error:
        print(f"❌ Login error: {str(login_error)}")
        return rejected(f"❌ Login error: {str(login_error)}")
155
+
156
def logout_user():
    """Return the logged-out state: no user, a farewell message, two visibility updates and the stats placeholder."""
    print("👋 User logged out")
    farewell = "👋 You have been logged out. Please log in to continue."
    stats_placeholder = "👤 Please log in to view your statistics..."
    first_update = gr.update(visible=True)
    second_update = gr.update(visible=False)
    return None, farewell, first_update, second_update, stats_placeholder
160
+
161
+ # Transcription Functions
162
def _transcription_submit_failure(message):
    """Build the 7-value result returned when a submission is rejected or fails.

    Only the first slot (the status message) varies between failures; the
    remaining slots are an empty string, a hidden-component update, an empty
    string, an empty job-state dict, another hidden-component update and a
    no-op update.
    """
    return (
        message,
        "",
        gr.update(visible=False),
        "",
        {},
        gr.update(visible=False),
        gr.update(),
    )


def submit_transcription(file, language, audio_format, diarization_enabled, speakers,
                         profanity, punctuation, timestamps, lexical, user):
    """Submit transcription job - requires authenticated user.

    The upload is validated in this order: it must exist on disk, its
    extension (if any) must be supported, and it must not exceed 500MB.
    The size is checked with ``os.path.getsize`` *before* the file is read so
    an oversized upload is rejected without loading it into memory.

    Args:
        file: Uploaded file, either a path string or an object whose ``str()``
            is a path.
        language: Language identifier forwarded to the transcription manager.
        audio_format, diarization_enabled, speakers, profanity, punctuation,
            timestamps, lexical: Option values forwarded unchanged in the
            ``settings`` dict.
        user: Authenticated user object (``user_id`` is read); falsy when
            nobody is logged in.

    Returns:
        A 7-tuple. On success: status message, ``""``, a hidden-component
        update, ``"Job ID: ..."``, the new job-state dict, a visible
        auto-refresh indicator update, and the refreshed user-stats text.
        On any failure: the tuple built by ``_transcription_submit_failure``.
    """
    if not user:
        return _transcription_submit_failure("❌ Please log in to submit transcriptions")

    if file is None:
        return _transcription_submit_failure("Please upload an audio or video file first.")

    try:
        # The upload arrives either as a plain path string or as an object
        # that stringifies to a path; both are handled the same way and only
        # the "missing file" message differs.
        came_as_string = isinstance(file, str)
        file_path = file if came_as_string else str(file)
        if not os.path.exists(file_path):
            return _transcription_submit_failure(
                "File not found. Please try uploading again."
                if came_as_string
                else "Unable to process file. Please try again."
            )
        original_filename = os.path.basename(file_path)

        # Validate file
        file_extension = original_filename.split('.')[-1].lower() if '.' in original_filename else ""
        supported_extensions = set(AUDIO_FORMATS) | {
            'mp4', 'mov', 'avi', 'mkv', 'webm', 'm4a', '3gp', 'f4v',
            'wmv', 'asf', 'rm', 'rmvb', 'flv', 'mpg', 'mpeg', 'mts', 'vob'
        }

        # Files without any extension are deliberately let through.
        if file_extension and file_extension not in supported_extensions:
            return _transcription_submit_failure(f"Unsupported file format: .{file_extension}")

        max_upload_bytes = 500 * 1024 * 1024  # 500MB limit
        try:
            # Basic file size check, done on disk so huge files are never read.
            if os.path.getsize(file_path) > max_upload_bytes:
                return _transcription_submit_failure(
                    "File too large. Please upload files smaller than 500MB."
                )
            with open(file_path, 'rb') as f:
                file_bytes = f.read()
        except Exception as e:
            return _transcription_submit_failure(f"Error reading file: {str(e)}")

        # Prepare settings
        settings = {
            'audio_format': audio_format,
            'diarization_enabled': diarization_enabled,
            'speakers': speakers,
            'profanity': profanity,
            'punctuation': punctuation,
            'timestamps': timestamps,
            'lexical': lexical
        }

        # Submit job
        job_id = transcription_manager.submit_transcription(
            file_bytes, original_filename, user.user_id, language, settings
        )

        # Update job state
        job_state = {
            'current_job_id': job_id,
            'start_time': datetime.now().isoformat(),
            'auto_refresh_active': True,
            'last_status': 'pending'
        }

        # Get updated user stats
        stats_display = get_user_stats_display(user)

        return (
            f"🚀 Transcription started for: {original_filename}\n🔄 Auto-refreshing every 10 seconds...",
            "",
            gr.update(visible=False),
            f"Job ID: {job_id}",
            job_state,
            gr.update(visible=True, value="🔄 Auto-refresh active"),
            stats_display
        )

    except Exception as e:
        # UI boundary: report the problem in the status box instead of crashing.
        print(f"❌ Error submitting transcription: {str(e)}")
        return _transcription_submit_failure(f"Error: {str(e)}")
310
+
311
def check_transcription_status(job_state, user):
    """Check transcription status.

    Args:
        job_state: Dict holding ``'current_job_id'``; its
            ``'auto_refresh_active'`` entry is set to ``False`` in place once
            the job has completed or failed.
        user: Authenticated user object (``user_id`` is read); falsy when
            nobody is logged in.

    Returns:
        A 3-tuple ``(status_message, transcript_text, download_update)``.
        ``transcript_text`` is non-empty only for a completed job, and
        ``download_update`` is a visible update carrying the transcript file
        path only when that file could be created.
    """
    if not user:
        return "❌ Please log in to check status", "", gr.update(visible=False)

    if not job_state or 'current_job_id' not in job_state:
        return "No active transcription job", "", gr.update(visible=False)

    try:
        job_id = job_state['current_job_id']
        job = transcription_manager.get_job_status(job_id)

        if not job:
            return "❌ Job not found", "", gr.update(visible=False)

        # Never reveal another user's job, even if its ID ends up in state.
        if job.user_id != user.user_id:
            return "❌ Access denied", "", gr.update(visible=False)

        processing_time = format_processing_time(job.created_at, job.completed_at)

        if job.status == 'completed' and job.transcript_text:
            job_state['auto_refresh_active'] = False

            # Create downloadable file
            transcript_file = create_transcript_file(job.transcript_text, job_id, job.original_filename)
            if transcript_file:
                download_update = gr.update(visible=True, value=transcript_file)
            else:
                download_update = gr.update(visible=False)

            # The previous version also fetched the user's stats here but
            # never used or returned the value, so that call was dropped.
            return (
                f"✅ Transcription completed in {processing_time}!\n📚 Result saved to your history.",
                job.transcript_text,
                download_update
            )

        if job.status == 'failed':
            job_state['auto_refresh_active'] = False
            # Keep the status box short: show at most 100 characters of the error.
            error_msg = job.error_message or "Unknown error"
            if len(error_msg) > 100:
                error_msg = error_msg[:100] + "..."
            return (
                f"❌ Transcription failed after {processing_time}: {error_msg}",
                "",
                gr.update(visible=False)
            )

        if job.status == 'processing':
            return (
                f"🔄 Transcription in progress... ({processing_time} elapsed)\n🎯 Converting speech to text with Azure AI...",
                "",
                gr.update(visible=False)
            )

        # Any other status is reported as still waiting in the queue.
        return (
            f"⏳ Transcription queued... ({processing_time} waiting)\n🚀 Your job will start processing shortly...",
            "",
            gr.update(visible=False)
        )

    except Exception as e:
        print(f"❌ Error checking status: {e}")
        return f"❌ Error checking status: {str(e)}", "", gr.update(visible=False)
370
+
371
def create_transcript_file(transcript_text, job_id, filename, output_dir="temp"):
    """Create downloadable transcript file.

    Args:
        transcript_text: Transcript body written after the header.
        job_id: Job identifier; used in the file name and the header.
        filename: Original media file name. Characters other than
            alphanumerics, space, ``-``, ``_`` and ``.`` are dropped when
            building the output file name; the header shows it unmodified.
        output_dir: Directory the file is written to (created if missing).
            Defaults to ``"temp"``, the previously hard-coded location.

    Returns:
        The path of the written ``.txt`` file, or ``None`` if anything went
        wrong (the error is printed, not raised).
    """
    try:
        os.makedirs(output_dir, exist_ok=True)
        safe_filename = "".join(
            c for c in filename if c.isalnum() or c in (' ', '-', '_', '.')
        ).rstrip()
        output_filename = f"{output_dir}/transcript_{job_id}_{safe_filename}.txt"

        content = "".join([
            f"TRANSCRIPT: {filename}\n",
            f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
            f"Job ID: {job_id}\n",
            "=" * 50 + "\n\n",
            transcript_text,
        ])

        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(content)

        return output_filename
    except Exception as e:
        # Best effort: callers treat None as "no download available".
        print(f"❌ Error creating transcript file: {e}")
        return None
391
+
392
+ # AI Summary Functions
393
def get_available_transcripts(user):
    """Build the dropdown choices listing the user's completed transcripts.

    Looks at up to 50 history entries and keeps those whose status is
    ``'completed'`` and that have transcript text. Each choice is a
    ``(label, job_id)`` pair where the label shows the file name, the first
    ten characters of ``created_at`` and the transcript length.

    Returns:
        A ``gr.update`` with the choices and an empty selection. The choices
        are empty when nobody is logged in, AI features are unavailable, or
        an error occurred (the error is printed).
    """
    if not (user and AI_FEATURES_AVAILABLE):
        return gr.update(choices=[], value=[])

    try:
        history = transcription_manager.get_user_history(user.user_id, limit=50)
        choices = [
            (
                f"{entry.original_filename} ({entry.created_at[:10]}) - {len(entry.transcript_text):,} chars",
                entry.job_id,
            )
            for entry in history
            if entry.status == 'completed' and entry.transcript_text
        ]
        return gr.update(choices=choices, value=[])

    except Exception as e:
        print(f"❌ Error getting available transcripts: {e}")
        return gr.update(choices=[], value=[])
414
+
415
def submit_ai_summary(
    user,
    processing_mode,
    selected_transcripts,
    media_files,
    support_files,
    user_prompt,
    output_format,
    output_language,
    focus_areas,
    include_timestamps,
    include_action_items
):
    """Submit AI summary job with improved validation and UI handling.

    Args:
        user: Authenticated user object (``user_id`` and ``username`` are
            read); falsy when nobody is logged in.
        processing_mode: ``"existing_transcripts"`` or ``"new_media"``.
        selected_transcripts: Transcript job IDs (required in
            ``"existing_transcripts"`` mode).
        media_files: Uploaded media files (required in ``"new_media"`` mode).
        support_files: Optional extra files appended to the files to process.
        user_prompt: Free-text instructions; must contain non-whitespace.
            ``None`` is treated like an empty prompt.
        output_format, output_language, focus_areas, include_timestamps,
            include_action_items: Options stored in ``settings``; all but
            ``output_language`` are also written into the prompt text.

    Returns:
        A 5-tuple ``(status_message, "", hidden_update, summary_job_state,
        tabs_update)``. ``summary_job_state`` is ``{}`` on any failure.
        NOTE(review): the last slot is presumably wired to a Tabs component -
        confirm against the event wiring.
    """
    if not user:
        return "❌ Please log in to use AI summarization", "", gr.update(visible=False), {}, gr.update(selected=0)

    if not AI_FEATURES_AVAILABLE:
        return "❌ AI features are not available. Please check service configuration.", "", gr.update(visible=False), {}, gr.update(selected=0)

    try:
        # Validate inputs based on processing mode
        if processing_mode == "existing_transcripts":
            if not selected_transcripts:
                return "❌ Please select at least one transcript", "", gr.update(visible=False), {}, gr.update()
            files_to_process = None
            transcript_ids = selected_transcripts

        elif processing_mode == "new_media":
            if not media_files:
                return "❌ Please upload at least one media file", "", gr.update(visible=False), {}, gr.update()
            files_to_process = media_files
            transcript_ids = None

        else:
            return "❌ Invalid processing mode selected", "", gr.update(visible=False), {}, gr.update()

        # A None prompt used to raise AttributeError here and surface as a raw
        # "'NoneType' object has no attribute 'strip'" error; treat it as empty.
        if not (user_prompt or "").strip():
            return "❌ Please provide instructions for the AI summary", "", gr.update(visible=False), {}, gr.update()

        # Combine all files for processing
        all_files = []
        if files_to_process:
            all_files.extend(files_to_process)
        if support_files:
            all_files.extend(support_files)

        # Prepare enhanced user prompt
        enhanced_prompt = f"""
User Instructions: {user_prompt}

Output Format: {output_format}
Focus Areas: {focus_areas if focus_areas else 'General summary'}
Include Timestamps: {'Yes' if include_timestamps else 'No'}
Include Action Items: {'Yes' if include_action_items else 'No'}

Please create a comprehensive conference summary following these specifications.
"""

        # Prepare settings
        settings = {
            'output_format': output_format,
            'output_language': output_language,
            'focus_areas': focus_areas,
            'include_timestamps': include_timestamps,
            'include_action_items': include_action_items,
            'processing_mode': processing_mode
        }

        # Submit AI summary job
        job_id = ai_summary_manager.submit_summary_job(
            user_id=user.user_id,
            summary_type=processing_mode,
            user_prompt=enhanced_prompt,
            files=all_files if all_files else None,
            transcript_job_ids=transcript_ids,
            settings=settings
        )

        print(f"🤖 [{user.username}] AI summary job submitted: {job_id[:8]}...")

        # Update job state for tracking
        summary_job_state = {
            'current_summary_job_id': job_id,
            'start_time': datetime.now().isoformat(),
            'auto_refresh_active': True,
            'last_status': 'pending'
        }

        return (
            f"🤖 AI Summary started!\n🔄 Processing your content with advanced AI...\nJob ID: {job_id[:8]}...\n\n⬆️ You can navigate to other tabs while processing continues in background.",
            "",
            gr.update(visible=False),
            summary_job_state,
            gr.update(selected=0)
        )

    except Exception as e:
        # UI boundary: report the problem in the status box instead of crashing.
        print(f"❌ Error submitting AI summary: {e}")
        return f"❌ Error: {str(e)}", "", gr.update(visible=False), {}, gr.update()
515
+
516
def check_summary_status(summary_job_state, user):
    """Check AI summary job status.

    Args:
        summary_job_state: Dict holding ``'current_summary_job_id'``; its
            ``'auto_refresh_active'`` entry is set to ``False`` in place once
            the job has completed or failed.
        user: Authenticated user object (``user_id`` is read); falsy when
            nobody is logged in.

    Returns:
        A 3-tuple ``(status_message, summary_text, download_update)``.
        ``summary_text`` is non-empty only for a completed job, and
        ``download_update`` is a visible update carrying the summary file path
        only when that file could be created.
    """
    if not user:
        return "❌ Please log in to check status", "", gr.update(visible=False)

    # Previously this case also said "please log in", which was misleading
    # for a user who is logged in but has no AI backend available.
    if not AI_FEATURES_AVAILABLE:
        return "❌ AI features are not available. Please check service configuration.", "", gr.update(visible=False)

    if not summary_job_state or 'current_summary_job_id' not in summary_job_state:
        return "No active AI summary job", "", gr.update(visible=False)

    try:
        job_id = summary_job_state['current_summary_job_id']
        job = ai_summary_manager.get_summary_status(job_id)

        if not job:
            return "❌ Summary job not found", "", gr.update(visible=False)

        # Same ownership rule as for transcription jobs and summary downloads:
        # never reveal another user's summary.
        if job.user_id != user.user_id:
            return "❌ Access denied", "", gr.update(visible=False)

        processing_time = format_processing_time(job.created_at, job.completed_at)

        if job.status == 'completed' and job.summary_text:
            summary_job_state['auto_refresh_active'] = False

            summary_file = create_summary_file(job.summary_text, job_id)
            if summary_file:
                download_update = gr.update(visible=True, value=summary_file)
            else:
                download_update = gr.update(visible=False)

            # The previous version also fetched the user's stats here but
            # never used or returned the value, so that call was dropped.
            return (
                f"✅ AI Summary completed in {processing_time}!\n📚 Result saved to your history.",
                job.summary_text,
                download_update
            )

        if job.status == 'failed':
            summary_job_state['auto_refresh_active'] = False
            # Keep the status box short: show at most 100 characters of the error.
            error_msg = job.error_message or "Unknown error"
            if len(error_msg) > 100:
                error_msg = error_msg[:100] + "..."
            return (
                f"❌ AI Summary failed after {processing_time}: {error_msg}",
                "",
                gr.update(visible=False)
            )

        if job.status == 'processing':
            processed_info = ""
            if job.processed_files:
                # List only the content kinds with a positive count.
                info_parts = []
                for count_key, label in (
                    ('transcript_count', 'transcripts'),
                    ('document_count', 'documents'),
                    ('image_count', 'images'),
                ):
                    count = job.processed_files.get(count_key, 0)
                    if count > 0:
                        info_parts.append(f"{count} {label}")

                if info_parts:
                    processed_info = f"\n📊 Processing: {', '.join(info_parts)}"

            return (
                f"🤖 AI Analysis in progress... ({processing_time} elapsed){processed_info}\n🔄 Using advanced AI to analyze your content...",
                "",
                gr.update(visible=False)
            )

        # Any other status is reported as still waiting in the queue.
        return (
            f"⏳ AI Summary queued... ({processing_time} waiting)\n🔄 Your job will start processing shortly...",
            "",
            gr.update(visible=False)
        )

    except Exception as e:
        print(f"❌ Error checking summary status: {e}")
        return f"❌ Error checking status: {str(e)}", "", gr.update(visible=False)
583
+
584
def create_summary_file(summary_text, job_id, output_dir="temp"):
    """Create downloadable summary file.

    Args:
        summary_text: Summary body placed between the header and the footer.
        job_id: Job identifier; used in the file name and the header.
        output_dir: Directory the file is written to (created if missing).
            Defaults to ``"temp"``, the previously hard-coded location.

    Returns:
        The path of the written Markdown file, or ``None`` if anything went
        wrong (the error is printed, not raised).
    """
    try:
        os.makedirs(output_dir, exist_ok=True)
        filename = f"{output_dir}/ai_summary_{job_id}.md"

        content = f"""# AI Conference Summary
*Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*
*Job ID: {job_id}*

---

{summary_text}

---
*Generated by AI Conference Summarization Service*
"""

        with open(filename, "w", encoding="utf-8") as f:
            f.write(content)

        return filename
    except Exception as e:
        # Best effort: callers treat None as "no download available".
        print(f"❌ Error creating summary file: {e}")
        return None
609
+
610
+ # Download Functions
611
def download_transcript_by_id(user, job_id):
    """Download transcript by job ID.

    Writes the transcript, wrapped in a short header and footer, to a new
    temporary ``.txt`` file that is left on disk for the caller to serve.

    Args:
        user: Authenticated user object (``user_id`` and ``username`` are
            read); falsy when nobody is logged in.
        job_id: ID of the transcription job to export.

    Returns:
        Path of the temporary file, or ``None`` when the user or job ID is
        missing, the transcript or job cannot be found, the job belongs to
        another user, or an error occurred (the error is printed).
    """
    if not user or not job_id:
        return None

    try:
        # Get transcript content
        transcript_content = transcription_manager.download_transcript(job_id, user.user_id)
        if not transcript_content:
            return None

        # Get job info
        job = transcription_manager.get_job_status(job_id)
        if not job:
            return None

        # Same ownership rule as the status check: do not expose another
        # user's job metadata.
        if job.user_id != user.user_id:
            return None

        # Create file
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        content = "".join([
            "TRANSCRIPT\n",
            f"File: {job.original_filename}\n",
            f"Language: {ALLOWED_LANGS.get(job.language, job.language)}\n",
            f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
            f"Job ID: {job_id}\n",
            f"User: {user.username}\n",
            "=" * 60 + "\n\n",
            transcript_content,
            "\n\n" + "=" * 60,
            "\nGenerated by Azure Speech Transcription Service",
        ])

        import tempfile
        # The context manager closes the handle even if the write fails;
        # delete=False keeps the file on disk after it is closed.
        with tempfile.NamedTemporaryFile(
            mode='w',
            suffix='.txt',
            prefix=f"transcript_{timestamp}_",
            delete=False,
            encoding='utf-8'
        ) as temp_file:
            temp_file.write(content)

        return temp_file.name

    except Exception as e:
        print(f"Error downloading transcript: {e}")
        return None
660
+
661
def download_summary_by_id(user, job_id):
    """Download summary by job ID.

    Writes the summary, wrapped in a Markdown header and footer, to a new
    temporary ``.md`` file that is left on disk for the caller to serve.

    Args:
        user: Authenticated user object (``user_id`` and ``username`` are
            read); falsy when nobody is logged in.
        job_id: ID of the AI summary job to export.

    Returns:
        Path of the temporary file, or ``None`` when the user or job ID is
        missing, AI features are unavailable, the job is missing, has no
        summary text or belongs to another user, or an error occurred (the
        error is printed).
    """
    if not user or not job_id or not AI_FEATURES_AVAILABLE:
        return None

    try:
        # Get summary job from whichever manager exposes a lookup.
        job = None
        if hasattr(ai_summary_manager, 'get_summary_status'):
            job = ai_summary_manager.get_summary_status(job_id)
        elif hasattr(transcription_manager, 'get_summary_job'):
            job = transcription_manager.get_summary_job(job_id)

        if not job or not job.summary_text or job.user_id != user.user_id:
            return None

        # Create file
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        topic_name = "AI_Summary"
        if hasattr(job, 'original_files') and job.original_files:
            raw_topic = str(job.original_files[0])
            cleaned = "".join(
                c for c in raw_topic if c.isalnum() or c in (' ', '-', '_')
            ).strip().replace(' ', '_')
            # Keep the default when sanitising leaves nothing behind.
            topic_name = cleaned or topic_name

        content = "".join([
            "# AI Conference Summary\n\n",
            f"**Topic:** {topic_name.replace('_', ' ')}\n",
            f"**Type:** {getattr(job, 'summary_type', 'Conference Summary')}\n",
            f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
            f"**Job ID:** {job_id}\n",
            f"**User:** {user.username}\n\n",
            "---\n\n",
            job.summary_text,
            "\n\n---\n\n",
            "*Generated by AI Conference Summarization Service*",
        ])

        import tempfile
        # The context manager closes the handle even if the write fails;
        # delete=False keeps the file on disk after it is closed.
        with tempfile.NamedTemporaryFile(
            mode='w',
            suffix='.md',
            prefix=f"summary_{timestamp}_",
            delete=False,
            encoding='utf-8'
        ) as temp_file:
            temp_file.write(content)

        return temp_file.name

    except Exception as e:
        print(f"Error downloading summary: {e}")
        return None
714
+
715
+ # IMPROVED History Functions with Interactive Downloads
716
def get_transcript_history_with_downloads(user, show_all=False):
    """Get transcription history with downloadable links.

    Args:
        user: Authenticated user object (``user_id`` and ``username`` are
            read); falsy when nobody is logged in.
        show_all: When true, request up to 100 history entries instead of 20.

    Returns:
        A 3-tuple ``(table_data, summary_text, download_data)``.
        ``table_data`` has one row per job: date, file name, language,
        status, processing time, shortened job ID and download label.
        ``download_data`` is parallel to ``table_data`` and holds the full
        job ID for completed jobs with transcript text, ``""`` otherwise.
        On error both lists are empty and ``summary_text`` carries the error.
    """
    if not user:
        return [], "👤 Please log in to view transcription history", []

    try:
        limit = 100 if show_all else 20
        jobs = transcription_manager.get_user_history(user.user_id, limit=limit)

        if not jobs:
            return [], f"📂 No transcriptions found for {user.username}", []

        table_data = []
        download_data = []  # Store job IDs for downloads
        completed_count = 0

        for job in jobs:
            try:
                created_time = datetime.fromisoformat(job.created_at)
                formatted_date = created_time.strftime("%Y-%m-%d %H:%M")
            except (ValueError, TypeError):
                # Not an ISO timestamp (or not a string): show the raw prefix.
                formatted_date = job.created_at[:16] if job.created_at else "Unknown"

            status_display = format_status(job.status)
            time_display = format_processing_time(job.created_at, job.completed_at)
            language_display = ALLOWED_LANGS.get(job.language, job.language)
            job_id_short = job.job_id[:8] + "..."

            # Download link
            if job.status == 'completed' and job.transcript_text:
                completed_count += 1
                download_text = "📥 Download"
                download_data.append(job.job_id)
            else:
                download_text = "Not Available"
                download_data.append("")

            table_data.append([
                formatted_date,
                job.original_filename,
                language_display,
                status_display,
                time_display,
                job_id_short,
                download_text
            ])

        summary_text = f"📊 {len(jobs)} transcriptions ({completed_count} completed)"
        return table_data, summary_text, download_data

    except Exception as e:
        return [], f"❌ Error: {str(e)}", []
768
+
769
def get_summary_history_with_downloads(user, show_all=False):
    """Get AI summary history with downloadable links.

    Args:
        user: Authenticated user object (``user_id`` and ``username`` are
            read); falsy when nobody is logged in.
        show_all: When true, request up to 100 history entries instead of 20.

    Returns:
        A 3-tuple ``(table_data, summary_text, download_data)``.
        ``table_data`` has one row per job: date, topic, type, status,
        processing time, shortened job ID and download label.
        ``download_data`` is parallel to ``table_data`` and holds the full
        job ID for completed jobs with summary text, ``""`` otherwise.
        On error both lists are empty and ``summary_text`` carries the error.
    """
    if not user:
        return [], "👤 Please log in to view AI summary history", []

    if not AI_FEATURES_AVAILABLE:
        return [], "AI Summary features not available", []

    try:
        limit = 100 if show_all else 20
        jobs = []

        # Use whichever manager exposes the summary history lookup.
        if hasattr(transcription_manager, 'get_user_summary_history'):
            jobs = transcription_manager.get_user_summary_history(user.user_id, limit=limit)
        elif hasattr(ai_summary_manager, 'get_user_summary_history'):
            jobs = ai_summary_manager.get_user_summary_history(user.user_id, limit=limit)

        if not jobs:
            return [], f"📂 No AI summaries found for {user.username}", []

        table_data = []
        download_data = []  # Store job IDs for downloads
        completed_count = 0

        for job in jobs:
            try:
                created_time = datetime.fromisoformat(job.created_at)
                formatted_date = created_time.strftime("%Y-%m-%d %H:%M")
            except (ValueError, TypeError):
                # Not an ISO timestamp (or not a string): show the raw prefix.
                formatted_date = job.created_at[:16] if job.created_at else "Unknown"

            status_display = format_status(job.status)
            time_display = format_processing_time(job.created_at, job.completed_at)
            job_id_short = job.job_id[:8] + "..."

            # Get topic name: first original file, shortened to 40 characters.
            topic_name = "AI Summary"
            if hasattr(job, 'original_files') and job.original_files:
                topic_name = str(job.original_files[0])
                if len(topic_name) > 40:
                    topic_name = topic_name[:37] + "..."

            type_display = getattr(job, 'summary_type', 'Conference')

            # Download link
            if job.status == 'completed' and job.summary_text:
                completed_count += 1
                download_text = "📥 Download"
                download_data.append(job.job_id)
            else:
                download_text = "Not Available"
                download_data.append("")

            table_data.append([
                formatted_date,
                topic_name,
                type_display,
                status_display,
                time_display,
                job_id_short,
                download_text
            ])

        summary_text = f"🤖 {len(jobs)} AI summaries ({completed_count} completed)"
        return table_data, summary_text, download_data

    except Exception as e:
        return [], f"❌ Error: {str(e)}", []
837
+
838
def handle_transcript_table_select(user, evt: gr.SelectData):
    """Handle transcript table row selection for download.

    Args:
        user: Authenticated user object; falsy when nobody is logged in.
        evt: Selection event; ``evt.index[0]`` is used as the row index.

    Returns:
        Path of a freshly written transcript file for the selected row, or
        ``None`` when there is nothing to download (no user, row out of
        range, job not completed, or an error, which is printed).
    """
    if not user or not evt:
        return None

    try:
        # Re-fetch the history to map the clicked row back to a job ID.
        # The full (show_all) list is requested because the table may be
        # displaying more than the default 20 rows; with the default lookup,
        # clicks on later rows silently did nothing.
        # NOTE(review): assumes the shorter list is a prefix of the longer
        # one (same ordering) - confirm against get_user_history.
        _, _, download_data = get_transcript_history_with_downloads(user, show_all=True)

        row_index = evt.index[0]
        if row_index < len(download_data):
            job_id = download_data[row_index]
            if job_id:  # Only download if job_id exists (completed jobs)
                return download_transcript_by_id(user, job_id)

        return None

    except Exception as e:
        print(f"Error handling transcript table selection: {e}")
        return None
858
+
859
def handle_summary_table_select(user, evt: gr.SelectData):
    """Handle summary table row selection for download.

    Args:
        user: Authenticated user object; falsy when nobody is logged in.
        evt: Selection event; ``evt.index[0]`` is used as the row index.

    Returns:
        Path of a freshly written summary file for the selected row, or
        ``None`` when there is nothing to download (no user, row out of
        range, job not completed, or an error, which is printed).
    """
    if not user or not evt:
        return None

    try:
        # Re-fetch the history to map the clicked row back to a job ID.
        # The full (show_all) list is requested because the table may be
        # displaying more than the default 20 rows; with the default lookup,
        # clicks on later rows silently did nothing.
        # NOTE(review): assumes the shorter list is a prefix of the longer
        # one (same ordering) - confirm against get_user_summary_history.
        _, _, download_data = get_summary_history_with_downloads(user, show_all=True)

        row_index = evt.index[0]
        if row_index < len(download_data):
            job_id = download_data[row_index]
            if job_id:  # Only download if job_id exists (completed jobs)
                return download_summary_by_id(user, job_id)

        return None

    except Exception as e:
        print(f"Error handling summary table selection: {e}")
        return None
879
+
880
def refresh_all_history(user, show_all):
    """Reload both history tables in one call.

    Args:
        user: Authenticated user object; falsy when nobody is logged in.
        show_all: Forwarded to both history loaders.

    Returns:
        A 4-tuple ``(transcript_rows, transcript_caption, summary_rows,
        summary_caption)``. Without a user both row lists are empty and both
        captions ask the user to log in.
    """
    if user:
        # The per-row job-ID lists from the loaders are not needed here.
        transcript_rows, transcript_caption, _ = get_transcript_history_with_downloads(user, show_all)
        summary_rows, summary_caption, _ = get_summary_history_with_downloads(user, show_all)
        return transcript_rows, transcript_caption, summary_rows, summary_caption

    login_hint = "👤 Please log in to view your history"
    return [], login_hint, [], login_hint
893
+
894
+ # Enhanced CSS with better table styling
895
# Stylesheet handed to gr.Blocks via its css= argument.
# Rules cover the page container, .gr-box cards, .gr-button buttons, the
# history tables, section headers, the user statistics box and the download
# instructions. The .history-table selector appears in two places: first for
# the table width and the clickable last cell, later for the container look.
# NOTE(review): the layout also references the classes main-header, auth-form,
# auth-button, privacy-notice, processing-mode, transcript-dropdown and
# status-display, but no rule for them is present in this string.
enhanced_css = """
/* Main container styling */
.gradio-container {
background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
font-family: 'Segoe UI', system-ui, sans-serif;
color: #212529;
}

/* Card styling */
.gr-box {
background: white;
border: 1px solid #dee2e6;
border-radius: 12px;
box-shadow: 0 2px 8px rgba(0,0,0,0.08);
padding: 20px;
margin: 10px 0;
}

/* Button styling */
.gr-button {
background: linear-gradient(135deg, #007bff, #0056b3);
border: none;
border-radius: 8px;
color: white;
font-weight: 500;
padding: 12px 24px;
transition: all 0.2s ease;
box-shadow: 0 2px 4px rgba(0,123,255,0.2);
}

.gr-button:hover {
background: linear-gradient(135deg, #0056b3, #004085);
transform: translateY(-1px);
box-shadow: 0 4px 8px rgba(0,123,255,0.3);
}

/* Table styling for history */
.history-table table {
width: 100% !important;
}

.history-table td:last-child {
cursor: pointer !important;
color: #007bff !important;
font-weight: 600 !important;
}

.history-table td:last-child:hover {
background-color: #e7f3ff !important;
text-decoration: underline !important;
}

/* Section headers */
.section-header {
background: linear-gradient(135deg, #007bff, #0056b3);
color: white;
padding: 10px 15px;
border-radius: 8px;
margin: 15px 0 10px 0;
font-weight: 600;
}

/* User stats styling */
.user-stats {
background: linear-gradient(135deg, #e8f5e8, #c8e6c9);
border: 1px solid #c8e6c9;
border-radius: 6px;
padding: 8px 12px;
font-size: 12px;
color: #2e7d32;
text-align: center;
font-weight: 500;
}

/* History table styling */
.history-table {
background: white;
border-radius: 8px;
overflow: hidden;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}

/* Download instruction styling */
.download-instructions {
background: #e8f5e8;
border: 1px solid #c8e6c9;
border-radius: 6px;
padding: 10px;
margin: 10px 0;
font-style: italic;
color: #2e7d32;
text-align: center;
}
"""
989
+
990
+ # Create the main interface with improved History tab
991
+ with gr.Blocks(
992
+ theme=gr.themes.Soft(
993
+ primary_hue="blue",
994
+ secondary_hue="gray",
995
+ neutral_hue="gray",
996
+ font=["system-ui", "sans-serif"]
997
+ ),
998
+ css=enhanced_css,
999
+ title="🎙️ AI Conference Summarization Service - Enhanced with Computer Vision & Multi-format Processing"
1000
+ ) as demo:
1001
+
1002
+ # Global state
1003
+ current_user = gr.State(None)
1004
+ job_state = gr.State({})
1005
+ summary_job_state = gr.State({})
1006
+
1007
+ # Header
1008
+ with gr.Row():
1009
+ gr.HTML("""
1010
+ <div class="main-header">
1011
+ <h1>🎙️ AI Conference Summarization Service</h1>
1012
+ <p>Advanced AI-powered conference analysis with transcription, computer vision, and multi-format processing</p>
1013
+ </div>
1014
+ """)
1015
+
1016
+ # User stats display
1017
+ user_stats_display = gr.Textbox(
1018
+ label="",
1019
+ lines=1,
1020
+ interactive=False,
1021
+ show_label=False,
1022
+ placeholder="👤 Please log in to view your statistics...",
1023
+ elem_classes=["user-stats"]
1024
+ )
1025
+
1026
+ # Authentication Section
1027
+ with gr.Column(visible=True, elem_classes=["auth-form"]) as auth_section:
1028
+ gr.Markdown("## 🔐 Authentication Required")
1029
+ gr.Markdown("Please log in or create an account to use the AI conference summarization service.")
1030
+
1031
+ with gr.Tabs() as auth_tabs:
1032
+ # Login Tab
1033
+ with gr.Tab("🔑 Login") as login_tab:
1034
+ with gr.Column():
1035
+ login_email = gr.Textbox(
1036
+ label="Email or Username",
1037
+ placeholder="Enter your email or username"
1038
+ )
1039
+ login_password = gr.Textbox(
1040
+ label="Password",
1041
+ type="password",
1042
+ placeholder="Enter your password"
1043
+ )
1044
+
1045
+ with gr.Row():
1046
+ login_btn = gr.Button("🔑 Login", variant="primary", elem_classes=["auth-button"])
1047
+
1048
+ login_status = gr.Textbox(
1049
+ label="",
1050
+ show_label=False,
1051
+ interactive=False,
1052
+ placeholder="Enter your credentials and click Login"
1053
+ )
1054
+
1055
+ # Register Tab
1056
+ with gr.Tab("📝 Register") as register_tab:
1057
+ with gr.Column():
1058
+ reg_email = gr.Textbox(
1059
+ label="Email",
1060
+ placeholder="Enter your email address"
1061
+ )
1062
+ reg_username = gr.Textbox(
1063
+ label="Username",
1064
+ placeholder="Choose a username (3-30 characters, alphanumeric and underscore)"
1065
+ )
1066
+ reg_password = gr.Textbox(
1067
+ label="Password",
1068
+ type="password",
1069
+ placeholder="Create a strong password (min 8 chars, mixed case, numbers)"
1070
+ )
1071
+ reg_confirm_password = gr.Textbox(
1072
+ label="Confirm Password",
1073
+ type="password",
1074
+ placeholder="Confirm your password"
1075
+ )
1076
+
1077
+ gr.Markdown("### 📋 Privacy & Data Consent")
1078
+
1079
+ with gr.Column(elem_classes=["privacy-notice"]):
1080
+ gr.Markdown("""
1081
+ **Privacy Notice**: By creating an account, you acknowledge that:
1082
+ - Your data will be stored securely in user-separated Azure Blob Storage
1083
+ - AI processing uses Azure Cognitive Services (Speech, Computer Vision, AI Agents)
1084
+ - You can export or delete your data at any time
1085
+ - We comply with GDPR and data protection regulations
1086
+ """)
1087
+
1088
+ gdpr_consent = gr.Checkbox(
1089
+ label="I consent to the processing of my personal data as described in the Privacy Notice (Required)",
1090
+ value=False
1091
+ )
1092
+ data_retention_consent = gr.Checkbox(
1093
+ label="I agree to data retention for transcription and AI service functionality (Required)",
1094
+ value=False
1095
+ )
1096
+ marketing_consent = gr.Checkbox(
1097
+ label="I consent to receiving marketing communications (Optional)",
1098
+ value=False
1099
+ )
1100
+
1101
+ with gr.Row():
1102
+ register_btn = gr.Button("📝 Create Account", variant="primary", elem_classes=["auth-button"])
1103
+
1104
+ register_status = gr.Textbox(
1105
+ label="",
1106
+ show_label=False,
1107
+ interactive=False,
1108
+ placeholder="Fill out the form and agree to the required consents to create your account"
1109
+ )
1110
+
1111
+ login_after_register = gr.Button(
1112
+ "🔑 Go to Login",
1113
+ visible=False,
1114
+ variant="secondary"
1115
+ )
1116
+
1117
+ # Main Application (visible only when logged in)
1118
+ with gr.Column(visible=False) as main_app:
1119
+
1120
+ # Logout button
1121
+ with gr.Row():
1122
+ with gr.Column(scale=3):
1123
+ pass
1124
+ with gr.Column(scale=1):
1125
+ logout_btn = gr.Button("👋 Logout", variant="secondary")
1126
+
1127
+ with gr.Tabs() as main_tabs:
1128
+
1129
+ # AI Summary Conference Tab
1130
+ with gr.Tab("🤖 AI Summary Conference"):
1131
+ gr.Markdown("### 🎯 AI-Powered Conference Summarization")
1132
+
1133
+ with gr.Row():
1134
+ # First Column - Content Sources
1135
+ with gr.Column(scale=1):
1136
+ gr.HTML('<div class="section-header">📥 Content Sources</div>')
1137
+
1138
+ processing_mode = gr.Radio(
1139
+ choices=[
1140
+ ("Existing Transcripts", "existing_transcripts"),
1141
+ ("New Media Files", "new_media")
1142
+ ],
1143
+ value="existing_transcripts",
1144
+ label="Mode",
1145
+ elem_classes=["processing-mode"]
1146
+ )
1147
+
1148
+ with gr.Column() as existing_transcripts_section:
1149
+ available_transcripts = gr.Dropdown(
1150
+ choices=[],
1151
+ label="Available Transcripts",
1152
+ multiselect=True,
1153
+ interactive=True,
1154
+ elem_classes=["transcript-dropdown"]
1155
+ )
1156
+ refresh_transcripts_btn = gr.Button("🔄 Refresh", variant="secondary", size="sm")
1157
+
1158
+ with gr.Column(visible=False) as new_media_section:
1159
+ media_files = gr.Files(
1160
+ label="Media Files",
1161
+ file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm", ".flv", ".wav", ".mp3", ".ogg", ".opus", ".flac", ".m4a", ".aac"],
1162
+ file_count="multiple"
1163
+ )
1164
+
1165
+ support_files = gr.Files(
1166
+ label="Supporting Files",
1167
+ file_types=[".pdf", ".docx", ".doc", ".pptx", ".ppt", ".xlsx", ".xls", ".csv", ".txt", ".json", ".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"],
1168
+ file_count="multiple"
1169
+ )
1170
+
1171
+ # Second Column - AI Instructions
1172
+ with gr.Column(scale=1):
1173
+ gr.HTML('<div class="section-header">🎯 AI Instructions</div>')
1174
+
1175
+ user_prompt = gr.Textbox(
1176
+ label="Instructions for AI",
1177
+ placeholder="Describe the conference type, desired format, and any corrections...",
1178
+ lines=6
1179
+ )
1180
+
1181
+ with gr.Row():
1182
+ output_format = gr.Dropdown(
1183
+ choices=["Executive Summary", "Detailed Report", "Meeting Minutes", "Action Items List", "Key Insights & Decisions"],
1184
+ value="Executive Summary",
1185
+ label="Format"
1186
+ )
1187
+ output_language = gr.Dropdown(
1188
+ choices=["English", "Thai", "Spanish", "French", "German", "Japanese", "Chinese"],
1189
+ value="English",
1190
+ label="Language"
1191
+ )
1192
+
1193
+ focus_areas = gr.Textbox(
1194
+ label="Focus Areas",
1195
+ placeholder="e.g., financial results, decisions, technical discussions",
1196
+ lines=1
1197
+ )
1198
+
1199
+ with gr.Row():
1200
+ include_timestamps = gr.Checkbox(label="Timestamps", value=True)
1201
+ include_action_items = gr.Checkbox(label="Action Items", value=True)
1202
+
1203
+ submit_summary_btn = gr.Button("🚀 Generate AI Summary", variant="primary", size="lg")
1204
+
1205
+ # Third Column - Status & Results
1206
+ with gr.Column(scale=1):
1207
+ gr.HTML('<div class="section-header">🤖 Status & Results</div>')
1208
+
1209
+ summary_status_display = gr.Textbox(
1210
+ label="",
1211
+ lines=3,
1212
+ interactive=False,
1213
+ show_label=False,
1214
+ placeholder="Configure sources and click 'Generate AI Summary'...",
1215
+ elem_classes=["status-display"]
1216
+ )
1217
+
1218
+ refresh_summary_btn = gr.Button("🔄 Check Status", variant="secondary")
1219
+
1220
+ summary_output = gr.Textbox(
1221
+ label="AI Summary Results",
1222
+ lines=12,
1223
+ interactive=False,
1224
+ placeholder="Generated summary will appear here..."
1225
+ )
1226
+
1227
+ summary_download_file = gr.File(
1228
+ label="Download",
1229
+ interactive=False,
1230
+ visible=False
1231
+ )
1232
+
1233
+ # Traditional Transcription Tab
1234
+ with gr.Tab("🎙️ Transcribe"):
1235
+ with gr.Row():
1236
+ # Left column - Input settings
1237
+ with gr.Column(scale=1):
1238
+ gr.Markdown("### 📁 Upload File")
1239
+
1240
+ file_upload = gr.File(
1241
+ label="Audio or Video File",
1242
+ type="filepath",
1243
+ file_types=[
1244
+ ".wav", ".mp3", ".ogg", ".opus", ".flac", ".wma", ".aac",
1245
+ ".m4a", ".amr", ".webm", ".speex",
1246
+ ".mp4", ".mov", ".avi", ".mkv", ".wmv", ".flv", ".3gp"
1247
+ ]
1248
+ )
1249
+
1250
+ with gr.Row():
1251
+ language = gr.Dropdown(
1252
+ choices=[(v, k) for k, v in ALLOWED_LANGS.items()],
1253
+ label="Language",
1254
+ value="th-TH"
1255
+ )
1256
+ audio_format = gr.Dropdown(
1257
+ choices=AUDIO_FORMATS,
1258
+ value="wav",
1259
+ label="Output Format"
1260
+ )
1261
+
1262
+ gr.Markdown("### ⚙️ Settings")
1263
+
1264
+ with gr.Row():
1265
+ diarization_enabled = gr.Checkbox(
1266
+ label="Speaker Identification",
1267
+ value=True
1268
+ )
1269
+ speakers = gr.Slider(
1270
+ minimum=1,
1271
+ maximum=10,
1272
+ step=1,
1273
+ value=2,
1274
+ label="Max Speakers"
1275
+ )
1276
+
1277
+ with gr.Row():
1278
+ timestamps = gr.Checkbox(
1279
+ label="Timestamps",
1280
+ value=True
1281
+ )
1282
+ profanity = gr.Dropdown(
1283
+ choices=["masked", "removed", "raw"],
1284
+ value="masked",
1285
+ label="Profanity Filter"
1286
+ )
1287
+
1288
+ with gr.Row():
1289
+ punctuation = gr.Dropdown(
1290
+ choices=["automatic", "dictated", "none"],
1291
+ value="automatic",
1292
+ label="Punctuation"
1293
+ )
1294
+ lexical = gr.Checkbox(
1295
+ label="Lexical Form",
1296
+ value=False
1297
+ )
1298
+
1299
+ submit_btn = gr.Button(
1300
+ "🚀 Start Transcription",
1301
+ variant="primary",
1302
+ size="lg"
1303
+ )
1304
+
1305
+ # Right column - Results
1306
+ with gr.Column(scale=1):
1307
+ gr.Markdown("### 📊 Status")
1308
+
1309
+ auto_refresh_status_display = gr.Textbox(
1310
+ label="",
1311
+ lines=1,
1312
+ interactive=False,
1313
+ show_label=False,
1314
+ visible=False,
1315
+ elem_classes=["auto-refresh-indicator"]
1316
+ )
1317
+
1318
+ status_display = gr.Textbox(
1319
+ label="",
1320
+ lines=3,
1321
+ interactive=False,
1322
+ show_label=False,
1323
+ placeholder="Upload a file and click 'Start Transcription' to begin..."
1324
+ )
1325
+
1326
+ job_info = gr.Textbox(
1327
+ label="",
1328
+ lines=1,
1329
+ interactive=False,
1330
+ show_label=False,
1331
+ placeholder=""
1332
+ )
1333
+
1334
+ with gr.Row():
1335
+ refresh_btn = gr.Button(
1336
+ "🔄 Check Status",
1337
+ variant="secondary"
1338
+ )
1339
+ stop_refresh_btn = gr.Button(
1340
+ "ℹ️ Stop Auto-Refresh",
1341
+ variant="secondary"
1342
+ )
1343
+
1344
+ gr.Markdown("### 📄 Results")
1345
+
1346
+ transcript_output = gr.Textbox(
1347
+ label="Transcript",
1348
+ lines=12,
1349
+ interactive=False,
1350
+ placeholder="Your transcript with speaker identification and precise timestamps will appear here..."
1351
+ )
1352
+
1353
+ download_file = gr.File(
1354
+ label="Download",
1355
+ interactive=False,
1356
+ visible=False
1357
+ )
1358
+
1359
+ # History tab - COMPLETELY REDESIGNED WITH CLICKABLE DOWNLOADS
1360
+ with gr.Tab("📚 My History"):
1361
+ gr.Markdown("### 📋 Your Processing History")
1362
+
1363
+ with gr.Row():
1364
+ refresh_history_btn = gr.Button("🔄 Refresh History", variant="primary")
1365
+ show_all_checkbox = gr.Checkbox(label="Show All Jobs", value=False)
1366
+
1367
+ # TRANSCRIPTION HISTORY SECTION
1368
+ gr.Markdown("## 🎙️ Transcription History")
1369
+ gr.HTML('<div class="download-instructions">💡 Click on the "📥 Download" link in the table to instantly download completed transcripts</div>')
1370
+
1371
+ transcript_summary = gr.Textbox(
1372
+ label="", lines=1, interactive=False, show_label=False,
1373
+ placeholder="📂 Your transcription history will appear here..."
1374
+ )
1375
+
1376
+ transcript_table = gr.Dataframe(
1377
+ headers=["Date", "Filename", "Language", "Status", "Duration", "Job ID", "Download"],
1378
+ datatype=["str", "str", "str", "str", "str", "str", "str"],
1379
+ col_count=(7, "fixed"),
1380
+ row_count=(15, "dynamic"),
1381
+ interactive=False,
1382
+ elem_classes=["history-table"]
1383
+ )
1384
+
1385
+ # Hidden file component for transcript downloads
1386
+ transcript_download_file = gr.File(
1387
+ label="Downloaded Transcript",
1388
+ visible=True
1389
+ )
1390
+
1391
+ # AI SUMMARY HISTORY SECTION
1392
+ gr.Markdown("## 🤖 AI Summary History")
1393
+ gr.HTML('<div class="download-instructions">💡 Click on the "📥 Download" link in the table to instantly download completed AI summaries</div>')
1394
+
1395
+ summary_summary = gr.Textbox(
1396
+ label="", lines=1, interactive=False, show_label=False,
1397
+ placeholder="📂 Your AI summary history will appear here..."
1398
+ )
1399
+
1400
+ summary_table = gr.Dataframe(
1401
+ headers=["Date", "Topic", "Type", "Status", "Duration", "Job ID", "Download"],
1402
+ datatype=["str", "str", "str", "str", "str", "str", "str"],
1403
+ col_count=(7, "fixed"),
1404
+ row_count=(15, "dynamic"),
1405
+ interactive=False,
1406
+ elem_classes=["history-table"]
1407
+ )
1408
+
1409
+ # Hidden file component for summary downloads
1410
+ summary_download_file = gr.File(
1411
+ label="Downloaded Summary",
1412
+ visible=True
1413
+ )
1414
+
1415
+ # Help Tab
1416
+ with gr.Tab("❓ Help"):
1417
+ gr.Markdown("# 📋 AI Conference Summarization Service Guide")
1418
+
1419
+ with gr.Row():
1420
+ # Column 1: AI Conference Summarization
1421
+ with gr.Column(scale=1):
1422
+ gr.Markdown("""
1423
+ ## 🤖 AI Conference Summarization
1424
+
1425
+ **Advanced AI-powered service that creates comprehensive conference summaries**
1426
+
1427
+ ### Key Features
1428
+ - Multi-source processing (transcripts, documents, images, videos)
1429
+ - Computer vision extracts text from slides automatically
1430
+ - Custom AI instructions for personalized summaries
1431
+ - Multiple output formats and languages
1432
+ - Action items and key insights extraction
1433
+
1434
+ ### How to Use
1435
+ 1. **Choose Mode**: Use existing transcripts OR upload new media
1436
+ 2. **Add Content**: Include supporting files (presentations, images, documents)
1437
+ 3. **AI Instructions**: Describe meeting context, desired format, corrections needed
1438
+ 4. **Generate**: Click "Generate AI Summary" - processing continues in background
1439
+
1440
+ ### Supported Files
1441
+ - **Media**: MP4, MOV, AVI, WAV, MP3, OGG
1442
+ - **Documents**: PDF, Word, PowerPoint, Excel
1443
+ - **Images**: JPG, PNG, BMP, GIF, TIFF
1444
+ - **Data**: CSV, JSON, TXT
1445
+ """)
1446
+
1447
+ # Column 2: Speech Transcription
1448
+ with gr.Column(scale=1):
1449
+ gr.Markdown("""
1450
+ ## 🎙️ Speech Transcription Service
1451
+
1452
+ **High-accuracy speech-to-text with speaker identification and timestamps**
1453
+
1454
+ ### Key Features
1455
+ - Enterprise-grade Azure speech recognition
1456
+ - Automatic speaker identification and separation
1457
+ - Precise HH:MM:SS timestamps
1458
+ - 50+ languages and dialects supported
1459
+ - Multiple audio/video format support
1460
+
1461
+ ### How to Use
1462
+ 1. **Upload**: Drag/drop audio or video file (max 500MB)
1463
+ 2. **Configure**: Select language, speakers, output format
1464
+ 3. **Settings**: Enable timestamps, speaker ID, profanity filtering
1465
+ 4. **Process**: Click "Start Transcription" - auto-refresh shows progress
1466
+
1467
+ ### Easy Downloads in History Tab
1468
+ - All results saved to your private history
1469
+ - **Simply click the "📥 Download" link in any table row**
1470
+ - Instant downloads for all completed jobs
1471
+ - Files include full content plus metadata
1472
+ - Transcripts saved as TXT, summaries as Markdown
1473
+ """)
1474
+
1475
+ # Privacy & Data Tab
1476
+ with gr.Tab("🔒 Privacy & Data"):
1477
+ gr.Markdown("## 🔒 Privacy & Data Protection")
1478
+ gr.Markdown("Complete information about data handling, privacy protections, and licensing.")
1479
+
1480
+ with gr.Accordion("🔐 Data Privacy & Security", open=True):
1481
+ gr.Markdown("""
1482
+ ### GDPR Compliance & Data Protection
1483
+
1484
+ **Your Rights:**
1485
+ - Right to access your data
1486
+ - Right to rectify incorrect information
1487
+ - Right to erasure ("right to be forgotten")
1488
+ - Right to data portability
1489
+ - Right to object to processing
1490
+
1491
+ **Data Security:**
1492
+ - All data encrypted in transit (HTTPS/TLS)
1493
+ - User-isolated Azure Blob Storage
1494
+ - Enterprise-grade security infrastructure
1495
+ - Regular security updates and monitoring
1496
+
1497
+ **Data Processing:**
1498
+ - Azure Speech Services (transcription)
1499
+ - Azure Computer Vision (image analysis)
1500
+ - Azure OpenAI (AI summarization)
1501
+ - No data sharing with third parties
1502
+ """)
1503
+
1504
+ # Auto-refresh timer
1505
+ timer = gr.Timer(10.0)
1506
+ summary_timer = gr.Timer(15.0)
1507
+
1508
+ # Event handlers
1509
+
1510
+ # Authentication events
1511
+ login_btn.click(
1512
+ login_user,
1513
+ inputs=[login_email, login_password],
1514
+ outputs=[login_status, current_user, auth_section, main_app, user_stats_display]
1515
+ ).then(
1516
+ lambda user: ("", "") if user else (gr.update(), gr.update()),
1517
+ inputs=[current_user],
1518
+ outputs=[login_email, login_password]
1519
+ ).then(
1520
+ get_available_transcripts,
1521
+ inputs=[current_user],
1522
+ outputs=[available_transcripts]
1523
+ ).then(
1524
+ refresh_all_history,
1525
+ inputs=[current_user, show_all_checkbox],
1526
+ outputs=[transcript_table, transcript_summary, summary_table, summary_summary]
1527
+ )
1528
+
1529
+ register_btn.click(
1530
+ register_user,
1531
+ inputs=[reg_email, reg_username, reg_password, reg_confirm_password,
1532
+ gdpr_consent, data_retention_consent, marketing_consent],
1533
+ outputs=[register_status, login_after_register]
1534
+ )
1535
+
1536
+ login_after_register.click(
1537
+ lambda: (gr.update(selected=0), ""),
1538
+ outputs=[auth_tabs, register_status]
1539
+ )
1540
+
1541
+ logout_btn.click(
1542
+ logout_user,
1543
+ outputs=[current_user, login_status, auth_section, main_app, user_stats_display]
1544
+ )
1545
+
1546
+ # Transcription events
1547
+ submit_btn.click(
1548
+ submit_transcription,
1549
+ inputs=[
1550
+ file_upload, language, audio_format, diarization_enabled,
1551
+ speakers, profanity, punctuation, timestamps, lexical, current_user
1552
+ ],
1553
+ outputs=[status_display, transcript_output, download_file, job_info, job_state, auto_refresh_status_display, user_stats_display]
1554
+ )
1555
+
1556
+ refresh_btn.click(
1557
+ check_transcription_status,
1558
+ inputs=[job_state, current_user],
1559
+ outputs=[status_display, transcript_output, download_file]
1560
+ )
1561
+
1562
+ timer.tick(
1563
+ check_transcription_status,
1564
+ inputs=[job_state, current_user],
1565
+ outputs=[status_display, transcript_output, download_file]
1566
+ )
1567
+
1568
+ # AI Summary events
1569
+ refresh_transcripts_btn.click(
1570
+ get_available_transcripts,
1571
+ inputs=[current_user],
1572
+ outputs=[available_transcripts]
1573
+ )
1574
+
1575
+ # Show/hide sections based on processing mode
1576
+ processing_mode.change(
1577
+ lambda choice: (
1578
+ gr.update(visible=choice == "existing_transcripts"),
1579
+ gr.update(visible=choice == "new_media")
1580
+ ),
1581
+ inputs=[processing_mode],
1582
+ outputs=[existing_transcripts_section, new_media_section]
1583
+ )
1584
+
1585
+ submit_summary_btn.click(
1586
+ submit_ai_summary,
1587
+ inputs=[
1588
+ current_user, processing_mode, available_transcripts, media_files,
1589
+ support_files, user_prompt, output_format, output_language, focus_areas,
1590
+ include_timestamps, include_action_items
1591
+ ],
1592
+ outputs=[summary_status_display, summary_output, summary_download_file, summary_job_state, main_tabs]
1593
+ ).then(
1594
+ lambda user: get_user_stats_display(user) if user else "👤 Please log in to view your statistics...",
1595
+ inputs=[current_user],
1596
+ outputs=[user_stats_display]
1597
+ )
1598
+
1599
+ refresh_summary_btn.click(
1600
+ check_summary_status,
1601
+ inputs=[summary_job_state, current_user],
1602
+ outputs=[summary_status_display, summary_output, summary_download_file]
1603
+ )
1604
+
1605
+ summary_timer.tick(
1606
+ check_summary_status,
1607
+ inputs=[summary_job_state, current_user],
1608
+ outputs=[summary_status_display, summary_output, summary_download_file]
1609
+ )
1610
+
1611
+ # History and Download events - IMPROVED WITH TABLE SELECTION
1612
+ refresh_history_btn.click(
1613
+ refresh_all_history,
1614
+ inputs=[current_user, show_all_checkbox],
1615
+ outputs=[transcript_table, transcript_summary, summary_table, summary_summary]
1616
+ )
1617
+
1618
+ show_all_checkbox.change(
1619
+ refresh_all_history,
1620
+ inputs=[current_user, show_all_checkbox],
1621
+ outputs=[transcript_table, transcript_summary, summary_table, summary_summary]
1622
+ )
1623
+
1624
+ # NEW: Table click handlers for instant downloads
1625
+ transcript_table.select(
1626
+ handle_transcript_table_select,
1627
+ inputs=[current_user],
1628
+ outputs=[transcript_download_file]
1629
+ )
1630
+
1631
+ summary_table.select(
1632
+ handle_summary_table_select,
1633
+ inputs=[current_user],
1634
+ outputs=[summary_download_file]
1635
+ )
1636
+
1637
+ # Load history on login
1638
+ current_user.change(
1639
+ refresh_all_history,
1640
+ inputs=[current_user, show_all_checkbox],
1641
+ outputs=[transcript_table, transcript_summary, summary_table, summary_summary]
1642
+ )
1643
+
1644
+ demo.load(
1645
+ lambda: "👤 Please log in to view your statistics...",
1646
+ outputs=[user_stats_display]
1647
+ )
1648
+
1649
if __name__ == "__main__":
    # Script entry point: report whether the optional AI features loaded,
    # then start the Gradio server.
    print("🚀 Starting AI Conference Summarization Service...")
    print(
        "✅ AI features enabled"
        if AI_FEATURES_AVAILABLE
        else "⚠️ AI features disabled - check configuration"
    )

    # Bind on all interfaces, port 7860, without a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)
backend.py ADDED
@@ -0,0 +1,1472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import uuid
4
+ import json
5
+ import requests
6
+ import subprocess
7
+ import asyncio
8
+ import threading
9
+ import hashlib
10
+ import re
11
+ from datetime import datetime, timedelta
12
+ from typing import Optional, Dict, List, Tuple
13
+ from dataclasses import dataclass, asdict
14
+ from concurrent.futures import ThreadPoolExecutor
15
+ import sqlite3
16
+ from contextlib import contextmanager
17
+ from dotenv import load_dotenv
18
+ from azure.storage.blob import BlobServiceClient
19
+ import tempfile
20
+ import shutil
21
+
22
+ # Load variables from the local .env file before any configuration below is read
23
+ load_dotenv()
24
+
25
def _require_env_var(varname):
    """Return the value of a mandatory environment variable.

    Args:
        varname: Name of the environment variable to read.

    Returns:
        The value exactly as found in the environment (not stripped).

    Raises:
        ValueError: If the variable is unset, empty, whitespace-only, or
            contains the substring "your" in any letter case.
    """
    raw = os.environ.get(varname)
    # NOTE(review): the "your" test matches anywhere in the value and ignores
    # case, so a genuine key or account name that happens to contain those
    # four letters is rejected too - presumably meant to catch unfilled
    # template placeholders; confirm against env_template.sh.
    rejected = raw is None or raw.strip() == "" or "your" in raw.lower()
    if rejected:
        raise ValueError(f"Environment variable {varname} is missing or invalid. Check your .env file.")
    return raw
30
+
31
# Mandatory settings: each lookup raises ValueError while the module is being
# imported if the variable is missing or rejected by _require_env_var.
AZURE_SPEECH_KEY = _require_env_var("AZURE_SPEECH_KEY")
AZURE_SPEECH_KEY_ENDPOINT = _require_env_var("AZURE_SPEECH_KEY_ENDPOINT").rstrip("/")
AZURE_REGION = _require_env_var("AZURE_REGION")
AZURE_BLOB_CONNECTION = _require_env_var("AZURE_BLOB_CONNECTION")
AZURE_CONTAINER = _require_env_var("AZURE_CONTAINER")
# Optional settings fall back to the defaults given here.
AZURE_CONTAINER_CHAT = os.getenv("AZURE_CONTAINER_CHAT", "response-chat")  # New chat container
AZURE_BLOB_SAS_TOKEN = _require_env_var("AZURE_BLOB_SAS_TOKEN")
# ALLOWED_LANGS is parsed as JSON; an unset variable yields an empty dict.
ALLOWED_LANGS = json.loads(os.getenv("ALLOWED_LANGS", "{}"))
API_VERSION = os.getenv("API_VERSION", "v3.2")

# Working directories (relative to the current working directory), created
# on import if they do not exist yet.
UPLOAD_DIR = "uploads"
DB_DIR = "database"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(DB_DIR, exist_ok=True)

AUDIO_FORMATS = [
    "wav",
    "mp3",
    "ogg",
    "opus",
    "flac",
    "wma",
    "aac",
    "alaw",
    "mulaw",
    "amr",
    "webm",
    "speex",
]
51
+
52
@dataclass
class User:
    """Account record; the fields mirror the columns of the ``users`` table."""

    # Identity and credentials (all required).
    user_id: str
    email: str
    username: str
    password_hash: str
    created_at: str

    # Optional state; a fresh instance is active and has never logged in.
    last_login: Optional[str] = None
    is_active: bool = True

    # Consent flags, all off unless explicitly supplied.
    gdpr_consent: bool = False
    data_retention_agreed: bool = False
    marketing_consent: bool = False
64
+
65
@dataclass
class TranscriptionJob:
    """One transcription job; the fields mirror the ``transcriptions`` table columns."""

    # Required at creation time.
    job_id: str
    user_id: str  # Keep user_id for authentication but use simpler transcription logic
    original_filename: str
    audio_url: str
    language: str
    status: str  # pending, processing, completed, failed
    created_at: str

    # Optional fields, all None on a freshly created job.
    completed_at: Optional[str] = None
    transcript_text: Optional[str] = None
    transcript_url: Optional[str] = None
    error_message: Optional[str] = None
    azure_trans_id: Optional[str] = None
    settings: Optional[Dict] = None
80
+
81
@dataclass
class SummaryJob:
    """One AI summary job and the artefacts it accumulates."""

    # Required at creation time.
    job_id: str
    user_id: str
    original_files: List[str]
    summary_type: str
    user_prompt: str
    status: str
    created_at: str

    # Optional fields, all None on a freshly created job.
    completed_at: Optional[str] = None
    summary_text: Optional[str] = None
    processed_files: Optional[Dict] = None
    extracted_images: Optional[List[str]] = None
    transcript_text: Optional[str] = None
    error_message: Optional[str] = None
    settings: Optional[Dict] = None
97
+
98
class AuthManager:
    """Stateless helpers for password hashing and registration-input validation.

    Every method is a ``staticmethod``; the class only serves as a namespace.
    """

    @staticmethod
    def hash_password(password: str) -> str:
        """Return the hex SHA-256 digest of *password* followed by a fixed salt.

        The salt is one constant shared by every account, so the result is
        deterministic: equal passwords always produce equal hashes.
        """
        salt = "azure_speech_transcription_salt_2024"  # In production, use environment variable
        return hashlib.sha256((password + salt).encode()).hexdigest()

    @staticmethod
    def verify_password(password: str, password_hash: str) -> bool:
        """Return True when *password* hashes to *password_hash*.

        The comparison is constant-time so response timing does not reveal how
        many leading characters of the stored hash matched. A non-string
        *password_hash* never matches.
        """
        import hmac  # local import: the module's top-level imports do not include hmac

        candidate = AuthManager.hash_password(password)
        if not isinstance(password_hash, str):
            return False
        # Compare as bytes: compare_digest only accepts ASCII-only str values.
        return hmac.compare_digest(candidate.encode("utf-8"), password_hash.encode("utf-8"))

    @staticmethod
    def validate_email(email: str) -> bool:
        """Return True when *email* has the shape ``local@domain.tld``.

        The whole string must match; ``re.match`` with a trailing ``$`` would
        also accept a value ending in a newline.
        """
        pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
        return re.fullmatch(pattern, email) is not None

    @staticmethod
    def validate_username(username: str) -> bool:
        """Return True when *username* is 3-30 ASCII letters, digits or underscores."""
        # fullmatch (rather than match + "$") so "name\n" is rejected.
        pattern = r'[a-zA-Z0-9_]{3,30}'
        return re.fullmatch(pattern, username) is not None

    @staticmethod
    def validate_password(password: str) -> Tuple[bool, str]:
        """Check password strength.

        Returns:
            ``(True, "Password is valid")`` on success, otherwise ``(False, reason)``
            where *reason* describes the first rule that failed. Rules are checked
            in order: length >= 8, an uppercase letter, a lowercase letter, a digit.
        """
        if len(password) < 8:
            return False, "Password must be at least 8 characters long"
        if not re.search(r'[A-Z]', password):
            return False, "Password must contain at least one uppercase letter"
        if not re.search(r'[a-z]', password):
            return False, "Password must contain at least one lowercase letter"
        if not re.search(r'\d', password):
            return False, "Password must contain at least one number"
        return True, "Password is valid"
137
+
138
+ class ExtendedDatabaseManager:
139
+ """Enhanced database manager with AI summary support"""
140
+
141
def __init__(self, db_path: str = None):
    """Set up the blob client and backup bookkeeping, then initialise the database.

    Args:
        db_path: Location of the local SQLite file. When omitted (or empty),
            ``transcriptions.db`` inside ``DB_DIR`` is used.
    """
    if not db_path:
        db_path = os.path.join(DB_DIR, "transcriptions.db")
    self.db_path = db_path

    # Blob-storage side: client plus the blob name of the shared database copy.
    self.blob_service = BlobServiceClient.from_connection_string(AZURE_BLOB_CONNECTION)
    self.db_blob_name = "shared/database/transcriptions.db"  # Shared database location

    # Guards connections; see get_connection.
    self._lock = threading.Lock()

    # Upload throttling state used by _upload_db_to_blob.
    self._last_backup_time = 0
    self._backup_interval = 30  # Backup every 30 seconds at most

    # Download existing database from blob storage or create new one
    self.init_database()
151
+
152
def _download_db_from_blob(self):
    """Fetch the shared database from Azure Blob Storage into ``self.db_path``.

    The blob is first written to a temporary file next to the destination and
    only swapped into place once the download has finished, so a failed or
    partial download never replaces an existing local database. The temporary
    file is removed again on any failure.

    Returns:
        True when the blob existed and was written to ``self.db_path``;
        False when no blob exists or when any error occurred. Errors are
        reported on stdout and never raised.
    """
    temp_path = None
    try:
        blob_client = self.blob_service.get_blob_client(container=AZURE_CONTAINER, blob=self.db_blob_name)

        # Check if blob exists
        if not blob_client.exists():
            print("📁 No existing shared database found in blob storage, will create new one")
            return False

        print("📥 Downloading existing shared database from Azure Blob Storage...")

        # dirname is "" for a bare filename; os.makedirs("") would raise.
        target_dir = os.path.dirname(self.db_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # Stage the download in the destination directory so the final
        # os.replace is a same-filesystem rename.
        fd, temp_path = tempfile.mkstemp(dir=target_dir or ".", suffix=".download")
        with os.fdopen(fd, "wb") as download_file:
            download_file.write(blob_client.download_blob().readall())

        # Move to final location (overwrites an existing file on every platform)
        os.replace(temp_path, self.db_path)
        temp_path = None

        print("✅ Shared database downloaded successfully")
        return True

    except Exception as e:
        print(f"⚠️ Warning: Could not download shared database from blob storage: {e}")
        print("📁 Will create new local database")
        return False

    finally:
        # Best-effort cleanup of a staged file left behind by a failure.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                print(f"⚠️ Warning: Could not remove temporary file {temp_path}: {cleanup_error}")
183
+
184
def _upload_db_to_blob(self):
    """Upload the local database file to Azure Blob Storage, throttled.

    Does nothing when the previous successful upload was less than
    ``self._backup_interval`` seconds ago or when the local file does not
    exist. Any error is printed as a warning and swallowed.
    """
    try:
        now = time.time()
        too_recent = now - self._last_backup_time < self._backup_interval
        # NOTE(review): a change made inside the throttle window is not
        # uploaded by this call; it only reaches blob storage if a later call
        # happens after the interval has passed.
        if too_recent or not os.path.exists(self.db_path):
            return

        uploader = self.blob_service.get_blob_client(container=AZURE_CONTAINER, blob=self.db_blob_name)
        with open(self.db_path, "rb") as db_file:
            uploader.upload_blob(db_file, overwrite=True)

        # Record the time sampled at entry, and only once the upload succeeded.
        self._last_backup_time = now

    except Exception as e:
        print(f"⚠️ Warning: Could not upload shared database to blob storage: {e}")
203
+
204
@contextmanager
def get_connection(self):
    """Yield a SQLite connection to ``self.db_path`` while holding ``self._lock``.

    Rows come back as ``sqlite3.Row``. On exit the connection is closed and a
    daemon thread is started to run ``self._upload_db_to_blob``; the lock is
    released afterwards. Nothing is committed here - callers commit themselves.
    """
    self._lock.acquire()
    try:
        connection = sqlite3.connect(self.db_path, timeout=30.0)
        connection.row_factory = sqlite3.Row
        try:
            yield connection
        finally:
            connection.close()
            # Auto-backup after any database operation (rate limited)
            backup_thread = threading.Thread(target=self._upload_db_to_blob, daemon=True)
            backup_thread.start()
    finally:
        self._lock.release()
215
+
216
+ def init_database(self):
216
+ # Fetch the shared database from blob storage first (best effort; the return value is ignored)
217
+ self._download_db_from_blob()
218
+
219
+ # Create any missing tables and indexes; every statement below uses IF NOT EXISTS
220
+ with self.get_connection() as conn:
221
+ # Users table: credentials plus consent flags
222
+ conn.execute("""
223
+ CREATE TABLE IF NOT EXISTS users (
224
+ user_id TEXT PRIMARY KEY,
225
+ email TEXT UNIQUE NOT NULL,
226
+ username TEXT UNIQUE NOT NULL,
227
+ password_hash TEXT NOT NULL,
228
+ created_at TEXT NOT NULL,
229
+ last_login TEXT,
230
+ is_active BOOLEAN DEFAULT 1,
231
+ gdpr_consent BOOLEAN DEFAULT 0,
232
+ data_retention_agreed BOOLEAN DEFAULT 0,
233
+ marketing_consent BOOLEAN DEFAULT 0
234
+ )
235
+ """)
236
+
237
+ # Transcriptions table: one row per transcription job, linked to users by user_id
238
+ conn.execute("""
239
+ CREATE TABLE IF NOT EXISTS transcriptions (
240
+ job_id TEXT PRIMARY KEY,
241
+ user_id TEXT NOT NULL,
242
+ original_filename TEXT NOT NULL,
243
+ audio_url TEXT,
244
+ language TEXT NOT NULL,
245
+ status TEXT NOT NULL,
246
+ created_at TEXT NOT NULL,
247
+ completed_at TEXT,
248
+ transcript_text TEXT,
249
+ transcript_url TEXT,
250
+ error_message TEXT,
251
+ azure_trans_id TEXT,
252
+ settings TEXT,
253
+ FOREIGN KEY (user_id) REFERENCES users (user_id)
254
+ )
255
+ """)
256
+
257
+ # AI summary jobs table: one row per summary job, linked to users by user_id
258
+ conn.execute("""
259
+ CREATE TABLE IF NOT EXISTS summary_jobs (
260
+ job_id TEXT PRIMARY KEY,
261
+ user_id TEXT NOT NULL,
262
+ summary_type TEXT NOT NULL,
263
+ user_prompt TEXT,
264
+ status TEXT NOT NULL,
265
+ created_at TEXT NOT NULL,
266
+ completed_at TEXT,
267
+ summary_text TEXT,
268
+ summary_url TEXT, -- URL to blob storage file
269
+ processed_files TEXT, -- JSON string
270
+ extracted_images TEXT, -- JSON string of image URLs
271
+ transcript_job_ids TEXT, -- JSON string of referenced transcript job IDs
272
+ original_files TEXT, -- JSON string of original file names
273
+ error_message TEXT,
274
+ settings TEXT, -- JSON string
275
+ FOREIGN KEY (user_id) REFERENCES users (user_id)
276
+ )
277
+ """)
278
+
279
+ # Indexes on the users and transcriptions columns named in each statement
280
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_users_email ON users(email)")
281
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_users_username ON users(username)")
282
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_transcriptions_user_id ON transcriptions(user_id)")
283
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_transcriptions_status ON transcriptions(status)")
284
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_transcriptions_created_at ON transcriptions(created_at DESC)")
285
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_transcriptions_user_created ON transcriptions(user_id, created_at DESC)")
286
+
287
+ # The same set of indexes for summary_jobs
288
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_summary_jobs_user_id ON summary_jobs(user_id)")
289
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_summary_jobs_status ON summary_jobs(status)")
290
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_summary_jobs_created_at ON summary_jobs(created_at DESC)")
291
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_summary_jobs_user_created ON summary_jobs(user_id, created_at DESC)")
292
+
293
+ conn.commit()
294
+ print("✅ Database initialized with AI summary support")
296
+
297
+ # User management methods
298
def create_user(self, email: str, username: str, password: str, gdpr_consent: bool = True,
                data_retention_agreed: bool = True, marketing_consent: bool = False) -> Tuple[bool, str, Optional[str]]:
    """Register a new account.

    Checks run in this order and the first failure decides the message:
    email format, username format, password strength, GDPR consent,
    data-retention agreement, then uniqueness of email and username.

    Returns:
        ``(True, "Account created successfully", user_id)`` on success,
        otherwise ``(False, reason, None)``. Exceptions are never propagated;
        they are printed and reported as ``"Registration failed: ..."``.
    """
    try:
        # Guard clauses: input validation.
        if not AuthManager.validate_email(email):
            return False, "Invalid email format", None
        if not AuthManager.validate_username(username):
            return False, "Username must be 3-30 characters, alphanumeric and underscore only", None

        password_ok, password_message = AuthManager.validate_password(password)
        if not password_ok:
            return False, password_message, None

        # Guard clauses: both consents are mandatory.
        if not gdpr_consent:
            return False, "GDPR consent is required to create an account", None
        if not data_retention_agreed:
            return False, "Data retention agreement is required", None

        new_id = str(uuid.uuid4())
        hashed = AuthManager.hash_password(password)

        with self.get_connection() as conn:
            # Reject duplicates; an email clash is reported ahead of a username clash.
            clash = conn.execute(
                "SELECT email, username FROM users WHERE email = ? OR username = ?",
                (email, username)
            ).fetchone()
            if clash is not None:
                reason = "Email already registered" if clash['email'] == email else "Username already taken"
                return False, reason, None

            record = User(
                user_id=new_id,
                email=email,
                username=username,
                password_hash=hashed,
                created_at=datetime.now().isoformat(),
                gdpr_consent=gdpr_consent,
                data_retention_agreed=data_retention_agreed,
                marketing_consent=marketing_consent
            )
            insert_values = (
                record.user_id, record.email, record.username, record.password_hash,
                record.created_at, record.is_active, record.gdpr_consent,
                record.data_retention_agreed, record.marketing_consent
            )
            conn.execute("""
                INSERT INTO users
                (user_id, email, username, password_hash, created_at, is_active,
                gdpr_consent, data_retention_agreed, marketing_consent)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, insert_values)
            conn.commit()

        print(f"👤 New user registered: {username} ({email})")
        return True, "Account created successfully", new_id

    except Exception as e:
        print(f"❌ Error creating user: {str(e)}")
        return False, f"Registration failed: {str(e)}", None
365
+
366
def authenticate_user(self, login: str, password: str) -> Tuple[bool, str, Optional[User]]:
    """Authenticate an active user by email or username.

    Args:
        login: Value matched against both the ``email`` and ``username`` columns.
        password: Plain-text password checked with ``AuthManager.verify_password``.

    Returns:
        ``(True, "Login successful", user)`` on success, otherwise
        ``(False, message, None)``. An unknown login and a wrong password
        produce the same "Invalid credentials" message.
    """
    try:
        with self.get_connection() as conn:
            # Only active accounts can log in; deactivated rows are ignored.
            user_row = conn.execute("""
                SELECT * FROM users
                WHERE (email = ? OR username = ?) AND is_active = 1
            """, (login, login)).fetchone()

            if not user_row:
                return False, "Invalid credentials", None

            if not AuthManager.verify_password(password, user_row['password_hash']):
                return False, "Invalid credentials", None

            # Take the timestamp once so the persisted last_login and the one
            # on the returned User object are identical.
            login_time = datetime.now().isoformat()
            conn.execute(
                "UPDATE users SET last_login = ? WHERE user_id = ?",
                (login_time, user_row['user_id'])
            )
            conn.commit()

            user = User(
                user_id=user_row['user_id'],
                email=user_row['email'],
                username=user_row['username'],
                password_hash=user_row['password_hash'],
                created_at=user_row['created_at'],
                last_login=login_time,
                is_active=bool(user_row['is_active']),
                gdpr_consent=bool(user_row['gdpr_consent']),
                data_retention_agreed=bool(user_row['data_retention_agreed']),
                marketing_consent=bool(user_row['marketing_consent'])
            )

            print(f"🔐 User logged in: {user.username} ({user.email})")
            return True, "Login successful", user

    except Exception as e:
        print(f"❌ Authentication error: {str(e)}")
        return False, f"Login failed: {str(e)}", None
410
+
411
def get_user_by_id(self, user_id: str) -> Optional[User]:
    """Load an active user by primary key.

    Returns the matching ``User`` or ``None`` when no active row exists or
    when the lookup raises (the error is printed, not propagated).
    """
    lookup_sql = "SELECT * FROM users WHERE user_id = ? AND is_active = 1"
    try:
        with self.get_connection() as conn:
            record = conn.execute(lookup_sql, (user_id,)).fetchone()
            if not record:
                return None

            # Integer flag columns are normalised to real booleans.
            flags = {
                column: bool(record[column])
                for column in ('is_active', 'gdpr_consent',
                               'data_retention_agreed', 'marketing_consent')
            }
            return User(
                user_id=record['user_id'],
                email=record['email'],
                username=record['username'],
                password_hash=record['password_hash'],
                created_at=record['created_at'],
                last_login=record['last_login'],
                **flags
            )
    except Exception as e:
        print(f"❌ Error getting user: {str(e)}")
        return None
436
+
437
def update_user_consent(self, user_id: str, marketing_consent: bool) -> bool:
    """Persist a new marketing-consent flag for ``user_id``.

    Returns ``True`` once the update has been committed, ``False`` if any
    exception occurred (the error is printed).
    """
    statement = "UPDATE users SET marketing_consent = ? WHERE user_id = ?"
    try:
        with self.get_connection() as db:
            db.execute(statement, (marketing_consent, user_id))
            db.commit()
            return True
    except Exception as e:
        print(f"❌ Error updating consent: {str(e)}")
        return False
450
+
451
def delete_user_account(self, user_id: str) -> bool:
    """Remove a user's job rows and deactivate the account.

    Rows in ``transcriptions`` and ``summary_jobs`` are deleted. The ``users``
    row is kept for the audit trail but marked inactive, with email and
    username replaced by ``deleted_<user_id>`` placeholders.

    NOTE(review): only database rows are touched here; anything stored in
    blob storage for this user is not removed by this method.

    Returns ``True`` on success, ``False`` if an exception occurred.
    """
    placeholder_name = f"deleted_{user_id}"
    placeholder_email = f"deleted_{user_id}@deleted.com"
    try:
        with self.get_connection() as conn:
            # Purge job data first, then anonymise the account row.
            for table_sql in (
                "DELETE FROM transcriptions WHERE user_id = ?",
                "DELETE FROM summary_jobs WHERE user_id = ?",
            ):
                conn.execute(table_sql, (user_id,))

            conn.execute(
                "UPDATE users SET is_active = 0, email = ?, username = ? WHERE user_id = ?",
                (placeholder_email, placeholder_name, user_id)
            )
            conn.commit()
            print(f"🗑️ User account deleted: {user_id}")
            return True
    except Exception as e:
        print(f"❌ Error deleting user account: {str(e)}")
        return False
470
+
471
+ # Transcription methods
472
def save_job(self, job: TranscriptionJob):
    """Insert or replace the ``transcriptions`` row for ``job``.

    ``job.settings`` is stored as a JSON string, or NULL when it is empty.
    """
    settings_json = json.dumps(job.settings) if job.settings else None
    values = (
        job.job_id, job.user_id, job.original_filename, job.audio_url,
        job.language, job.status, job.created_at, job.completed_at,
        job.transcript_text, job.transcript_url, job.error_message,
        job.azure_trans_id, settings_json,
    )
    with self.get_connection() as conn:
        conn.execute("""
            INSERT OR REPLACE INTO transcriptions
            (job_id, user_id, original_filename, audio_url, language, status,
             created_at, completed_at, transcript_text, transcript_url, error_message,
             azure_trans_id, settings)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, values)
        conn.commit()
487
+
488
def get_job(self, job_id: str) -> Optional[TranscriptionJob]:
    """Return the transcription job with ``job_id``, or ``None`` if absent."""
    with self.get_connection() as conn:
        found = conn.execute(
            "SELECT * FROM transcriptions WHERE job_id = ?", (job_id,)
        ).fetchone()
        return self._row_to_job(found) if found else None
496
+
497
def get_user_jobs(self, user_id: str, limit: int = 50) -> List[TranscriptionJob]:
    """Return up to ``limit`` of one user's transcription jobs, newest first.

    The query is filtered by ``user_id``, so other users' rows are never returned.
    """
    query = """
        SELECT * FROM transcriptions
        WHERE user_id = ?
        ORDER BY created_at DESC
        LIMIT ?
    """
    with self.get_connection() as conn:
        matches = conn.execute(query, (user_id, limit)).fetchall()
        return list(map(self._row_to_job, matches))
507
+
508
def get_all_jobs(self, limit: int = 100) -> List[TranscriptionJob]:
    """Return up to ``limit`` transcription jobs from every user, newest first."""
    query = """
        SELECT * FROM transcriptions
        ORDER BY created_at DESC
        LIMIT ?
    """
    with self.get_connection() as conn:
        matches = conn.execute(query, (limit,)).fetchall()
        return list(map(self._row_to_job, matches))
517
+
518
def get_pending_jobs(self) -> List[TranscriptionJob]:
    """Return every transcription job whose status is 'pending' or 'processing'.

    Not scoped to a user: the background worker uses this to find work.
    """
    query = "SELECT * FROM transcriptions WHERE status IN ('pending', 'processing')"
    with self.get_connection() as conn:
        unfinished = conn.execute(query).fetchall()
        return list(map(self._row_to_job, unfinished))
525
+
526
+ # AI Summary methods - ENHANCED
527
def save_summary_job(self, job: SummaryJob):
    """Insert or replace the ``summary_jobs`` row for ``job``.

    When the job is completed and has summary text, the text is first written
    to blob storage via ``_store_summary_to_blob`` and the resulting URL is
    stored in ``summary_url``; otherwise ``summary_url`` is NULL. List and
    dict fields are stored as JSON strings, or NULL when empty.

    Errors are printed and swallowed; nothing is raised to the caller.
    """
    try:
        # Upload before opening the DB connection so it is not held during
        # the network call.
        summary_url = None
        if job.summary_text and job.status == 'completed':
            summary_url = self._store_summary_to_blob(job.job_id, job.summary_text, job.user_id)

        # The transcript_job_ids column must hold the job's transcript job
        # ids. The previous code checked for that attribute but serialized
        # job.transcript_text instead, which stored the wrong value or raised
        # AttributeError and lost the whole save.
        transcript_job_ids = getattr(job, 'transcript_job_ids', None)

        with self.get_connection() as conn:
            conn.execute("""
                INSERT OR REPLACE INTO summary_jobs
                (job_id, user_id, summary_type, user_prompt, status, created_at,
                 completed_at, summary_text, summary_url, processed_files, extracted_images,
                 transcript_job_ids, original_files, error_message, settings)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                job.job_id, job.user_id, job.summary_type, job.user_prompt,
                job.status, job.created_at, job.completed_at,
                job.summary_text, summary_url,
                json.dumps(job.processed_files) if job.processed_files else None,
                json.dumps(job.extracted_images) if job.extracted_images else None,
                json.dumps(transcript_job_ids) if transcript_job_ids else None,
                json.dumps(job.original_files) if job.original_files else None,
                job.error_message,
                json.dumps(job.settings) if job.settings else None
            ))
            conn.commit()
            print(f"💾 Summary job saved: {job.job_id[:8]}... (status: {job.status})")
    except Exception as e:
        print(f"❌ Error saving summary job: {e}")
557
+
558
def _store_summary_to_blob(self, job_id: str, summary_text: str, user_id: str) -> Optional[str]:
    """Upload a summary as a markdown blob and return its SAS-signed URL.

    The blob is written to ``users/<user_id>/summaries/<job_id>_summary.md``
    in ``AZURE_CONTAINER`` (overwriting any existing blob), and the same text
    is also passed to ``_store_chat_response``. Returns ``None`` if anything
    raises; the error is printed.
    """
    try:
        target_blob = f"users/{user_id}/summaries/{job_id}_summary.md"

        # Markdown document: metadata header, the summary, then a footer.
        content = f"""# AI Conference Summary
*Job ID: {job_id}*
*Generated: {datetime.now().isoformat()}*
*User: {user_id}*

---

{summary_text}

---
*Generated by AI Conference Summarization Service*
"""

        client = self.blob_service.get_blob_client(container=AZURE_CONTAINER, blob=target_blob)
        payload = content.encode('utf-8')
        client.upload_blob(payload, overwrite=True)

        # Mirror the raw summary into the chat-response container as well.
        self._store_chat_response(job_id, summary_text, user_id)

        # Tolerate a SAS token configured with or without a leading "?".
        token = AZURE_BLOB_SAS_TOKEN.lstrip("?")
        signed_url = f"{client.url}?{token}"

        print(f"☁️ Summary stored to blob: {target_blob}")
        return signed_url

    except Exception as e:
        print(f"❌ Error storing summary to blob: {e}")
        return None
595
+
596
def _store_chat_response(self, job_id: str, response_text: str, user_id: str) -> Optional[str]:
    """Upload a response as a text blob to ``AZURE_CONTAINER_CHAT``.

    The blob name is ``users/<user_id>/responses/<timestamp>_<job_id>.txt``.
    Its content is a ``Key: value`` metadata header, a ``---`` separator and
    the response text. Returns the SAS-signed blob URL, or ``None`` if
    anything raises; the error is printed.
    """
    try:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        target_blob = f"users/{user_id}/responses/{stamp}_{job_id}.txt"

        # Header lines are what get_user_chat_responses parses back out.
        content = f"""User ID: {user_id}
Job ID: {job_id}
Timestamp: {datetime.now().isoformat()}
Type: AI_Summary_Response

---

{response_text}
"""

        client = self.blob_service.get_blob_client(container=AZURE_CONTAINER_CHAT, blob=target_blob)
        payload = content.encode('utf-8')
        client.upload_blob(payload, overwrite=True)

        # Tolerate a SAS token configured with or without a leading "?".
        token = AZURE_BLOB_SAS_TOKEN.lstrip("?")
        signed_url = f"{client.url}?{token}"

        print(f"💬 Chat response stored to: {AZURE_CONTAINER_CHAT}/{target_blob}")
        return signed_url

    except Exception as e:
        print(f"❌ Error storing chat response: {e}")
        return None
628
+
629
def get_user_chat_responses(self, user_id: str, limit: int = 50) -> List[Dict]:
    """List a user's stored chat responses, newest first.

    Blobs under ``users/<user_id>/responses/`` in ``AZURE_CONTAINER_CHAT`` are
    sorted by ``last_modified`` descending and the first ``limit`` are
    downloaded. Each blob is split at the first ``---`` line: ``Key: value``
    lines before it become metadata, everything after it is the content.

    Returns a list of dicts with the keys ``blob_name``, ``job_id``,
    ``timestamp``, ``type``, ``content``, ``last_modified`` and ``size``.
    A blob that fails to download or parse is skipped; a failure while
    listing yields an empty list.
    """
    try:
        container = self.blob_service.get_container_client(AZURE_CONTAINER_CHAT)
        collected = []

        prefix = f"users/{user_id}/responses/"
        newest_first = sorted(
            container.list_blobs(name_starts_with=prefix),
            key=lambda item: item.last_modified,
            reverse=True,
        )

        for blob in newest_first[:limit]:
            try:
                client = self.blob_service.get_blob_client(container=AZURE_CONTAINER_CHAT, blob=blob.name)
                text = client.download_blob().readall().decode('utf-8')

                rows = text.split('\n')
                header = {}
                body_start = 0  # stays 0 when no separator exists: whole text is content

                for index, row in enumerate(rows):
                    if row.strip() == '---':
                        body_start = index + 1
                        break
                    if ':' in row:
                        # Split on the first colon only; values may contain colons.
                        name, _, val = row.partition(':')
                        header[name.strip()] = val.strip()

                body = '\n'.join(rows[body_start:]).strip()

                collected.append({
                    'blob_name': blob.name,
                    'job_id': header.get('Job ID', ''),
                    'timestamp': header.get('Timestamp', ''),
                    'type': header.get('Type', 'Unknown'),
                    'content': body,
                    'last_modified': blob.last_modified.isoformat(),
                    'size': blob.size
                })

            except Exception as e:
                print(f"Error processing chat response blob {blob.name}: {e}")
                continue

        return collected

    except Exception as e:
        print(f"❌ Error getting user chat responses: {e}")
        return []
682
+
683
def get_summary_job(self, job_id: str) -> Optional[SummaryJob]:
    """Return the AI summary job with ``job_id``, or ``None`` if absent."""
    with self.get_connection() as conn:
        found = conn.execute(
            "SELECT * FROM summary_jobs WHERE job_id = ?", (job_id,)
        ).fetchone()
        return self._row_to_summary_job(found) if found else None
692
+
693
def get_user_summary_jobs(self, user_id: str, limit: int = 50) -> List[SummaryJob]:
    """Return up to ``limit`` of one user's AI summary jobs, newest first."""
    query = """
        SELECT * FROM summary_jobs
        WHERE user_id = ?
        ORDER BY created_at DESC
        LIMIT ?
    """
    with self.get_connection() as conn:
        matches = conn.execute(query, (user_id, limit)).fetchall()
        return list(map(self._row_to_summary_job, matches))
703
+
704
def get_pending_summary_jobs(self) -> List[SummaryJob]:
    """Return every AI summary job whose status is 'pending' or 'processing'."""
    query = "SELECT * FROM summary_jobs WHERE status IN ('pending', 'processing')"
    with self.get_connection() as conn:
        unfinished = conn.execute(query).fetchall()
        return list(map(self._row_to_summary_job, unfinished))
711
+
712
+ # Statistics methods - ENHANCED
713
def get_user_stats(self, user_id: str) -> Dict:
    """Compute transcription statistics for one user.

    Returns a dict with:
        ``total_jobs``  - number of transcription rows for the user,
        ``by_status``   - mapping of status to row count,
        ``recent_jobs`` - rows whose ``created_at`` falls in the last 7 days.
    """
    # created_at holds ISO-8601 strings, so a string comparison against the
    # ISO cutoff orders rows chronologically.
    cutoff = (datetime.now() - timedelta(days=7)).isoformat()

    with self.get_connection() as conn:
        total_row = conn.execute("""
            SELECT COUNT(*) FROM transcriptions WHERE user_id = ?
        """, (user_id,)).fetchone()

        status_rows = conn.execute("""
            SELECT status, COUNT(*) FROM transcriptions
            WHERE user_id = ?
            GROUP BY status
        """, (user_id,)).fetchall()

        recent_row = conn.execute("""
            SELECT COUNT(*) FROM transcriptions
            WHERE user_id = ? AND created_at >= ?
        """, (user_id, cutoff)).fetchone()

        return {
            'total_jobs': total_row[0] if total_row else 0,
            'by_status': dict((entry[0], entry[1]) for entry in status_rows),
            'recent_jobs': recent_row[0] if recent_row else 0,
        }
741
+
742
def get_user_stats_extended(self, user_id: str) -> Dict:
    """Return ``get_user_stats`` output extended with AI-summary statistics.

    Adds ``total_summary_jobs``, ``summary_by_status`` and
    ``recent_summary_jobs`` (last 7 days) computed from ``summary_jobs``.
    """
    combined = self.get_user_stats(user_id)  # start from the transcription stats

    with self.get_connection() as conn:
        total_row = conn.execute("""
            SELECT COUNT(*) FROM summary_jobs WHERE user_id = ?
        """, (user_id,)).fetchone()
        combined['total_summary_jobs'] = total_row[0] if total_row else 0

        status_rows = conn.execute("""
            SELECT status, COUNT(*) FROM summary_jobs
            WHERE user_id = ?
            GROUP BY status
        """, (user_id,)).fetchall()
        combined['summary_by_status'] = dict((entry[0], entry[1]) for entry in status_rows)

        # ISO-8601 strings compare chronologically, so a text cutoff suffices.
        cutoff = (datetime.now() - timedelta(days=7)).isoformat()
        recent_row = conn.execute("""
            SELECT COUNT(*) FROM summary_jobs
            WHERE user_id = ? AND created_at >= ?
        """, (user_id, cutoff)).fetchone()
        combined['recent_summary_jobs'] = recent_row[0] if recent_row else 0

    return combined
770
+
771
def export_user_data(self, user_id: str) -> Dict:
    """Collect everything stored in the database for one user.

    Returns a dict with ``export_date``, ``user_info`` (the raw ``users`` row
    as a dict, or ``{}`` if missing), ``transcriptions``, ``ai_summaries``
    (lists of raw rows as dicts) and ``statistics`` (from
    ``get_user_stats_extended``). Returns ``{}`` if anything raises.
    """
    try:
        with self.get_connection() as conn:
            account = conn.execute(
                "SELECT * FROM users WHERE user_id = ?", (user_id,)
            ).fetchone()

            transcripts = conn.execute(
                "SELECT * FROM transcriptions WHERE user_id = ?", (user_id,)
            ).fetchall()

            summaries = conn.execute(
                "SELECT * FROM summary_jobs WHERE user_id = ?", (user_id,)
            ).fetchall()

            return {
                "export_date": datetime.now().isoformat(),
                "user_info": dict(account) if account else {},
                "transcriptions": list(map(dict, transcripts)),
                "ai_summaries": list(map(dict, summaries)),
                "statistics": self.get_user_stats_extended(user_id)
            }

    except Exception as e:
        print(f"❌ Error exporting user data: {str(e)}")
        return {}
803
+
804
def _row_to_job(self, row) -> TranscriptionJob:
    """Build a ``TranscriptionJob`` from a ``transcriptions`` row.

    Every column is copied across unchanged except ``settings``, which is
    decoded from JSON (``None`` when the column is empty).
    """
    plain_columns = (
        'job_id', 'user_id', 'original_filename', 'audio_url', 'language',
        'status', 'created_at', 'completed_at', 'transcript_text',
        'transcript_url', 'error_message', 'azure_trans_id',
    )
    fields = {column: row[column] for column in plain_columns}
    raw_settings = row['settings']
    fields['settings'] = json.loads(raw_settings) if raw_settings else None
    return TranscriptionJob(**fields)
821
+
822
def _row_to_summary_job(self, row) -> SummaryJob:
    """Build a ``SummaryJob`` from a ``summary_jobs`` row.

    JSON columns are decoded; an empty ``original_files`` becomes ``[]``
    while empty ``processed_files``, ``extracted_images`` and ``settings``
    become ``None``.
    """
    def decode(column, fallback=None):
        # Empty/NULL columns map to the fallback instead of being parsed.
        raw = row[column]
        return json.loads(raw) if raw else fallback

    return SummaryJob(
        job_id=row['job_id'],
        user_id=row['user_id'],
        original_files=decode('original_files', []),
        summary_type=row['summary_type'],
        user_prompt=row['user_prompt'],
        status=row['status'],
        created_at=row['created_at'],
        completed_at=row['completed_at'],
        summary_text=row['summary_text'],
        processed_files=decode('processed_files'),
        extracted_images=decode('extracted_images'),
        error_message=row['error_message'],
        settings=decode('settings')
    )
844
+
845
+ class TranscriptionManager:
846
def __init__(self):
    """Wire up storage, the worker pool and the polling thread.

    Creates the database manager, a 5-worker thread pool and the blob
    service client, then starts ``_background_worker`` on a daemon thread.
    """
    self.db = ExtendedDatabaseManager()
    self.executor = ThreadPoolExecutor(max_workers=5)
    self.blob_service = BlobServiceClient.from_connection_string(AZURE_BLOB_CONNECTION)
    # Remembers which status transitions were already logged.
    self._job_status_cache = {}

    # The flag must be set before the thread starts: the worker loop reads it.
    self.running = True
    poller = threading.Thread(target=self._background_worker, daemon=True)
    self.worker_thread = poller
    poller.start()
856
+
857
+ def _log_status_change(self, job_id: str, old_status: str, new_status: str, filename: str = "", user_id: str = ""):
858
+ """Only log when status actually changes"""
859
+ cache_key = f"{job_id}_{old_status}_{new_status}"
860
+ if cache_key not in self._job_status_cache:
861
+ self._job_status_cache[cache_key] = True
862
+ user_display = f"[{user_id[:8]}...]" if user_id else ""
863
+ if filename:
864
+ print(f"🔄 {user_display} Job {job_id[:8]}... ({filename}): {old_status} → {new_status}")
865
+ else:
866
+ print(f"🔄 {user_display} Job {job_id[:8]}...: {old_status} → {new_status}")
867
+
868
+ def _background_worker(self):
869
+ """Background worker to process pending transcriptions - minimal logging"""
870
+ iteration_count = 0
871
+ while self.running:
872
+ try:
873
+ pending_jobs = self.db.get_pending_jobs()
874
+
875
+ # Only log if there are jobs to process
876
+ if pending_jobs and iteration_count % 6 == 0: # Log every minute (6 * 10 seconds)
877
+ active_jobs = len([j for j in pending_jobs if j.status == 'processing'])
878
+ queued_jobs = len([j for j in pending_jobs if j.status == 'pending'])
879
+ if active_jobs > 0 or queued_jobs > 0:
880
+ print(f"📊 Background worker: {active_jobs} processing, {queued_jobs} queued")
881
+
882
+ for job in pending_jobs:
883
+ if job.status == 'pending':
884
+ self.executor.submit(self._process_transcription_job, job.job_id)
885
+ elif job.status == 'processing' and job.azure_trans_id:
886
+ self.executor.submit(self._check_transcription_status, job.job_id)
887
+
888
+ time.sleep(10) # Check every 10 seconds
889
+ iteration_count += 1
890
+
891
+ except Exception as e:
892
+ print(f"❌ Background worker error: {e}")
893
+ time.sleep(30)
894
+
895
def submit_transcription(
    self,
    file_bytes: bytes,
    original_filename: str,
    user_id: str,
    language: str,
    settings: Dict
) -> str:
    """Register a new transcription job and queue its file preparation.

    A ``pending`` job row with an empty ``audio_url`` is saved first; the
    upload/conversion work is then submitted to the thread pool as
    ``_prepare_audio_file``.

    Returns:
        The newly generated job id (a UUID4 string).
    """
    new_id = str(uuid.uuid4())

    print(f"🚀 [{user_id[:8]}...] New transcription: {original_filename} ({len(file_bytes):,} bytes)")

    record = TranscriptionJob(
        job_id=new_id,
        user_id=user_id,
        original_filename=original_filename,
        audio_url="",  # filled in once the audio has been uploaded
        language=language,
        status="pending",
        created_at=datetime.now().isoformat(),
        settings=settings
    )
    self.db.save_job(record)

    # Heavy lifting (disk write, conversion, blob upload) happens off-thread.
    self.executor.submit(self._prepare_audio_file, new_id, file_bytes, original_filename, settings)

    return new_id
927
+
928
+ def _prepare_audio_file(self, job_id: str, file_bytes: bytes, original_filename: str, settings: Dict):
929
+ """Prepare audio file and upload to blob storage"""
930
+ try:
931
+ job = self.db.get_job(job_id)
932
+ if not job:
933
+ return
934
+
935
+ user_id = job.user_id
936
+
937
+ # Save original file
938
+ src_ext = original_filename.split('.')[-1].lower() if '.' in original_filename else "bin"
939
+ upload_path = os.path.join(UPLOAD_DIR, f"{job_id}_original.{src_ext}")
940
+
941
+ with open(upload_path, "wb") as f:
942
+ f.write(file_bytes)
943
+
944
+ # Determine if conversion is needed
945
+ audio_format = settings.get('audio_format', 'wav')
946
+
947
+ # Check if file is already in target format and specs
948
+ if src_ext == audio_format and audio_format == 'wav':
949
+ # Check if it's already 16kHz mono (Azure Speech preferred format)
950
+ try:
951
+ probe_cmd = [
952
+ 'ffprobe', '-v', 'quiet', '-print_format', 'json',
953
+ '-show_streams', upload_path
954
+ ]
955
+ result = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=30)
956
+
957
+ if result.returncode == 0:
958
+ import json
959
+ probe_data = json.loads(result.stdout)
960
+ audio_stream = probe_data.get('streams', [{}])[0]
961
+
962
+ sample_rate = int(audio_stream.get('sample_rate', 0))
963
+ channels = int(audio_stream.get('channels', 0))
964
+
965
+ # If already optimal format, use as-is
966
+ if sample_rate == 16000 and channels == 1:
967
+ out_path = upload_path # Use original file
968
+ else:
969
+ print(f"🔄 [{user_id[:8]}...] Converting {original_filename} to 16kHz mono")
970
+ out_path = os.path.join(UPLOAD_DIR, f"{job_id}_converted.{audio_format}")
971
+ self._convert_to_audio(upload_path, out_path, audio_format)
972
+ else:
973
+ out_path = os.path.join(UPLOAD_DIR, f"{job_id}_converted.{audio_format}")
974
+ self._convert_to_audio(upload_path, out_path, audio_format)
975
+
976
+ except Exception as e:
977
+ print(f"⚠️ [{user_id[:8]}...] Audio probing failed for {original_filename}: {e}")
978
+ out_path = os.path.join(UPLOAD_DIR, f"{job_id}_converted.{audio_format}")
979
+ self._convert_to_audio(upload_path, out_path, audio_format)
980
+ else:
981
+ # Different format, need conversion
982
+ print(f"🔄 [{user_id[:8]}...] Converting {original_filename}: {src_ext} → {audio_format}")
983
+ out_path = os.path.join(UPLOAD_DIR, f"{job_id}_converted.{audio_format}")
984
+
985
+ try:
986
+ self._convert_to_audio(upload_path, out_path, audio_format)
987
+ except Exception as e:
988
+ print(f"❌ [{user_id[:8]}...] Audio conversion failed for {original_filename}: {str(e)}")
989
+ job.status = "failed"
990
+ job.error_message = f"Audio conversion failed: {str(e)}"
991
+ job.completed_at = datetime.now().isoformat()
992
+ self.db.save_job(job)
993
+
994
+ # Clean up files
995
+ try:
996
+ os.remove(upload_path)
997
+ except:
998
+ pass
999
+ return
1000
+
1001
+ # Upload to blob storage
1002
+ try:
1003
+ # Upload the processed audio file
1004
+ final_audio_name = f"users/{user_id}/audio/{job_id}.{audio_format}"
1005
+ audio_url = self._upload_blob(out_path, final_audio_name)
1006
+
1007
+ # Upload original file to blob storage (only if different from processed)
1008
+ if out_path != upload_path:
1009
+ orig_blob_name = f"users/{user_id}/originals/{job_id}_{original_filename}"
1010
+ self._upload_blob(upload_path, orig_blob_name)
1011
+ else:
1012
+ # If we used the original file as-is, still store it as original
1013
+ orig_blob_name = f"users/{user_id}/originals/{job_id}_{original_filename}"
1014
+ self._upload_blob(upload_path, orig_blob_name)
1015
+
1016
+ print(f"☁️ [{user_id[:8]}...] {original_filename} uploaded to blob storage")
1017
+
1018
+ # Update job with audio URL
1019
+ job.audio_url = audio_url
1020
+ job.status = "pending"
1021
+ self.db.save_job(job)
1022
+
1023
+ except Exception as e:
1024
+ print(f"❌ [{user_id[:8]}...] Blob upload failed for {original_filename}: {str(e)}")
1025
+ job.status = "failed"
1026
+ job.error_message = f"Blob storage upload failed: {str(e)}"
1027
+ job.completed_at = datetime.now().isoformat()
1028
+ self.db.save_job(job)
1029
+
1030
+ # Clean up local files
1031
+ try:
1032
+ if os.path.exists(upload_path):
1033
+ os.remove(upload_path)
1034
+ if out_path != upload_path and os.path.exists(out_path):
1035
+ os.remove(out_path)
1036
+ except Exception as e:
1037
+ print(f"⚠️ [{user_id[:8]}...] Warning: Could not clean up local files for {original_filename}: {e}")
1038
+
1039
+ except Exception as e:
1040
+ print(f"❌ File preparation error for {original_filename}: {e}")
1041
+ job = self.db.get_job(job_id)
1042
+ if job:
1043
+ job.status = "failed"
1044
+ job.error_message = f"File preparation failed: {str(e)}"
1045
+ job.completed_at = datetime.now().isoformat()
1046
+ self.db.save_job(job)
1047
+
1048
+ def _process_transcription_job(self, job_id: str):
1049
+ """Process a transcription job"""
1050
+ try:
1051
+ job = self.db.get_job(job_id)
1052
+ if not job or job.status != 'pending' or not job.audio_url:
1053
+ return
1054
+
1055
+ old_status = job.status
1056
+ # Update status to processing
1057
+ job.status = "processing"
1058
+ self.db.save_job(job)
1059
+
1060
+ self._log_status_change(job_id, old_status, job.status, job.original_filename, job.user_id)
1061
+
1062
+ # Create Azure transcription
1063
+ settings = job.settings or {}
1064
+ azure_trans_id = self._create_transcription(
1065
+ job.audio_url,
1066
+ job.language,
1067
+ settings.get('diarization_enabled', False),
1068
+ settings.get('speakers', 2),
1069
+ settings.get('profanity', 'masked'),
1070
+ settings.get('punctuation', 'automatic'),
1071
+ settings.get('timestamps', True),
1072
+ settings.get('lexical', False),
1073
+ settings.get('language_id_enabled', False),
1074
+ settings.get('candidate_locales', None)
1075
+ )
1076
+
1077
+ # Update job with Azure transcription ID
1078
+ job.azure_trans_id = azure_trans_id
1079
+ self.db.save_job(job)
1080
+
1081
+ except Exception as e:
1082
+ print(f"❌ Transcription submission failed for job {job_id[:8]}...: {str(e)}")
1083
+ job = self.db.get_job(job_id)
1084
+ if job:
1085
+ old_status = job.status
1086
+ job.status = "failed"
1087
+ job.error_message = f"Transcription submission failed: {str(e)}"
1088
+ job.completed_at = datetime.now().isoformat()
1089
+ self.db.save_job(job)
1090
+ self._log_status_change(job_id, old_status, job.status, job.original_filename, job.user_id)
1091
+
1092
+ def _check_transcription_status(self, job_id: str):
1093
+ """Check status of Azure transcription"""
1094
+ try:
1095
+ job = self.db.get_job(job_id)
1096
+ if not job or job.status != 'processing' or not job.azure_trans_id:
1097
+ return
1098
+
1099
+ # Check Azure transcription status
1100
+ url = f"{AZURE_SPEECH_KEY_ENDPOINT}/speechtotext/{API_VERSION}/transcriptions/{job.azure_trans_id}"
1101
+ headers = {"Ocp-Apim-Subscription-Key": AZURE_SPEECH_KEY}
1102
+
1103
+ r = requests.get(url, headers=headers)
1104
+ data = r.json()
1105
+
1106
+ if data.get("status") == "Succeeded":
1107
+ # Get transcription result
1108
+ content_url = self._get_transcription_result_url(job.azure_trans_id)
1109
+ if content_url:
1110
+ transcript = self._fetch_transcript(content_url)
1111
+
1112
+ # Save transcript to blob storage
1113
+ transcript_blob_name = f"users/{job.user_id}/transcripts/{job_id}.txt"
1114
+ transcript_path = os.path.join(UPLOAD_DIR, f"{job_id}_transcript.txt")
1115
+
1116
+ with open(transcript_path, "w", encoding="utf-8") as f:
1117
+ f.write(transcript)
1118
+
1119
+ transcript_url = self._upload_blob(transcript_path, transcript_blob_name)
1120
+
1121
+ # Update job
1122
+ old_status = job.status
1123
+ job.status = "completed"
1124
+ job.transcript_text = transcript
1125
+ job.transcript_url = transcript_url
1126
+ job.completed_at = datetime.now().isoformat()
1127
+ self.db.save_job(job)
1128
+
1129
+ self._log_status_change(job_id, old_status, job.status, job.original_filename, job.user_id)
1130
+ print(f"✅ [{job.user_id[:8]}...] Transcription completed: {job.original_filename}")
1131
+
1132
+ # Clean up
1133
+ try:
1134
+ os.remove(transcript_path)
1135
+ except:
1136
+ pass
1137
+
1138
+ elif data.get("status") in ("Failed", "FailedWithPartialResults"):
1139
+ error_message = ""
1140
+ if "properties" in data and "error" in data["properties"]:
1141
+ error_message = data["properties"]["error"].get("message", "")
1142
+ elif "error" in data:
1143
+ error_message = data["error"].get("message", "")
1144
+
1145
+ old_status = job.status
1146
+ job.status = "failed"
1147
+ job.error_message = f"Azure transcription failed: {error_message}"
1148
+ job.completed_at = datetime.now().isoformat()
1149
+ self.db.save_job(job)
1150
+
1151
+ self._log_status_change(job_id, old_status, job.status, job.original_filename, job.user_id)
1152
+ print(f"❌ [{job.user_id[:8]}...] Transcription failed: {job.original_filename} - {error_message}")
1153
+
1154
+ except Exception as e:
1155
+ print(f"❌ Status check failed for job {job_id[:8]}...: {str(e)}")
1156
+ job = self.db.get_job(job_id)
1157
+ if job:
1158
+ old_status = job.status
1159
+ job.status = "failed"
1160
+ job.error_message = f"Status check failed: {str(e)}"
1161
+ job.completed_at = datetime.now().isoformat()
1162
+ self.db.save_job(job)
1163
+ self._log_status_change(job_id, old_status, job.status, job.original_filename, job.user_id)
1164
+
1165
def get_job_status(self, job_id: str) -> Optional[TranscriptionJob]:
    """Look up a transcription job by id; ``None`` if it does not exist."""
    current = self.db.get_job(job_id)
    return current
1168
+
1169
def get_user_history(self, user_id: str, limit: int = 50) -> List[TranscriptionJob]:
    """Return up to ``limit`` transcription jobs belonging to ``user_id`` only."""
    history = self.db.get_user_jobs(user_id, limit)
    return history
1172
+
1173
def get_all_history(self, limit: int = 100) -> List[TranscriptionJob]:
    """Return up to ``limit`` transcription jobs across every user (admin view)."""
    history = self.db.get_all_jobs(limit)
    return history
1176
+
1177
def get_user_stats(self, user_id: str) -> Dict:
    """Return the database manager's transcription statistics for ``user_id``."""
    summary = self.db.get_user_stats(user_id)
    return summary
1180
+
1181
def get_user_stats_extended(self, user_id: str) -> Dict:
    """Return transcription plus AI-summary statistics for ``user_id``."""
    summary = self.db.get_user_stats_extended(user_id)
    return summary
1184
+
1185
def download_transcript(self, job_id: str, user_id: str) -> Optional[str]:
    """Return a job's transcript text, but only to the user who owns the job.

    ``None`` is returned when the job is missing, belongs to a different
    user, or has no transcript text yet.
    """
    job = self.db.get_job(job_id)
    if not job or job.user_id != user_id or not job.transcript_text:
        return None
    return job.transcript_text
1191
+
1192
+ # AI Summary methods - ENHANCED integration
1193
def save_summary_job(self, job: SummaryJob):
    """Persist an AI summary job through the database manager."""
    store = self.db
    store.save_summary_job(job)
1196
+
1197
def get_summary_job(self, job_id: str) -> Optional[SummaryJob]:
    """Look up an AI summary job by id; ``None`` if it does not exist."""
    found = self.db.get_summary_job(job_id)
    return found
1200
+
1201
def get_user_summary_history(self, user_id: str, limit: int = 50) -> List[SummaryJob]:
    """Return the AI summary jobs of one user, capped at ``limit`` entries.

    Delegates to ``self.db.get_user_summary_jobs``.
    """
    summaries = self.db.get_user_summary_jobs(user_id, limit)
    return summaries
1204
+
1205
+ # Authentication methods
1206
def register_user(self, email: str, username: str, password: str, gdpr_consent: bool = True,
                  data_retention_agreed: bool = True, marketing_consent: bool = False) -> Tuple[bool, str, Optional[str]]:
    """Create a new user account.

    All arguments are forwarded positionally, in the same order, to
    ``self.db.create_user``; its 3-tuple result is returned unchanged.
    """
    outcome = self.db.create_user(
        email,
        username,
        password,
        gdpr_consent,
        data_retention_agreed,
        marketing_consent,
    )
    return outcome
1210
+
1211
def login_user(self, login: str, password: str) -> Tuple[bool, str, Optional[User]]:
    """Authenticate a user.

    Forwards ``login`` and ``password`` to ``self.db.authenticate_user``
    and returns its 3-tuple result unchanged.
    """
    outcome = self.db.authenticate_user(login, password)
    return outcome
1214
+
1215
def get_user(self, user_id: str) -> Optional[User]:
    """Fetch a user record by ID via ``self.db.get_user_by_id``."""
    user = self.db.get_user_by_id(user_id)
    return user
1218
+
1219
def update_user_consent(self, user_id: str, marketing_consent: bool) -> bool:
    """Change a user's marketing-consent flag.

    Returns the boolean reported by ``self.db.update_user_consent``.
    """
    updated = self.db.update_user_consent(user_id, marketing_consent)
    return updated
1222
+
1223
def export_user_data(self, user_id: str) -> Dict:
    """Export everything stored for a user (data-portability request).

    Delegates to ``self.db.export_user_data``.
    """
    exported = self.db.export_user_data(user_id)
    return exported
1226
+
1227
def delete_user_account(self, user_id: str) -> bool:
    """Delete a user's account together with all of their data.

    Returns the boolean reported by ``self.db.delete_user_account``.
    """
    deleted = self.db.delete_user_account(user_id)
    return deleted
1230
+
1231
+ # Helper methods (using existing implementations)
1232
+ def _convert_to_audio(self, input_path, output_path, audio_format="wav"):
1233
+ """Convert audio/video file to specified audio format"""
1234
+ # Ensure output directory exists
1235
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
1236
+
1237
+ if audio_format in {"wav", "alaw", "mulaw"}:
1238
+ cmd = [
1239
+ "ffmpeg", "-y", "-i", input_path,
1240
+ "-ar", "16000", "-ac", "1",
1241
+ output_path
1242
+ ]
1243
+ else:
1244
+ cmd = [
1245
+ "ffmpeg", "-y", "-i", input_path,
1246
+ output_path
1247
+ ]
1248
+
1249
+ try:
1250
+ result = subprocess.run(
1251
+ cmd,
1252
+ stdout=subprocess.PIPE,
1253
+ stderr=subprocess.PIPE,
1254
+ timeout=300, # 5 minute timeout
1255
+ text=True
1256
+ )
1257
+
1258
+ if result.returncode != 0:
1259
+ error_output = result.stderr
1260
+ raise Exception(f"FFmpeg conversion failed: {error_output}")
1261
+
1262
+ # Verify output file exists and has content
1263
+ if not os.path.exists(output_path):
1264
+ raise Exception(f"Output file was not created: {output_path}")
1265
+
1266
+ file_size = os.path.getsize(output_path)
1267
+ if file_size == 0:
1268
+ raise Exception(f"Output file is empty: {output_path}")
1269
+
1270
+ except subprocess.TimeoutExpired:
1271
+ raise Exception(f"FFmpeg conversion timed out after 5 minutes")
1272
+ except Exception as e:
1273
+ if "FFmpeg conversion failed" in str(e):
1274
+ raise # Re-raise our detailed error
1275
+ else:
1276
+ raise Exception(f"FFmpeg error: {str(e)}")
1277
+
1278
def _upload_blob(self, local_file, blob_name):
    """Upload a local file to blob storage and return a SAS-signed URL for it.

    The file is written to the ``AZURE_CONTAINER`` container under
    ``blob_name``, overwriting any existing blob of that name. The
    returned URL is the blob URL with ``AZURE_BLOB_SAS_TOKEN`` appended
    as its query string.
    """
    client = self.blob_service.get_blob_client(container=AZURE_CONTAINER, blob=blob_name)
    with open(local_file, "rb") as stream:
        client.upload_blob(stream, overwrite=True)
    # The token may be configured with or without its leading "?"
    token = AZURE_BLOB_SAS_TOKEN.lstrip("?")
    return "{}?{}".format(client.url, token)
1284
+
1285
def _create_transcription(self, audio_url, language, diarization_enabled, speakers,
                          profanity, punctuation, timestamps, lexical,
                          language_id_enabled=False, candidate_locales=None):
    """Submit a batch transcription request and return its transcription ID.

    Args:
        audio_url: URL of the audio to transcribe (sent as ``contentUrls``).
        language: Locale of the audio (sent as ``locale``).
        diarization_enabled: When truthy, request speaker diarization.
        speakers: Maximum speaker count for diarization (converted to int).
        profanity: Value for ``profanityFilterMode``.
        punctuation: Value for ``punctuationMode``.
        timestamps: Enables word-level timestamps (both plain and display form).
        lexical: Value for the ``lexical`` property.
        language_id_enabled: When truthy and ``candidate_locales`` is
            non-empty, request continuous language identification.
        candidate_locales: Locales to consider for language identification.

    Returns:
        The transcription ID: the last path segment of the ``Location``
        response header, with any query string removed.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # The configured endpoint may end with "/"; strip it so the request URL
    # does not contain a double slash.
    base_url = AZURE_SPEECH_KEY_ENDPOINT.rstrip("/")
    url = f"{base_url}/speechtotext/{API_VERSION}/transcriptions"
    headers = {
        "Ocp-Apim-Subscription-Key": AZURE_SPEECH_KEY,
        "Content-Type": "application/json"
    }

    properties = {
        "profanityFilterMode": profanity,
        "punctuationMode": punctuation,
        "wordLevelTimestampsEnabled": timestamps,
        "displayFormWordLevelTimestampsEnabled": timestamps,
        "lexical": lexical
    }
    if diarization_enabled:
        properties["diarizationEnabled"] = True
        properties["diarization"] = {
            "speakers": {
                "minCount": 1,
                "maxCount": int(speakers)
            }
        }
    if language_id_enabled and candidate_locales:
        properties["languageIdentification"] = {
            "mode": "continuous",
            "candidateLocales": candidate_locales
        }

    # Options left as None are omitted so the service applies its defaults.
    properties = {k: v for k, v in properties.items() if v is not None}
    body = {
        "displayName": f"Transcription_{uuid.uuid4()}",
        "description": "Batch speech-to-text with advanced options",
        "locale": language,
        "contentUrls": [audio_url],
        "properties": properties,
        "customProperties": {}
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    r = requests.post(url, headers=headers, json=body, timeout=60)
    r.raise_for_status()
    trans_id = r.headers["Location"].split("/")[-1].split("?")[0]
    return trans_id
1328
+
1329
def _get_transcription_result_url(self, trans_id):
    """Return the download URL of a finished transcription's result file.

    Fetches the transcription resource; if its ``status`` is
    ``"Succeeded"``, lists its files and returns the ``contentUrl`` of the
    first file whose ``kind`` is ``transcription`` (case-insensitive).

    Returns:
        The content URL, or ``None`` when the transcription has not
        succeeded (yet), exposes no files link, or has no transcription
        file.

    Raises:
        requests.RequestException: On connection problems or timeout.
    """
    # The configured endpoint may end with "/"; avoid a double slash.
    base_url = AZURE_SPEECH_KEY_ENDPOINT.rstrip("/")
    url = f"{base_url}/speechtotext/{API_VERSION}/transcriptions/{trans_id}"
    headers = {"Ocp-Apim-Subscription-Key": AZURE_SPEECH_KEY}

    # Timeouts keep a status poll from hanging on an unresponsive service.
    r = requests.get(url, headers=headers, timeout=60)
    data = r.json()

    if data.get("status") != "Succeeded":
        return None

    files_url = (data.get("links") or {}).get("files")
    if not files_url:
        return None

    r2 = requests.get(files_url, headers=headers, timeout=60)
    for f in r2.json().get("values", []):
        if f.get("kind", "").lower() == "transcription":
            return f["links"]["contentUrl"]
    return None
1347
+
1348
def _fetch_transcript(self, content_url):
    """Download a transcription result and render it as readable text.

    Each entry of ``recognizedPhrases`` becomes one paragraph, prefixed
    with ``[HH:MM:SS]`` when a start offset can be determined and with
    ``Speaker N:`` when the phrase carries a ``speaker`` field. If no
    phrase yields text, ``combinedRecognizedPhrases`` is used instead; as
    a last resort the raw JSON is returned for debugging.

    Returns:
        The formatted transcript (paragraphs separated by blank lines),
        or a message starting with ``"Unable to parse transcription
        result"`` if the response cannot be processed.

    Raises:
        requests.RequestException: On connection problems or timeout.
    """
    # Timeout keeps the download from hanging on an unresponsive server.
    r = requests.get(content_url, timeout=60)
    try:
        j = r.json()
        out = []

        def get_text(phrase):
            # Prefer the best hypothesis; fall back to the phrase-level text.
            if 'nBest' in phrase and phrase['nBest']:
                return phrase['nBest'][0].get('display', '') or phrase.get('display', '') or ''
            return phrase.get('display', '') or ''

        def safe_offset(val):
            try:
                return int(val)
            except (ValueError, TypeError):
                return None

        def format_time(seconds):
            """Format seconds into HH:MM:SS format"""
            try:
                total = int(seconds)
            except (TypeError, ValueError, OverflowError):
                return "00:00:00"
            hours, remainder = divmod(total, 3600)
            minutes, secs = divmod(remainder, 60)
            return f"{hours:02d}:{minutes:02d}:{secs:02d}"

        def start_seconds(phrase):
            """Return the phrase start in seconds, or None if unavailable."""
            # Offsets are in 100-nanosecond ticks, tried in order:
            # phrase offset, first word's offset, then offsetInTicks.
            candidates = []
            if phrase.get('offset') is not None:
                candidates.append(phrase['offset'])
            words = phrase.get('words')
            if words and words[0].get('offset') is not None:
                candidates.append(words[0]['offset'])
            if 'offsetInTicks' in phrase:
                candidates.append(phrase['offsetInTicks'])
            for raw in candidates:
                ticks = safe_offset(raw)
                if ticks is not None:
                    return ticks / 10_000_000
            return None

        # Check if this is a diarization result or regular transcription
        if 'recognizedPhrases' in j:
            for phrase in j['recognizedPhrases']:
                speaker_id = phrase.get('speaker', 0)  # Default to speaker 0 if not present
                text = get_text(phrase)

                if not text.strip():
                    continue

                timestamp_seconds = start_seconds(phrase)
                has_speaker = 'speaker' in phrase

                # Format output based on whether we have speaker diarization and timestamps
                if timestamp_seconds is not None:
                    time_str = format_time(timestamp_seconds)
                    if has_speaker:
                        out.append(f"[{time_str}] Speaker {speaker_id}: {text}")
                    else:
                        out.append(f"[{time_str}] {text}")
                elif has_speaker:
                    out.append(f"Speaker {speaker_id}: {text}")
                else:
                    out.append(text)

            if out:
                return '\n\n'.join(out)

        # Fallback: handle combined results or other formats
        if 'combinedRecognizedPhrases' in j:
            combined_results = [
                combined_phrase.get('display', '')
                for combined_phrase in j['combinedRecognizedPhrases']
                if (combined_phrase.get('display', '') or '').strip()
            ]
            if combined_results:
                return '\n\n'.join(combined_results)

        # Last resort: return raw JSON for debugging
        return json.dumps(j, ensure_ascii=False, indent=2)

    except Exception as e:
        return f"Unable to parse transcription result: {str(e)}\n\nRaw response: {r.text[:1000]}..."
1443
+
1444
+ # Global transcription manager instance with AI summary support
1445
+ transcription_manager = TranscriptionManager()
1446
+
1447
+ # Backward compatibility functions
1448
def allowed_file(filename):
    """Tell whether a file name has an extension the pipeline accepts.

    Missing names, the placeholder ``"upload.unknown"`` and names without
    any extension are accepted so that FFmpeg can attempt them. Otherwise
    the lowercase extension must be one of ``AUDIO_FORMATS`` or a known
    video/container extension.
    """
    # Unknown or absent names: let FFmpeg handle unknown formats
    if not filename or filename in ["upload.unknown", ""]:
        return True

    # No extension, let FFmpeg try
    if '.' not in filename:
        return True

    video_extensions = {
        'mp4', 'mov', 'avi', 'mkv', 'webm', 'm4a', '3gp', 'f4v',
        'wmv', 'asf', 'rm', 'rmvb', 'flv', 'mpg', 'mpeg', 'mts', 'vob'
    }
    accepted = set(AUDIO_FORMATS) | video_extensions
    suffix = filename.rpartition('.')[2].lower()
    return suffix in accepted
1463
+
1464
def generate_user_session():
    """Return a fresh random session identifier (UUID4 in string form).

    Retained for compatibility with older callers.
    """
    session_id = uuid.uuid4()
    return str(session_id)
1467
+
1468
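+ # Names kept for backward compatibility. NOTE(review): the self-assignments below are no-ops at runtime; they only document which names importers are expected to use.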
1469
+ ALLOWED_LANGS = ALLOWED_LANGS
1470
+ AUDIO_FORMATS = AUDIO_FORMATS
1471
+ allowed_file = allowed_file
1472
+ User = User
env_template.sh ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Azure Speech Services Configuration
2
+ AZURE_SPEECH_KEY=xxx
3
+ AZURE_SPEECH_KEY_ENDPOINT=https://xxx
4
+ AZURE_REGION=xxx
5
+
6
+ # Azure Blob Storage Configuration
7
+ AZURE_BLOB_CONNECTION=xxx
8
+ AZURE_CONTAINER=xxx
9
+ AZURE_BLOB_SAS_TOKEN=xxx
10
+
11
+ # Computer Vision Services Configuration (NEW)
12
+ COMPUTER_VISION_ENDPOINT=https://xxx
13
+ COMPUTER_VISION_KEY=xxx
14
+ COMPUTER_VISION_REGION=xxx
15
+
16
+ # AI Agents Configuration (NEW)
17
+ AI_PROJECT_ENDPOINT=https://xxx
18
+ AI_PROJECT_KEY=xxx
19
+ AI_AGENT_ID=xxx
20
+
21
+ # Azure OpenAI Configuration
22
+ AZURE_OPENAI_ENDPOINT=https://xxx
23
+ AZURE_OPENAI_KEY=xxx
24
+ AZURE_OPENAI_DEPLOYMENT=xxx
25
+ AZURE_OPENAI_API_VERSION=xxx
26
+
27
+ # API Configuration
28
+ API_VERSION=v3.2
29
+
30
+ # Allowed Languages Configuration
31
+ ALLOWED_LANGS={"en-US": "English (US)", "en-GB": "English (UK)", "es-ES": "Spanish", "fr-FR": "French", "de-DE": "German", "it-IT": "Italian", "pt-BR": "Portuguese (Brazil)", "zh-CN": "Chinese (Simplified)", "ja-JP": "Japanese", "ko-KR": "Korean", "ru-RU": "Russian", "ar-SA": "Arabic", "hi-IN": "Hindi", "th-TH": "Thai", "vi-VN": "Vietnamese", "nl-NL": "Dutch", "sv-SE": "Swedish", "da-DK": "Danish", "no-NO": "Norwegian", "fi-FI": "Finnish", "pl-PL": "Polish", "cs-CZ": "Czech", "hu-HU": "Hungarian", "ro-RO": "Romanian", "bg-BG": "Bulgarian", "hr-HR": "Croatian", "sk-SK": "Slovak", "sl-SI": "Slovenian", "et-EE": "Estonian", "lv-LV": "Latvian", "lt-LT": "Lithuanian", "uk-UA": "Ukrainian", "el-GR": "Greek", "tr-TR": "Turkish", "he-IL": "Hebrew", "fa-IR": "Persian", "ur-PK": "Urdu", "bn-BD": "Bengali", "ta-IN": "Tamil", "te-IN": "Telugu", "ml-IN": "Malayalam", "kn-IN": "Kannada", "gu-IN": "Gujarati", "pa-IN": "Punjabi", "mr-IN": "Marathi", "ne-NP": "Nepali", "si-LK": "Sinhala", "my-MM": "Myanmar", "km-KH": "Khmer", "lo-LA": "Lao", "ka-GE": "Georgian", "am-ET": "Amharic", "sw-TZ": "Swahili", "zu-ZA": "Zulu", "af-ZA": "Afrikaans", "is-IS": "Icelandic", "mt-MT": "Maltese", "cy-GB": "Welsh", "ga-IE": "Irish", "eu-ES": "Basque", "ca-ES": "Catalan", "gl-ES": "Galician", "pt-PT": "Portuguese (Portugal)", "fr-CA": "French (Canada)", "en-AU": "English (Australia)", "en-IN": "English (India)", "en-CA": "English (Canada)", "en-NZ": "English (New Zealand)", "en-ZA": "English (South Africa)", "es-MX": "Spanish (Mexico)", "es-AR": "Spanish (Argentina)", "es-CO": "Spanish (Colombia)", "es-CL": "Spanish (Chile)", "es-PE": "Spanish (Peru)", "es-VE": "Spanish (Venezuela)", "es-EC": "Spanish (Ecuador)", "es-GT": "Spanish (Guatemala)", "es-CR": "Spanish (Costa Rica)", "es-PA": "Spanish (Panama)", "es-DO": "Spanish (Dominican Republic)", "es-PR": "Spanish (Puerto Rico)", "es-UY": "Spanish (Uruguay)", "es-PY": "Spanish (Paraguay)", "es-BO": "Spanish (Bolivia)", "es-SV": "Spanish (El 
Salvador)", "es-HN": "Spanish (Honduras)", "es-NI": "Spanish (Nicaragua)", "zh-TW": "Chinese (Traditional)", "zh-HK": "Chinese (Hong Kong)"}
32
+
33
+ # Application Settings
34
+ DEBUG=False
35
+ UPLOAD_MAX_SIZE_MB=500
36
+ MAX_CONCURRENT_JOBS=5
37
+ DATABASE_PATH=xxx/xxx
38
+ CLEANUP_OLDER_THAN_DAYS=30
39
+
40
+ # Security Settings (optional, for enhanced security)
41
+ # SECRET_KEY=your_secret_key_for_sessions
42
+ # ENCRYPTION_KEY=your_encryption_key_for_sensitive_data
43
+
44
+ # Logging Settings
45
+ LOG_LEVEL=INFO
46
+ LOG_FILE=app.log
47
+
48
+ # Performance Settings
49
+ FRAME_EXTRACTION_MAX_FRAMES=50
50
+ FRAME_SIMILARITY_THRESHOLD=0.85
51
+ MIN_TIME_BETWEEN_FRAMES=2.0
52
+
53
+ # File Processing Settings
54
+ SUPPORTED_VIDEO_FORMATS=mp4,mov,avi,mkv,webm,flv,3gp,wmv
55
+ SUPPORTED_AUDIO_FORMATS=wav,mp3,ogg,opus,flac,wma,aac,m4a,amr,speex
56
+ SUPPORTED_DOCUMENT_FORMATS=pdf,docx,doc,pptx,ppt,xlsx,xls,csv,txt,json
57
+ SUPPORTED_IMAGE_FORMATS=jpg,jpeg,png,bmp,gif,tiff,webp
58
+
59
+ # Database Settings
60
+ DATABASE_BACKUP_INTERVAL=30
61
+ DATABASE_LOCATION=xxx
62
+ TEMP_FILES_CLEANUP_HOURS=24
file_processors.py ADDED
@@ -0,0 +1,442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import tempfile
4
+ import subprocess
5
+ from typing import Optional, Dict, Any
6
+ import PyPDF2
7
+ import docx
8
+ from openpyxl import load_workbook
9
+ import pandas as pd
10
+ from pptx import Presentation
11
+ from PIL import Image
12
+ import zipfile
13
+
14
class FileProcessor:
    """Extract plain-text content from documents of various formats.

    Each supported extension maps to a private ``_process_*`` method that
    takes a file path and returns the extracted text, an explanatory
    message when the format cannot be handled in this environment, or
    ``None`` when nothing could be extracted.
    """

    def __init__(self):
        # Lowercase extension (without the dot) -> extractor method
        self.supported_extensions = {
            'pdf': self._process_pdf,
            'docx': self._process_docx,
            'doc': self._process_doc,
            'pptx': self._process_pptx,
            'ppt': self._process_ppt,
            'xlsx': self._process_xlsx,
            'xls': self._process_xls,
            'csv': self._process_csv,
            'txt': self._process_txt,
            'json': self._process_json,
            'rtf': self._process_rtf,
            'odt': self._process_odt,
            'ods': self._process_ods,
            'odp': self._process_odp
        }

    @staticmethod
    def _extension_of(file_path: str) -> str:
        """Return the lowercase extension of ``file_path`` without its dot.

        Only the final path component is inspected, so a dot in a
        directory name (``/data/v1.2/README``) is not taken for an
        extension. Returns ``''`` when there is none.
        """
        return os.path.splitext(file_path)[1].lower().lstrip('.')

    def process_file(self, file_path: str, extension: Optional[str] = None) -> Optional[str]:
        """Extract the text content of a file.

        Args:
            file_path: Path of the file to read.
            extension: Extension to dispatch on (with or without a leading
                dot). When omitted it is derived from ``file_path``.

        Returns:
            The extracted text; a human-readable message for unsupported
            types, empty extractions or errors; or ``None`` if the file
            does not exist.
        """
        try:
            if not os.path.exists(file_path):
                print(f"❌ File not found: {file_path}")
                return None

            # Determine extension if not provided
            if not extension:
                extension = self._extension_of(file_path)

            extension = extension.lower().strip('.')

            if extension not in self.supported_extensions:
                print(f"⚠️ Unsupported file extension: {extension}")
                return f"Unsupported file type: .{extension}"

            # Process file based on extension
            content = self.supported_extensions[extension](file_path)

            if content:
                print(f"✅ Successfully processed {extension.upper()} file: {os.path.basename(file_path)}")
                return content

            print(f"⚠️ No content extracted from: {os.path.basename(file_path)}")
            return f"Could not extract content from {extension.upper()} file"

        except Exception as e:
            print(f"❌ Error processing file {file_path}: {e}")
            return f"Error processing file: {str(e)}"

    def _process_pdf(self, file_path: str) -> Optional[str]:
        """Extract text from PDF files.

        Uses PyPDF2 first. If no page yields text, pdfplumber is tried
        when it is installed.
        """
        try:
            text_content = []
            found_text = False

            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)

                for page_num, page in enumerate(pdf_reader.pages, start=1):
                    try:
                        # extract_text() may return None for image-only pages
                        page_text = page.extract_text() or ""
                    except Exception as e:
                        text_content.append(f"--- Page {page_num} (Error reading) ---")
                        print(f"⚠️ Error reading PDF page {page_num}: {e}")
                        continue
                    if page_text.strip():
                        found_text = True
                        text_content.append(f"--- Page {page_num} ---")
                        text_content.append(page_text)
                        text_content.append("")

            if not found_text:
                # Try alternative PDF processing with pdfplumber if available
                try:
                    import pdfplumber
                except ImportError:
                    if not text_content:
                        return "PDF contains non-text content or requires advanced processing"
                else:
                    fallback = []
                    with pdfplumber.open(file_path) as pdf:
                        for page_num, page in enumerate(pdf.pages, start=1):
                            page_text = page.extract_text()
                            if page_text:
                                fallback.append(f"--- Page {page_num} ---")
                                fallback.append(page_text)
                                fallback.append("")
                    if fallback:
                        text_content = fallback

            return "\n".join(text_content) if text_content else None

        except Exception as e:
            print(f"❌ Error processing PDF: {e}")
            return None

    def _process_docx(self, file_path: str) -> Optional[str]:
        """Extract paragraph and table text from DOCX files."""
        try:
            doc = docx.Document(file_path)

            # Extract paragraphs
            text_content = [p.text for p in doc.paragraphs if p.text.strip()]

            # Extract tables, one " | "-separated line per row
            for table in doc.tables:
                text_content.append("\n--- Table ---")
                for row in table.rows:
                    text_content.append(" | ".join(cell.text.strip() for cell in row.cells))
                text_content.append("--- End Table ---\n")

            return "\n".join(text_content) if text_content else None

        except Exception as e:
            print(f"❌ Error processing DOCX: {e}")
            return None

    def _process_doc(self, file_path: str) -> Optional[str]:
        """Extract text from DOC files using docx2txt or antiword.

        docx2txt is tried first; if it is missing, fails, or returns no
        text, the system ``antiword`` tool is tried next.
        """
        try:
            # Try with docx2txt first
            try:
                import docx2txt
            except ImportError:
                docx2txt = None

            if docx2txt is not None:
                try:
                    text = docx2txt.process(file_path)
                except Exception as e:
                    # Do not give up here: antiword may still read the file
                    print(f"⚠️ docx2txt could not read DOC file: {e}")
                else:
                    if text and text.strip():
                        return text

            # Try with antiword (if available on system)
            try:
                result = subprocess.run(
                    ['antiword', file_path],
                    capture_output=True,
                    text=True,
                    timeout=30
                )
                if result.returncode == 0:
                    return result.stdout
            except (subprocess.SubprocessError, FileNotFoundError):
                pass

            return "DOC file processing requires additional tools (docx2txt or antiword)"

        except Exception as e:
            print(f"❌ Error processing DOC: {e}")
            return None

    def _process_pptx(self, file_path: str) -> Optional[str]:
        """Extract shape text and speaker notes from PPTX files."""
        try:
            presentation = Presentation(file_path)
            text_content = []

            for slide_num, slide in enumerate(presentation.slides, 1):
                slide_text = [f"--- Slide {slide_num} ---"]

                # Extract text from shapes
                for shape in slide.shapes:
                    if hasattr(shape, "text") and shape.text.strip():
                        slide_text.append(shape.text)

                # Extract notes
                if slide.has_notes_slide:
                    notes_text = slide.notes_slide.notes_text_frame.text
                    if notes_text.strip():
                        slide_text.append(f"Notes: {notes_text}")

                if len(slide_text) > 1:  # More than just the slide header
                    text_content.extend(slide_text)
                    text_content.append("")

            return "\n".join(text_content) if text_content else None

        except Exception as e:
            print(f"❌ Error processing PPTX: {e}")
            return None

    def _process_ppt(self, file_path: str) -> Optional[str]:
        """Report that legacy PPT files are not supported directly."""
        # Legacy binary PPT cannot be read by python-pptx
        return "PPT file processing requires conversion to PPTX format"

    def _process_xlsx(self, file_path: str) -> Optional[str]:
        """Extract cell values from XLSX files (first 100 non-empty rows per sheet)."""
        try:
            workbook = load_workbook(file_path, data_only=True)
            try:
                text_content = []

                for sheet_name in workbook.sheetnames:
                    sheet = workbook[sheet_name]
                    text_content.append(f"--- Sheet: {sheet_name} ---")

                    emitted = 0
                    for row in sheet.iter_rows(values_only=True):
                        if emitted >= 100:  # Limit to first 100 rows
                            break
                        if any(cell is not None for cell in row):
                            text_content.append(
                                " | ".join(str(cell) if cell is not None else "" for cell in row)
                            )
                            emitted += 1

                    text_content.append("")

                return "\n".join(text_content) if text_content else None
            finally:
                # Release the underlying file handle
                workbook.close()

        except Exception as e:
            print(f"❌ Error processing XLSX: {e}")
            return None

    def _process_xls(self, file_path: str) -> Optional[str]:
        """Extract text from XLS files (first 100 rows per sheet)."""
        try:
            text_content = []

            # Open the workbook once and parse every sheet from that handle
            with pd.ExcelFile(file_path) as xl_file:
                for sheet_name in xl_file.sheet_names:
                    text_content.append(f"--- Sheet: {sheet_name} ---")

                    df = xl_file.parse(sheet_name)

                    # Convert DataFrame to string representation
                    if not df.empty:
                        text_content.append(df.head(100).to_string(index=False))

                    text_content.append("")

            return "\n".join(text_content) if text_content else None

        except Exception as e:
            print(f"❌ Error processing XLS: {e}")
            return None

    def _process_csv(self, file_path: str) -> Optional[str]:
        """Summarise a CSV file: column names, row count and the first 50 rows."""
        try:
            try:
                df = pd.read_csv(file_path, encoding='utf-8')
            except UnicodeDecodeError:
                # Legacy exports are often not UTF-8; latin-1 accepts any byte
                df = pd.read_csv(file_path, encoding='latin-1')

            text_content = []
            text_content.append("--- CSV Data ---")
            # Column labels are not necessarily strings
            text_content.append(f"Columns: {', '.join(str(col) for col in df.columns)}")
            text_content.append(f"Total rows: {len(df)}")
            text_content.append("")

            # Show first 50 rows
            text_content.append(df.head(50).to_string(index=False))

            if len(df) > 50:
                text_content.append(f"\n... and {len(df) - 50} more rows")

            return "\n".join(text_content)

        except Exception as e:
            print(f"❌ Error processing CSV: {e}")
            return None

    def _process_txt(self, file_path: str) -> Optional[str]:
        """Read a text file, detecting its encoding.

        UTF-8 (with or without BOM) is tried first. UTF-16 is attempted
        only when the file starts with a UTF-16 byte-order mark, because
        BOM-less UTF-16 decoding "succeeds" on most even-length byte
        strings and would turn legacy 8-bit text into garbage. cp1252 and
        finally latin-1 (which accepts any byte sequence) follow.
        """
        try:
            with open(file_path, 'rb') as file:
                head = file.read(2)

            encodings = ['utf-8-sig']
            if head in (b'\xff\xfe', b'\xfe\xff'):
                encodings.append('utf-16')
            encodings.extend(['cp1252', 'latin-1'])

            for encoding in encodings:
                try:
                    with open(file_path, 'r', encoding=encoding) as file:
                        content = file.read()
                except UnicodeDecodeError:
                    continue
                return content if content.strip() else None

            return None

        except Exception as e:
            print(f"❌ Error processing TXT: {e}")
            return None

    def _process_json(self, file_path: str) -> Optional[str]:
        """Render a JSON file as readable text (arrays are truncated to 10 items)."""
        try:
            # utf-8-sig also accepts files saved with a UTF-8 BOM
            with open(file_path, 'r', encoding='utf-8-sig') as file:
                data = json.load(file)

            # Convert JSON to readable text format
            if isinstance(data, dict):
                text_content = ["--- JSON Data ---"]
                text_content.append(json.dumps(data, indent=2, ensure_ascii=False))
            elif isinstance(data, list):
                text_content = ["--- JSON Array ---"]
                text_content.append(f"Array with {len(data)} items:")
                text_content.append(json.dumps(data[:10], indent=2, ensure_ascii=False))
                if len(data) > 10:
                    text_content.append(f"... and {len(data) - 10} more items")
            else:
                text_content = [str(data)]

            return "\n".join(text_content)

        except Exception as e:
            print(f"❌ Error processing JSON: {e}")
            return None

    def _process_rtf(self, file_path: str) -> Optional[str]:
        """Extract text from RTF files (requires the optional striprtf package)."""
        try:
            # Try with striprtf if available
            try:
                from striprtf.striprtf import rtf_to_text
            except ImportError:
                return "RTF file processing requires striprtf package"

            with open(file_path, 'r', encoding='utf-8') as file:
                rtf_content = file.read()
            return rtf_to_text(rtf_content)

        except Exception as e:
            print(f"❌ Error processing RTF: {e}")
            return None

    def _process_odt(self, file_path: str) -> Optional[str]:
        """Extract heading and paragraph text from ODT files.

        An ODT file is a ZIP archive whose ``content.xml`` holds the
        document body; the text of every ``text:h`` and ``text:p`` element
        is returned, one per line.
        """
        try:
            import xml.etree.ElementTree as ET

            with zipfile.ZipFile(file_path, 'r') as zip_file:
                if 'content.xml' not in zip_file.namelist():
                    return "Could not process ODT file"
                content_xml = zip_file.read('content.xml')

            # NOTE(review): content.xml comes from an uploaded file and
            # ElementTree is not hardened against maliciously constructed
            # XML; consider a hardened parser if uploads are untrusted.
            root = ET.fromstring(content_xml)

            text_ns = 'urn:oasis:names:tc:opendocument:xmlns:text:1.0'
            wanted_tags = {f'{{{text_ns}}}h', f'{{{text_ns}}}p'}

            text_content = []
            for element in root.iter():
                if element.tag in wanted_tags:
                    element_text = ''.join(element.itertext()).strip()
                    if element_text:
                        text_content.append(element_text)

            return "\n".join(text_content) if text_content else None

        except Exception as e:
            print(f"❌ Error processing ODT: {e}")
            return None

    def _process_ods(self, file_path: str) -> Optional[str]:
        """Report that ODS extraction is not implemented."""
        return "ODS file processing not yet implemented"

    def _process_odp(self, file_path: str) -> Optional[str]:
        """Report that ODP extraction is not implemented."""
        return "ODP file processing not yet implemented"

    def get_file_info(self, file_path: str) -> Dict[str, Any]:
        """Get basic information about a file.

        Returns a dict with ``filename``, ``size`` (bytes), ``size_mb``,
        ``extension``, ``supported`` and ``modified`` (mtime). If the file
        cannot be inspected, the dict holds ``filename``, ``error`` and
        ``supported`` (False) instead.
        """
        try:
            stat = os.stat(file_path)
            extension = self._extension_of(file_path)
            return {
                'filename': os.path.basename(file_path),
                'size': stat.st_size,
                'size_mb': round(stat.st_size / (1024 * 1024), 2),
                'extension': extension,
                'supported': extension in self.supported_extensions,
                'modified': stat.st_mtime
            }
        except Exception as e:
            return {
                'filename': os.path.basename(file_path) if file_path else 'unknown',
                'error': str(e),
                'supported': False
            }

    def batch_process_files(self, file_paths: list) -> Dict[str, Any]:
        """Process multiple files and return combined results.

        Returns a dict with ``successful`` (filename/content/size_mb
        entries), ``failed`` (filename/reason entries),
        ``combined_content`` (list of lines), ``combined_text`` (those
        lines joined with newlines) and ``total_files``.
        """
        results = {
            'successful': [],
            'failed': [],
            'combined_content': [],
            'total_files': len(file_paths)
        }

        for file_path in file_paths:
            try:
                file_info = self.get_file_info(file_path)
                if not file_info.get('supported', False):
                    results['failed'].append({
                        'filename': file_info['filename'],
                        'reason': 'Unsupported file type'
                    })
                    continue

                content = self.process_file(file_path)
                if not content:
                    results['failed'].append({
                        'filename': file_info['filename'],
                        'reason': 'No content extracted'
                    })
                    continue

                results['successful'].append({
                    'filename': file_info['filename'],
                    'content': content,
                    'size_mb': file_info['size_mb']
                })
                results['combined_content'].append(f"=== {file_info['filename']} ===")
                results['combined_content'].append(content)
                results['combined_content'].append("")

            except Exception as e:
                results['failed'].append({
                    'filename': os.path.basename(file_path) if file_path else 'unknown',
                    'reason': str(e)
                })

        results['combined_text'] = "\n".join(results['combined_content'])
        return results
image_extraction.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ import tempfile
5
+ import uuid
6
+ from typing import List, Dict, Optional, Tuple
7
+ from datetime import datetime
8
+ import subprocess
9
+ from PIL import Image
10
+ import hashlib
11
+
12
class VideoFrameExtractor:
    """Extract significant frames from videos using computer vision techniques.

    Frames are written as JPEG files into the system temporary directory and
    described by dicts with the keys ``filename``, ``path``, ``timestamp``,
    ``frame_number``, ``file_size`` and ``created_at``.  Use
    ``cleanup_temp_files`` to delete them again.
    """

    def __init__(self):
        self.temp_dir = tempfile.gettempdir()
        self.similarity_threshold = 0.85  # Threshold for frame similarity
        self.min_time_between_frames = 2.0  # Minimum seconds between extracted frames
        self.max_frames = 50  # Maximum number of frames to extract

    def extract_frames(self, video_path: str) -> List[Dict]:
        """Extract frames that represent content changes in the video.

        The intent is to capture things like slide transitions rather than
        mouse movements.  Any failure is reported on stdout and results in an
        empty list rather than an exception.

        Args:
            video_path: Path to the video file.

        Returns:
            A list of saved-frame dicts (possibly empty).
        """
        try:
            if not os.path.exists(video_path):
                print(f"Video file not found: {video_path}")
                return []

            cap = cv2.VideoCapture(video_path)
            try:
                if not cap.isOpened():
                    print(f"Could not open video: {video_path}")
                    return []

                # Get video properties
                fps = cap.get(cv2.CAP_PROP_FPS)
                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                duration = frame_count / fps if fps > 0 else 0

                print(f"Processing video: {duration:.1f}s, {fps:.1f} FPS, {frame_count} frames")

                # Extract frames using content-aware algorithm
                extracted_frames = self._extract_content_frames(cap, fps)
            finally:
                # Always release the capture, even if extraction raised.
                cap.release()

            print(f"Extracted {len(extracted_frames)} significant frames from video")
            return extracted_frames

        except Exception as e:
            print(f"Error extracting frames: {e}")
            return []

    def _extract_content_frames(self, cap: "cv2.VideoCapture", fps: float) -> List[Dict]:
        """Walk an opened capture and save frames that differ from the last saved one.

        Roughly two frames per second are examined, and at least
        ``min_time_between_frames`` seconds must pass between two saved
        frames.  Stops at end of stream or after ``max_frames`` saved frames.

        Args:
            cap: An opened capture; it is read sequentially and not released here.
            fps: Frame rate used to convert frame indices into timestamps.

        Returns:
            A list of saved-frame dicts; empty if ``fps`` is not positive.
        """
        # Timestamps are frame_number / fps, so a zero, negative or NaN frame
        # rate cannot be used; report it instead of failing with a division error.
        if not fps > 0:
            print(f"Cannot extract frames: invalid frame rate ({fps})")
            return []

        extracted_frames = []
        prev_frame = None  # preprocessed version of the last saved frame
        prev_frame_time = -self.min_time_between_frames
        frame_number = 0

        # Process about 2 frames per second for efficiency
        skip_frames = max(1, int(fps / 2))

        while len(extracted_frames) < self.max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            current_index = frame_number
            frame_number += 1

            # Skip frames for performance
            if current_index % skip_frames != 0:
                continue

            # Ensure minimum time between extractions
            current_time = current_index / fps
            if current_time - prev_frame_time < self.min_time_between_frames:
                continue

            try:
                # A missing previous frame counts as a significant change.
                if self._is_significant_change(frame, prev_frame):
                    saved_frame = self._save_frame(frame, current_time, current_index)
                    if saved_frame:
                        extracted_frames.append(saved_frame)
                        prev_frame_time = current_time

                        # Later frames are compared against the last SAVED frame.
                        prev_frame = self._preprocess_frame(frame)
                        print(f"Extracted frame at {current_time:.1f}s")

            except Exception as e:
                print(f"Error processing frame {current_index}: {e}")

        return extracted_frames

    def _is_significant_change(self, current_frame: np.ndarray, prev_frame: Optional[np.ndarray]) -> bool:
        """Decide whether ``current_frame`` differs enough from ``prev_frame``.

        Args:
            current_frame: Raw frame; it is preprocessed here.
            prev_frame: Already preprocessed reference frame, or ``None``.

        Returns:
            ``True`` if there is no reference frame or the weighted similarity
            is below ``similarity_threshold``; ``False`` otherwise, including
            when the comparison itself fails.
        """
        if prev_frame is None:
            return True

        try:
            curr_processed = self._preprocess_frame(current_frame)

            # Calculate multiple similarity metrics
            structural_sim = self._calculate_structural_similarity(curr_processed, prev_frame)
            histogram_sim = self._calculate_histogram_similarity(curr_processed, prev_frame)
            edge_sim = self._calculate_edge_similarity(curr_processed, prev_frame)

            # Combine metrics (weighted average)
            combined_similarity = (
                0.4 * structural_sim +
                0.3 * histogram_sim +
                0.3 * edge_sim
            )

            # Frame is significant if similarity is below threshold
            return combined_similarity < self.similarity_threshold

        except Exception as e:
            print(f"Error calculating frame similarity: {e}")
            return False

    def _preprocess_frame(self, frame: np.ndarray) -> np.ndarray:
        """Resize to 320x240, convert to grayscale and blur a frame for comparison.

        Returns the input frame unchanged if preprocessing fails.
        """
        try:
            # Resize to standard size for comparison
            resized = cv2.resize(frame, (320, 240))

            # Convert to grayscale
            gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)

            # Apply slight blur to ignore minor pixel changes (like cursor movement)
            return cv2.GaussianBlur(gray, (5, 5), 0)

        except Exception as e:
            print(f"Error preprocessing frame: {e}")
            return frame

    def _calculate_structural_similarity(self, frame1: np.ndarray, frame2: np.ndarray) -> float:
        """Calculate structural similarity between two frames.

        Uses normalized template matching; if that fails, falls back to the
        Pearson correlation of the flattened pixel values.

        Returns:
            The similarity score, or ``0.0`` when neither method can produce one.
        """
        try:
            result = cv2.matchTemplate(frame1, frame2, cv2.TM_CCOEFF_NORMED)
            return float(np.max(result))
        except Exception as e:
            print(f"Template matching failed, using correlation fallback: {e}")

        # Fallback: plain correlation computed with NumPy.
        try:
            a = np.asarray(frame1, dtype=np.float64).ravel()
            b = np.asarray(frame2, dtype=np.float64).ravel()
            if a.size == 0 or a.size != b.size:
                return 0.0
            if a.std() == 0 or b.std() == 0:
                # Correlation is undefined for a constant frame; treat
                # identical frames as fully similar and anything else as not.
                return 1.0 if np.array_equal(a, b) else 0.0
            correlation = float(np.corrcoef(a, b)[0, 1])
            return correlation if not np.isnan(correlation) else 0.0
        except Exception as e:
            print(f"Error calculating structural similarity: {e}")
            return 0.0

    def _calculate_histogram_similarity(self, frame1: np.ndarray, frame2: np.ndarray) -> float:
        """Calculate the correlation of the 256-bin histograms of two frames.

        Returns ``0.0`` if the correlation is NaN or the computation fails.
        """
        try:
            hist1 = cv2.calcHist([frame1], [0], None, [256], [0, 256])
            hist2 = cv2.calcHist([frame2], [0], None, [256], [0, 256])

            # Compare histograms using correlation method
            correlation = cv2.compareHist(hist1, hist2, cv2.HISTCMP_CORREL)
            return float(correlation) if not np.isnan(correlation) else 0.0

        except Exception as e:
            print(f"Error calculating histogram similarity: {e}")
            return 0.0

    def _calculate_edge_similarity(self, frame1: np.ndarray, frame2: np.ndarray) -> float:
        """Calculate how similar the Canny edge maps of two frames are.

        Returns ``1.0`` minus the mean absolute difference of the edge maps
        scaled by 255, or ``0.0`` if the computation fails.
        """
        try:
            edges1 = cv2.Canny(frame1, 50, 150)
            edges2 = cv2.Canny(frame2, 50, 150)

            diff = cv2.absdiff(edges1, edges2)
            similarity = 1.0 - (np.sum(diff) / (diff.shape[0] * diff.shape[1] * 255))

            return float(similarity)

        except Exception as e:
            print(f"Error calculating edge similarity: {e}")
            return 0.0

    def _save_frame(self, frame: np.ndarray, timestamp: float, frame_number: int) -> Optional[Dict]:
        """Save a frame as a JPEG (quality 85) in ``self.temp_dir``.

        Returns:
            A dict describing the saved file, or ``None`` if saving failed.
        """
        try:
            # Unique filename so concurrent extractions never collide
            frame_id = str(uuid.uuid4())
            filename = f"frame_{frame_id}_{int(timestamp)}s.jpg"
            filepath = os.path.join(self.temp_dir, filename)

            success = cv2.imwrite(filepath, frame, [cv2.IMWRITE_JPEG_QUALITY, 85])

            if not (success and os.path.exists(filepath)):
                print(f"Failed to save frame at {timestamp}s")
                return None

            return {
                'filename': filename,
                'path': filepath,
                'timestamp': timestamp,
                'frame_number': frame_number,
                'file_size': os.path.getsize(filepath),
                'created_at': datetime.now().isoformat()
            }

        except Exception as e:
            print(f"Error saving frame: {e}")
            return None

    def extract_frames_at_intervals(self, video_path: str, interval_seconds: float = 30.0) -> List[Dict]:
        """Extract frames at regular intervals (fallback method).

        Args:
            video_path: Path to the video file.
            interval_seconds: Positive spacing between sampled timestamps.

        Returns:
            Up to ``max_frames`` saved-frame dicts; empty on any failure or
            when ``interval_seconds`` is not positive.
        """
        # A non-positive step would never advance through the video.
        if not interval_seconds > 0:
            print(f"Error extracting frames at intervals: invalid interval {interval_seconds}")
            return []

        try:
            cap = cv2.VideoCapture(video_path)
            try:
                if not cap.isOpened():
                    return []

                fps = cap.get(cv2.CAP_PROP_FPS)
                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                duration = frame_count / fps if fps > 0 else 0

                extracted_frames = []
                current_time = 0.0

                while current_time < duration and len(extracted_frames) < self.max_frames:
                    # Seek to specific time
                    frame_number = int(current_time * fps)
                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)

                    ret, frame = cap.read()
                    if ret:
                        saved_frame = self._save_frame(frame, current_time, frame_number)
                        if saved_frame:
                            extracted_frames.append(saved_frame)

                    current_time += interval_seconds

                return extracted_frames
            finally:
                cap.release()

        except Exception as e:
            print(f"Error extracting frames at intervals: {e}")
            return []

    def extract_key_frames_opencv(self, video_path: str) -> List[Dict]:
        """Alternative extraction using simple scene-change detection.

        Every frame is compared with the one directly before it; a frame is
        saved when the mean absolute grayscale difference exceeds 30.

        Returns:
            Up to ``max_frames`` saved-frame dicts; empty on any failure.
        """
        scene_change_threshold = 30  # Adjustable threshold

        try:
            cap = cv2.VideoCapture(video_path)
            try:
                if not cap.isOpened():
                    return []

                fps = cap.get(cv2.CAP_PROP_FPS)
                if not fps > 0:
                    # Timestamps cannot be derived without a valid frame rate.
                    print(f"Error in OpenCV key frame extraction: invalid frame rate ({fps})")
                    return []

                extracted_frames = []
                prev_gray = None  # grayscale copy of the previous frame
                frame_number = 0

                while len(extracted_frames) < self.max_frames:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                    if prev_gray is not None:
                        mean_diff = np.mean(cv2.absdiff(gray, prev_gray))
                        if mean_diff > scene_change_threshold:
                            saved_frame = self._save_frame(frame, frame_number / fps, frame_number)
                            if saved_frame:
                                extracted_frames.append(saved_frame)

                    prev_gray = gray
                    frame_number += 1

                return extracted_frames
            finally:
                cap.release()

        except Exception as e:
            print(f"Error in OpenCV key frame extraction: {e}")
            return []

    def get_frame_hash(self, frame: np.ndarray) -> str:
        """Return an MD5 hex digest of a 16x16 grayscale version of the frame.

        Returns an empty string if hashing fails.
        """
        try:
            # Resize and convert to grayscale for consistent hashing
            small_frame = cv2.resize(frame, (16, 16))
            gray_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2GRAY)

            return hashlib.md5(gray_frame.tobytes()).hexdigest()

        except Exception as e:
            print(f"Error generating frame hash: {e}")
            return ""

    def cleanup_temp_files(self, frame_list: List[Dict]):
        """Delete the files referenced by the ``path`` key of each frame dict.

        Entries without a ``path`` or whose file no longer exists are skipped;
        a failure on one file does not stop the others from being removed.
        """
        for frame_info in frame_list:
            try:
                if 'path' in frame_info and os.path.exists(frame_info['path']):
                    os.remove(frame_info['path'])
            except Exception as e:
                print(f"Error cleaning up frame file: {e}")

    def get_video_info(self, video_path: str) -> Dict:
        """Get basic video information.

        Returns:
            A dict with ``duration``, ``fps``, ``frame_count``, ``resolution``,
            ``width``, ``height`` and ``file_size``, or a dict with a single
            ``error`` key if the video cannot be read.
        """
        try:
            cap = cv2.VideoCapture(video_path)
            try:
                if not cap.isOpened():
                    return {'error': 'Could not open video'}

                fps = cap.get(cv2.CAP_PROP_FPS)
                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                duration = frame_count / fps if fps > 0 else 0
            finally:
                cap.release()

            return {
                'duration': duration,
                'fps': fps,
                'frame_count': frame_count,
                'resolution': f"{width}x{height}",
                'width': width,
                'height': height,
                'file_size': os.path.getsize(video_path) if os.path.exists(video_path) else 0
            }

        except Exception as e:
            return {'error': str(e)}
357
+
358
class ImageAnalyzer:
    """Additional image analysis utilities."""

    def __init__(self):
        pass

    def detect_slide_content(self, image_path: str) -> Dict:
        """Detect whether an image contains slide-like content.

        Args:
            image_path: Path of the image file to analyse.

        Returns:
            A dict with ``text_regions`` (count), ``shapes_detected`` (count),
            ``text_density`` (regions per pixel, scaled by 1000) and
            ``likely_slide`` (``True`` when the density exceeds 0.5 or more
            than three shapes were found).  If the image cannot be loaded or
            analysed, a dict with a single ``error`` key.
        """
        try:
            image = cv2.imread(image_path)
            if image is None:
                return {'error': 'Could not load image'}

            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # Detect text regions
            text_regions = self._detect_text_regions(gray)

            # Detect geometric shapes (common in slides)
            shapes = self._detect_shapes(gray)

            # Calculate text density
            text_density = len(text_regions) / (gray.shape[0] * gray.shape[1]) * 1000

            return {
                'text_regions': len(text_regions),
                'shapes_detected': len(shapes),
                'text_density': text_density,
                'likely_slide': text_density > 0.5 or len(shapes) > 3
            }

        except Exception as e:
            return {'error': str(e)}

    def _detect_text_regions(self, gray_image: np.ndarray) -> List:
        """Detect candidate text regions with MSER.

        Returns the detected regions, or an empty list if detection fails.
        """
        try:
            # Use MSER (Maximally Stable Extremal Regions) for text detection
            mser = cv2.MSER_create()
            regions, _ = mser.detectRegions(gray_image)
            return regions
        except Exception:
            # Best-effort helper: a failed detection counts as "no regions".
            # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
            return []

    def _detect_shapes(self, gray_image: np.ndarray) -> List:
        """Detect shapes as external contours of the Canny edge map.

        Returns the contours whose area exceeds 100, or an empty list if
        detection fails.
        """
        try:
            edges = cv2.Canny(gray_image, 50, 150)
            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            # Filter small contours
            return [contour for contour in contours if cv2.contourArea(contour) > 100]
        except Exception:
            # Best-effort helper: a failed detection counts as "no shapes".
            return []
implementation_guide.txt ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AI Conference Summarization System - Implementation Guide
2
+
3
+ ## Overview
4
+
5
+ This enhanced system transforms your basic transcription service into a comprehensive AI-powered conference analysis platform that combines:
6
+
7
+ - **Speech transcription** with speaker identification
8
+ - **Computer vision** for slide/document analysis
9
+ - **Multi-format file processing** (PDF, Word, Excel, PowerPoint, etc.)
10
+ - **Intelligent frame extraction** from videos
11
+ - **Advanced AI summarization** using Azure AI Agents
12
+
13
+ ## 📁 New File Structure
14
+
15
+ ```
16
+ your-project/
17
+ ├── app.py # ✅ Updated main Gradio interface
18
+ ├── app_core.py # ✅ Extended backend with AI features
19
+ ├── backend.py # ⚠️ Keep existing (imported by app_core.py)
20
+ ├── ai_summary.py # 🆕 AI summarization core logic
21
+ ├── file_processors.py # 🆕 Multi-format file processing
22
+ ├── image_extraction.py # 🆕 Video frame extraction with CV
23
+ ├── requirements.txt # ✅ Updated with new dependencies
24
+ ├── .env.example # ✅ Updated environment template
25
+ ├── README.md # ⚠️ Update with new features
26
+ ├── temp/ # 📁 Temporary files (auto-created)
27
+ ├── uploads/ # 📁 File uploads (existing)
28
+ ├── database/ # 📁 SQLite database (existing)
29
+ └── logs/ # 📁 Application logs (optional)
30
+ ```
31
+
32
+ ## 🔧 Setup Instructions
33
+
34
+ ### 1. Install Dependencies
35
+
36
+ ```bash
37
+ pip install -r requirements.txt
38
+ ```
39
+
40
+ ### 2. Configure Azure Services
41
+
42
+ You need to set up these Azure services:
43
+
44
+ #### A. Existing Services (keep current configuration)
45
+ - **Azure Speech Services** - For transcription
46
+ - **Azure Blob Storage** - For file storage
47
+
48
+ #### B. New Services Required
49
+
50
+ **Computer Vision API:**
51
+ - Location/Region: eastus
52
+ - Endpoint: `https://image-process-256808.cognitiveservices.azure.com/`
53
+ - Get API key from Azure portal
54
+
55
+ **AI Agents Service:**
56
+ - Project endpoint: `https://aiservicetesting001.services.ai.azure.com/api/projects/aiagentdeplyomentproject`
57
+ - Agent ID: `asst_8isTjrGPs8M0d1RhkNONDtHK`
58
+ - Get API key from Azure AI Studio
59
+
60
+ ### 3. Update Environment Configuration
61
+
62
+ Copy `.env.example` to `.env` and fill in your actual values:
63
+
64
+ ```bash
65
+ cp .env.example .env
66
+ ```
67
+
68
+ **Critical new environment variables:**
69
+ ```bash
70
+ # Computer Vision
71
+ COMPUTER_VISION_ENDPOINT=https://your-cv-endpoint.cognitiveservices.azure.com/
72
+ COMPUTER_VISION_KEY=your_computer_vision_key
73
+ COMPUTER_VISION_REGION=eastus
74
+
75
+ # AI Agents
76
+ AI_PROJECT_ENDPOINT=https://your-ai-project.services.ai.azure.com/api/projects/your-project
77
+ AI_PROJECT_KEY=your_ai_project_key
78
+ AI_AGENT_ID=your_agent_id
79
+ ```
80
+
81
+ ### 4. Database Migration
82
+
83
+ The system will automatically create new tables for AI summary jobs when started. The extended database includes:
84
+
85
+ - `summary_jobs` table for AI summarization requests
86
+ - Additional indexes for performance
87
+ - Extended user statistics
88
+
89
+ ### 5. File Permissions
90
+
91
+ Ensure the application can write to:
92
+ ```bash
93
+ chmod 755 temp/
94
+ chmod 755 uploads/
95
+ chmod 755 database/
96
+ ```
97
+
98
+ ## 🚀 New Features Overview
99
+
100
+ ### 1. AI Summary Conference Tab
101
+
102
+ **Three Processing Modes:**
103
+ - **Batch Transcript:** Use existing transcripts from your history
104
+ - **Upload New Media:** Process new videos, audio, documents, images
105
+ - **Mixed Mode:** Combine both approaches
106
+
107
+ **Supported File Types:**
108
+ - **Video:** MP4, MOV, AVI, MKV, WebM, FLV (with frame extraction)
109
+ - **Audio:** WAV, MP3, OGG, OPUS, FLAC, M4A, AAC
110
+ - **Documents:** PDF, Word (.docx/.doc), PowerPoint (.pptx/.ppt)
111
+ - **Data:** Excel (.xlsx/.xls), CSV, JSON, TXT
112
+ - **Images:** JPG, PNG, BMP, GIF (with OCR)
113
+
114
+ ### 2. Intelligent Video Processing
115
+
116
+ **Smart Frame Extraction:**
117
+ - Detects significant content changes (slide transitions)
118
+ - Ignores minor movements (cursor, mouse)
119
+ - Uses computer vision similarity analysis
120
+ - Configurable similarity threshold (default: 85%)
121
+ - Maximum frame limit for performance (default: 50)
122
+
123
+ **Frame Analysis Pipeline:**
124
+ 1. Structural similarity comparison
125
+ 2. Histogram analysis for color changes
126
+ 3. Edge detection for layout changes
127
+ 4. Combined weighted scoring
128
+
129
+ ### 3. Computer Vision Integration
130
+
131
+ **OCR Text Extraction:**
132
+ - Reads text from slides, documents, images
133
+ - Handles multiple languages
134
+ - Preserves text positioning and structure
135
+
136
+ **Visual Content Analysis:**
137
+ - Describes images and charts
138
+ - Identifies visual elements
139
+ - Extracts metadata and confidence scores
140
+
141
+ ### 4. Multi-Format Document Processing
142
+
143
+ **Advanced Document Handlers:**
144
+ - **PDF:** PyPDF2 + pdfplumber fallback
145
+ - **Word:** python-docx with table extraction
146
+ - **PowerPoint:** python-pptx with slide-by-slide processing
147
+ - **Excel:** openpyxl + pandas with sheet separation
148
+ - **CSV/JSON:** Smart parsing with encoding detection
149
+
150
+ ### 5. AI-Powered Summarization
151
+
152
+ **Contextual Analysis:**
153
+ - Combines transcripts, documents, and visual content
154
+ - User prompt integration for corrections and focus
155
+ - Configurable output formats
156
+ - Action item extraction
157
+ - Timestamp preservation
158
+
159
+ ## 🎯 User Experience Flow
160
+
161
+ ### For Conference Organizers:
162
+ 1. **Upload conference video** → System extracts key slides automatically
163
+ 2. **Add presentation PDFs** → Text content integrated with transcription
164
+ 3. **Provide context prompt** → "This is Q4 review, focus on budget decisions"
165
+ 4. **Get comprehensive summary** → Executive summary with action items
166
+
167
+ ### For Meeting Participants:
168
+ 1. **Select existing transcripts** from previous sessions
169
+ 2. **Add supporting documents** shared during meetings
170
+ 3. **Specify focus areas** → "Extract technical decisions and timeline"
171
+ 4. **Download structured report** → Meeting minutes with timestamps
172
+
173
+ ### For Researchers:
174
+ 1. **Upload interview videos** → Automatic transcription + slide extraction
175
+ 2. **Include research documents** → Context integration
176
+ 3. **Custom analysis prompt** → "Identify key themes and participant insights"
177
+ 4. **Export detailed analysis** → Comprehensive research summary
178
+
179
+ ## 🔒 Security & Privacy Enhancements
180
+
181
+ **User Data Separation:**
182
+ - Each user's AI jobs are stored in separate database partitions
183
+ - Blob storage maintains user-specific folders
184
+ - No cross-user data access is possible
185
+
186
+ **GDPR Compliance Extensions:**
187
+ - AI summary jobs included in data exports
188
+ - Complete deletion covers all AI-generated content
189
+ - Audit trail for all AI processing activities
190
+
191
+ **Enterprise Security:**
192
+ - Azure Cognitive Services enterprise-grade security
193
+ - All processing done within your Azure tenant
194
+ - No data leaves your configured Azure region
195
+
196
+ ## 🚦 Performance Considerations
197
+
198
+ **Resource Usage:**
199
+ - Video processing: CPU-intensive for frame extraction
200
+ - AI summarization: Network-intensive for API calls
201
+ - Document processing: Memory-intensive for large files
202
+
203
+ **Optimization Tips:**
204
+ - Limit video duration to 2 hours for optimal performance
205
+ - Use high-quality source videos for better frame extraction
206
+ - Process large document batches during off-peak hours
207
+
208
+ **Scaling Options:**
209
+ - Increase `MAX_CONCURRENT_JOBS` for parallel processing
210
+ - Add more Azure Cognitive Services units for higher throughput
211
+ - Consider Azure Container Instances for horizontal scaling
212
+
213
+ ## 🛠️ Troubleshooting
214
+
215
+ ### Common Issues:
216
+
217
+ **AI Features Not Available:**
218
+ ```python
219
+ # Check this message in logs:
220
+ "⚠️ AI Summary features not available: ImportError"
221
+ ```
222
+ - Verify all dependencies installed: `pip install -r requirements.txt`
223
+ - Check Azure service credentials in `.env`
224
+ - Confirm network access to Azure endpoints
225
+
226
+ **Frame Extraction Failing:**
227
+ - Install OpenCV properly: `pip install opencv-python`
228
+ - Check video file format compatibility
229
+ - Verify sufficient disk space in `temp/` directory
230
+
231
+ **Document Processing Errors:**
232
+ - Install missing document processors: `pip install python-docx PyPDF2 openpyxl`
233
+ - Check file permissions and encoding
234
+ - Verify file formats are supported
235
+
236
+ **AI Summarization Timeouts:**
237
+ - Increase processing timeout in AI agent configuration
238
+ - Check Azure AI service quotas and limits
239
+ - Verify network connectivity to Azure AI endpoints
240
+
241
+ ### Debug Mode:
242
+
243
+ Enable detailed logging:
244
+ ```bash
245
+ export DEBUG=True
246
+ export LOG_LEVEL=DEBUG
247
+ ```
248
+
249
+ ### Health Check Endpoints:
250
+
251
+ The system includes built-in health checks for:
252
+ - Database connectivity
253
+ - Azure services authentication
254
+ - File processing pipeline
255
+ - AI agent availability
256
+
257
+ ## 📈 Monitoring & Analytics
258
+
259
+ **Built-in Metrics:**
260
+ - Processing success/failure rates
261
+ - Average processing times by file type
262
+ - User engagement with AI features
263
+ - Resource usage patterns
264
+
265
+ **Log Files:**
266
+ - `app.log` - Application events
267
+ - `ai_processing.log` - AI-specific operations
268
+ - `error.log` - Error tracking
269
+
270
+ ## 🔄 Migration from Previous Version
271
+
272
+ **Automatic Migration:**
273
+ - Existing transcription data preserved
274
+ - New database tables created automatically
275
+ - User accounts and permissions maintained
276
+ - Previous API endpoints remain functional
277
+
278
+ **Manual Steps Required:**
279
+ 1. Update environment variables with new API keys
280
+ 2. Install additional Python dependencies
281
+ 3. Restart application to initialize new services
282
+
283
+ ## 🎉 Testing the Enhanced Features
284
+
285
+ **Quick Test Sequence:**
286
+ 1. **Login** with existing account
287
+ 2. **Upload a short video** (2-3 minutes) with slides
288
+ 3. **Add a PDF document** related to the video content
289
+ 4. **Provide AI instructions** like "Create executive summary focusing on key decisions"
290
+ 5. **Monitor processing** through status updates
291
+ 6. **Download results** in markdown format
292
+
293
+ **Expected Results:**
294
+ - Video automatically transcribed with speaker identification
295
+ - Key slides extracted and analyzed with OCR
296
+ - PDF content integrated into analysis
297
+ - Comprehensive summary combining all sources
298
+ - Timestamps and action items identified
299
+
300
+ This enhanced system transforms basic transcription into comprehensive conference intelligence, making it suitable for enterprise meetings, academic research, and professional content analysis.
requirements.txt ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core application dependencies
2
+ gradio>=4.0.0
3
+ python-dotenv
4
+ azure-storage-blob
5
+
6
+ # AI and Azure Cognitive Services
7
+ azure-ai-projects>=1.0.0b1
8
+ azure-identity>=1.15.0
9
+ azure-cognitiveservices-vision-computervision>=0.9.0
10
+ msrest>=0.7.1
11
+
12
+ # Token management for GPT models
13
+ tiktoken>=0.5.0
14
+
15
+ # File processing dependencies
16
+ PyPDF2>=3.0.0
17
+ python-docx>=1.1.0
18
+ openpyxl>=3.1.0
19
+ pandas>=2.0.0
20
+ python-pptx>=0.6.0
21
+ xlrd>=2.0.0
22
+ striprtf>=0.0.26
23
+ docx2txt>=0.8
24
+
25
+ # Image and video processing
26
+ opencv-python>=4.8.0
27
+ Pillow>=10.0.0
28
+
29
+ # Additional dependencies for robust file handling
30
+ pdfplumber>=0.9.0
31
+ python-magic-bin>=0.4.14 # For Windows file type detection
32
+ chardet>=5.0.0 # For encoding detection
33
+
34
+ # Optional but recommended for better performance
35
+ numpy>=1.24.0
36
+ scipy>=1.10.0
37
+
38
+ # Development and debugging (optional)
39
+ requests>=2.31.0
40
+ urllib3>=1.26.0