NitinBot001 committed on
Commit
64ac061
·
verified ·
1 Parent(s): 34b7ddf

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +27 -0
  2. app.py +221 -0
  3. index.html +713 -0
  4. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Use official Python image as base
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Set environment variables.
# NOTE: the legacy space-separated "ENV key value" form is deprecated;
# use the key=value form.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer is cached until
# requirements.txt changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Expose the port the app runs on
EXPOSE 7860

# Command to run the application.
# FIX: the original CMD line was truncated — it was missing the closing
# quote and bracket ("--port", "7860), which makes the image build fail.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import asyncio
import io
import json
import os
import tempfile
import uuid
import wave
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional

import dotenv
import numpy as np
import requests
import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from groq import Groq
from pydantic import BaseModel
20
+
# Load environment variables
dotenv.load_dotenv()

app = FastAPI(title="Voice AI Backend")

# Mount static files only when the directory actually exists.
# FIX: the original mounted unconditionally, and StaticFiles raises a
# RuntimeError at startup when ./static is missing — this upload ships
# no static/ directory at all.
if Path("static").is_dir():
    app.mount("/static", StaticFiles(directory="static"), name="static")

# CORS configuration
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Configuration
GROQ_MODEL = "whisper-large-v3-turbo"
AI_API_ENDPOINT = "https://nitinbot001-crop-rag-api.hf.space/api/query"
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Initialize Groq client (None when the key is absent; transcribe_audio
# turns that into an HTTP 500 for callers)
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# Store conversation history (in production, use a database)
conversation_history = []
class TranscriptionResponse(BaseModel):
    # Response envelope for POST /api/process-audio. Errors are reported
    # in-band (success=False with `error` set and empty text fields)
    # rather than via HTTP error statuses.
    success: bool
    user_query: str  # Whisper transcription of the uploaded audio
    ai_response: str  # answer text returned by the external AI API
    metadata: Dict[str, Any]  # audio size/format, model names, timings
    session_id: str  # UUID4 generated per request
    timestamp: str  # ISO-8601 timestamp taken at request start
    error: Optional[str] = None  # error message when success is False
class ConversationHistory(BaseModel):
    # Wrapper for GET /api/history: the most recent stored sessions
    # (the endpoint caps this at the last 20 entries).
    sessions: list
@app.get("/", response_class=HTMLResponse)
async def read_root():
    # Serve the single-page UI; index.html sits next to app.py (the
    # Dockerfile's `COPY . .` puts both in /app).
    return FileResponse('index.html')
@app.post("/api/process-audio", response_model=TranscriptionResponse)
async def process_audio(audio: UploadFile = File(...)):
    """Transcribe an uploaded audio clip and return the AI's answer.

    Accepts .wav/.webm/.mp3/.m4a/.ogg uploads, transcribes them with
    Groq Whisper, forwards the transcription to the external AI API,
    records the exchange in the in-memory history, and returns a
    TranscriptionResponse. Failures are reported in-band via
    ``success=False`` / ``error`` (HTTP 200), matching the original
    contract.
    """
    session_id = str(uuid.uuid4())
    timestamp = datetime.now().isoformat()
    tmp_path = None

    try:
        # Validate file type. FIX: UploadFile.filename may be None for
        # raw uploads; the original would raise AttributeError instead
        # of a clean 400.
        if not audio.filename or not audio.filename.endswith(
            ('.wav', '.webm', '.mp3', '.m4a', '.ogg')
        ):
            raise HTTPException(status_code=400, detail="Invalid audio format")

        # Read audio data
        audio_data = await audio.read()

        # Persist to a temp file for the transcription call. Groq accepts
        # webm and wav alike, so the bytes are written as-is regardless of
        # container format (the original had two identical if/else
        # branches doing exactly this).
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
            tmp_file.write(audio_data)
            tmp_path = tmp_file.name

        # Transcribe with Groq
        user_query = await transcribe_audio(tmp_path, audio.filename)

        # Get AI response
        ai_response = await get_ai_response(user_query)

        # Create metadata
        metadata = {
            "audio_size": len(audio_data),
            "audio_format": audio.filename.rsplit('.', 1)[-1],
            "transcription_model": GROQ_MODEL,
            "ai_endpoint": AI_API_ENDPOINT,
            "processing_time": datetime.now().isoformat(),
        }

        # Store in history
        conversation_history.append({
            "session_id": session_id,
            "timestamp": timestamp,
            "user_query": user_query,
            "ai_response": ai_response,
            "metadata": metadata,
        })

        return TranscriptionResponse(
            success=True,
            user_query=user_query,
            ai_response=ai_response,
            metadata=metadata,
            session_id=session_id,
            timestamp=timestamp,
        )

    except Exception as e:
        return TranscriptionResponse(
            success=False,
            user_query="",
            ai_response="",
            metadata={},
            session_id=session_id,
            timestamp=timestamp,
            error=str(e),
        )
    finally:
        # FIX: always remove the temp file. The original only unlinked
        # on the success path, leaking a file per failed request.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
async def transcribe_audio(file_path: str, original_filename: str) -> str:
    """Run Groq Whisper on the audio stored at *file_path*.

    *original_filename* is forwarded to the API so Groq can infer the
    container format from its extension. Returns the stripped
    transcription text. Raises HTTPException(500) when the API key is
    not configured or the transcription call fails.
    """
    if groq_client is None:
        raise HTTPException(status_code=500, detail="GROQ_API_KEY not configured")

    try:
        with open(file_path, "rb") as audio_file:
            payload = audio_file.read()

        result = groq_client.audio.transcriptions.create(
            file=(original_filename, payload),
            model=GROQ_MODEL,
            response_format="text",
        )

        # The SDK may hand back an object exposing .text, a plain dict,
        # or a bare string, depending on response_format and version.
        if hasattr(result, 'text'):
            transcript = result.text
        elif isinstance(result, dict):
            transcript = result.get('text', '')
        else:
            transcript = str(result)

        return transcript.strip()

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
async def get_ai_response(query: str) -> str:
    """Fetch an answer for *query* from the external AI API.

    Never raises: timeouts and other failures are converted to
    user-facing error strings, matching the original contract.

    FIX: the original called blocking ``requests.post`` directly inside
    an ``async def``, freezing the whole event loop for up to 30 s per
    request; the call now runs in a worker thread via asyncio.to_thread.
    """
    payload = {"query": query}

    def _post():
        # Blocking HTTP call, executed off the event loop.
        return requests.post(
            AI_API_ENDPOINT,
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=30,
        )

    try:
        response = await asyncio.to_thread(_post)
        response.raise_for_status()

        result = response.json()

        # Extract text from response (adjust based on actual API response
        # format). Probe the known keys in priority order; unlike the
        # original nested .get chain this does not eagerly build every
        # fallback value.
        if isinstance(result, dict):
            for key in ('response', 'answer', 'text', 'message'):
                if key in result:
                    return result[key]
            return str(result)
        return str(result)

    except requests.exceptions.Timeout:
        return "I'm sorry, the AI service is taking too long to respond. Please try again."
    except Exception as e:
        return f"I encountered an error while processing your request: {str(e)}"
@app.get("/api/history", response_model=ConversationHistory)
async def get_history():
    """Return the most recent conversations (at most the last 20)."""
    recent_sessions = conversation_history[-20:]
    return ConversationHistory(sessions=recent_sessions)
@app.delete("/api/history")
async def clear_history():
    """Drop all stored conversation sessions."""
    # Clear the shared list in place — equivalent to rebinding the
    # module global, since this module holds the only reference.
    conversation_history.clear()
    return {"message": "History cleared"}
if __name__ == "__main__":
    # Local dev entry point; the Docker image launches uvicorn directly
    # on 7860. PORT overrides the default so both setups can share this
    # file (default stays 8000, backward compatible).
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "8000")))
index.html ADDED
@@ -0,0 +1,713 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Voice AI Assistant</title>
7
+ <style>
8
+ :root {
9
+ --primary-color: #4a90e2;
10
+ --success-color: #52c41a;
11
+ --danger-color: #ff4d4f;
12
+ --bg-color: #f0f2f5;
13
+ --card-bg: #ffffff;
14
+ --text-color: #333333;
15
+ --border-color: #d9d9d9;
16
+ }
17
+
18
+ * {
19
+ margin: 0;
20
+ padding: 0;
21
+ box-sizing: border-box;
22
+ }
23
+
24
+ body {
25
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
26
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
27
+ min-height: 100vh;
28
+ color: var(--text-color);
29
+ }
30
+
31
+ .container {
32
+ max-width: 900px;
33
+ margin: 0 auto;
34
+ padding: 20px;
35
+ }
36
+
37
+ header {
38
+ text-align: center;
39
+ color: white;
40
+ margin-bottom: 30px;
41
+ }
42
+
43
+ header h1 {
44
+ font-size: 2.5em;
45
+ margin-bottom: 10px;
46
+ }
47
+
48
+ .subtitle {
49
+ font-size: 1.1em;
50
+ opacity: 0.9;
51
+ }
52
+
53
+ .main-card {
54
+ background: var(--card-bg);
55
+ border-radius: 20px;
56
+ padding: 30px;
57
+ box-shadow: 0 10px 40px rgba(0, 0, 0, 0.1);
58
+ margin-bottom: 30px;
59
+ }
60
+
61
+ .recording-section {
62
+ text-align: center;
63
+ margin-bottom: 30px;
64
+ }
65
+
66
+ .record-btn {
67
+ width: 150px;
68
+ height: 150px;
69
+ border-radius: 50%;
70
+ border: none;
71
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
72
+ color: white;
73
+ cursor: pointer;
74
+ display: flex;
75
+ flex-direction: column;
76
+ align-items: center;
77
+ justify-content: center;
78
+ margin: 0 auto 20px;
79
+ transition: all 0.3s ease;
80
+ box-shadow: 0 5px 20px rgba(102, 126, 234, 0.4);
81
+ }
82
+
83
+ .record-btn:hover {
84
+ transform: scale(1.05);
85
+ box-shadow: 0 8px 30px rgba(102, 126, 234, 0.6);
86
+ }
87
+
88
+ .record-btn.recording {
89
+ background: linear-gradient(135deg, #ff4d4f 0%, #ff7875 100%);
90
+ animation: pulse 1.5s infinite;
91
+ }
92
+
93
+ @keyframes pulse {
94
+ 0% { transform: scale(1); }
95
+ 50% { transform: scale(1.05); }
96
+ 100% { transform: scale(1); }
97
+ }
98
+
99
+ .record-icon {
100
+ font-size: 3em;
101
+ margin-bottom: 10px;
102
+ }
103
+
104
+ .visualizer {
105
+ margin: 20px 0;
106
+ height: 100px;
107
+ background: #f5f5f5;
108
+ border-radius: 10px;
109
+ overflow: hidden;
110
+ }
111
+
112
+ .visualizer canvas {
113
+ width: 100%;
114
+ height: 100%;
115
+ }
116
+
117
+ .status {
118
+ font-size: 1.1em;
119
+ color: #666;
120
+ margin-top: 10px;
121
+ }
122
+
123
+ .status.recording {
124
+ color: var(--danger-color);
125
+ font-weight: bold;
126
+ }
127
+
128
+ .status.processing {
129
+ color: var(--primary-color);
130
+ }
131
+
132
+ .status.success {
133
+ color: var(--success-color);
134
+ }
135
+
136
+ .tts-controls {
137
+ display: flex;
138
+ align-items: center;
139
+ gap: 15px;
140
+ margin-bottom: 20px;
141
+ padding: 15px;
142
+ background: #f5f5f5;
143
+ border-radius: 10px;
144
+ }
145
+
146
+ .switch {
147
+ position: relative;
148
+ display: inline-block;
149
+ width: 50px;
150
+ height: 24px;
151
+ }
152
+
153
+ .switch input {
154
+ opacity: 0;
155
+ width: 0;
156
+ height: 0;
157
+ }
158
+
159
+ .slider {
160
+ position: absolute;
161
+ cursor: pointer;
162
+ top: 0;
163
+ left: 0;
164
+ right: 0;
165
+ bottom: 0;
166
+ background-color: #ccc;
167
+ transition: .4s;
168
+ border-radius: 24px;
169
+ }
170
+
171
+ .slider:before {
172
+ position: absolute;
173
+ content: "";
174
+ height: 18px;
175
+ width: 18px;
176
+ left: 3px;
177
+ bottom: 3px;
178
+ background-color: white;
179
+ transition: .4s;
180
+ border-radius: 50%;
181
+ }
182
+
183
+ input:checked + .slider {
184
+ background-color: var(--primary-color);
185
+ }
186
+
187
+ input:checked + .slider:before {
188
+ transform: translateX(26px);
189
+ }
190
+
191
+ .voice-select {
192
+ padding: 8px 12px;
193
+ border: 1px solid var(--border-color);
194
+ border-radius: 5px;
195
+ background: white;
196
+ font-size: 14px;
197
+ }
198
+
199
+ .conversation-display {
200
+ margin-top: 30px;
201
+ padding: 20px;
202
+ background: #f9f9f9;
203
+ border-radius: 10px;
204
+ }
205
+
206
+ .user-query, .ai-response {
207
+ margin-bottom: 20px;
208
+ }
209
+
210
+ .user-query h3, .ai-response h3 {
211
+ color: var(--primary-color);
212
+ margin-bottom: 10px;
213
+ font-size: 1.1em;
214
+ }
215
+
216
+ .user-query p, .ai-response p {
217
+ line-height: 1.6;
218
+ color: var(--text-color);
219
+ }
220
+
221
+ .speak-btn {
222
+ margin-top: 10px;
223
+ padding: 8px 16px;
224
+ background: var(--primary-color);
225
+ color: white;
226
+ border: none;
227
+ border-radius: 5px;
228
+ cursor: pointer;
229
+ font-size: 14px;
230
+ transition: background 0.3s;
231
+ }
232
+
233
+ .speak-btn:hover {
234
+ background: #3a7bc8;
235
+ }
236
+
237
+ .metadata {
238
+ margin-top: 20px;
239
+ padding: 15px;
240
+ background: #f5f5f5;
241
+ border-radius: 10px;
242
+ }
243
+
244
+ .metadata h4 {
245
+ margin-bottom: 10px;
246
+ color: #666;
247
+ }
248
+
249
+ .metadata pre {
250
+ font-size: 12px;
251
+ color: #666;
252
+ white-space: pre-wrap;
253
+ word-wrap: break-word;
254
+ }
255
+
256
+ .history-section {
257
+ background: var(--card-bg);
258
+ border-radius: 20px;
259
+ padding: 25px;
260
+ box-shadow: 0 10px 40px rgba(0, 0, 0, 0.1);
261
+ }
262
+
263
+ .history-header {
264
+ display: flex;
265
+ justify-content: space-between;
266
+ align-items: center;
267
+ margin-bottom: 20px;
268
+ }
269
+
270
+ .history-header h2 {
271
+ color: var(--primary-color);
272
+ }
273
+
274
+ .clear-btn {
275
+ padding: 8px 16px;
276
+ background: var(--danger-color);
277
+ color: white;
278
+ border: none;
279
+ border-radius: 5px;
280
+ cursor: pointer;
281
+ font-size: 14px;
282
+ }
283
+
284
+ .clear-btn:hover {
285
+ background: #ff7875;
286
+ }
287
+
288
+ .history-list {
289
+ max-height: 400px;
290
+ overflow-y: auto;
291
+ }
292
+
293
+ .history-item {
294
+ padding: 15px;
295
+ margin-bottom: 10px;
296
+ background: #f9f9f9;
297
+ border-radius: 10px;
298
+ border-left: 4px solid var(--primary-color);
299
+ }
300
+
301
+ .history-item .timestamp {
302
+ font-size: 12px;
303
+ color: #999;
304
+ margin-bottom: 5px;
305
+ }
306
+
307
+ .history-item .query {
308
+ font-weight: 500;
309
+ margin-bottom: 5px;
310
+ }
311
+
312
+ .history-item .response {
313
+ color: #666;
314
+ font-size: 14px;
315
+ }
316
+
317
+ .hidden {
318
+ display: none !important;
319
+ }
320
+
321
+ .error {
322
+ color: var(--danger-color);
323
+ padding: 10px;
324
+ background: #fff2f0;
325
+ border-radius: 5px;
326
+ margin-top: 10px;
327
+ }
328
+
329
+ @media (max-width: 768px) {
330
+ .container {
331
+ padding: 10px;
332
+ }
333
+
334
+ header h1 {
335
+ font-size: 2em;
336
+ }
337
+
338
+ .record-btn {
339
+ width: 120px;
340
+ height: 120px;
341
+ }
342
+
343
+ .record-icon {
344
+ font-size: 2.5em;
345
+ }
346
+ }
347
+ </style>
348
+ </head>
349
+ <body>
350
+ <div class="container">
351
+ <header>
352
+ <h1>🎙️ Voice AI Assistant</h1>
353
+ <p class="subtitle">Ask questions using your voice</p>
354
+ </header>
355
+
356
+ <div class="main-card">
357
+ <!-- Recording Controls -->
358
+ <div class="recording-section">
359
+ <button id="recordBtn" class="record-btn">
360
+ <span class="record-icon">🎤</span>
361
+ <span class="record-text">Start Recording</span>
362
+ </button>
363
+
364
+ <div id="visualizer" class="visualizer hidden">
365
+ <canvas id="waveform"></canvas>
366
+ </div>
367
+
368
+ <div id="status" class="status">
369
+ Ready to record
370
+ </div>
371
+ </div>
372
+
373
+ <!-- TTS Controls -->
374
+ <div class="tts-controls">
375
+ <label class="switch">
376
+ <input type="checkbox" id="autoSpeak" checked>
377
+ <span class="slider"></span>
378
+ </label>
379
+ <span>Auto-speak responses</span>
380
+
381
+ <select id="voiceSelect" class="voice-select">
382
+ <option value="">Default Voice</option>
383
+ </select>
384
+ </div>
385
+
386
+ <!-- Current Conversation -->
387
+ <div id="currentConversation" class="conversation-display hidden">
388
+ <div class="user-query">
389
+ <h3>You asked:</h3>
390
+ <p id="userText"></p>
391
+ </div>
392
+ <div class="ai-response">
393
+ <h3>AI Response:</h3>
394
+ <p id="aiText"></p>
395
+ <button id="speakBtn" class="speak-btn">🔊 Speak</button>
396
+ </div>
397
+ </div>
398
+
399
+ <!-- Metadata Display -->
400
+ <div id="metadata" class="metadata hidden">
401
+ <h4>Session Details</h4>
402
+ <pre id="metadataContent"></pre>
403
+ </div>
404
+ </div>
405
+
406
+ <!-- Conversation History -->
407
+ <div class="history-section">
408
+ <div class="history-header">
409
+ <h2>Conversation History</h2>
410
+ <button id="clearHistory" class="clear-btn">Clear All</button>
411
+ </div>
412
+ <div id="historyList" class="history-list"></div>
413
+ </div>
414
+ </div>
415
+
416
+ <script>
/**
 * Front-end controller for the Voice AI Assistant page.
 *
 * Responsibilities: record microphone audio (MediaRecorder, webm),
 * POST it to /api/process-audio, render the transcription and AI
 * answer, optionally speak the answer via the Web Speech API, draw a
 * live frequency visualizer, and manage the server-side history.
 */
class VoiceAIApp {
    constructor() {
        // FIX: use the page's own origin instead of a hard-coded
        // http://localhost:8000 — the backend serves this page itself
        // and the Docker image listens on 7860, so the fixed URL broke
        // every non-local deployment.
        this.backendUrl = window.location.origin;
        this.mediaRecorder = null;
        this.audioChunks = [];
        this.isRecording = false;
        this.recognition = null;
        this.synthesis = window.speechSynthesis;
        this.voices = [];
        this.currentSession = null;

        this.initializeElements();
        this.initializeEventListeners();
        this.loadVoices();
        this.loadHistory();
    }

    // Cache every DOM node the app touches.
    initializeElements() {
        this.elements = {
            recordBtn: document.getElementById('recordBtn'),
            status: document.getElementById('status'),
            visualizer: document.getElementById('visualizer'),
            waveform: document.getElementById('waveform'),
            autoSpeak: document.getElementById('autoSpeak'),
            voiceSelect: document.getElementById('voiceSelect'),
            currentConversation: document.getElementById('currentConversation'),
            userText: document.getElementById('userText'),
            aiText: document.getElementById('aiText'),
            speakBtn: document.getElementById('speakBtn'),
            metadata: document.getElementById('metadata'),
            metadataContent: document.getElementById('metadataContent'),
            historyList: document.getElementById('historyList'),
            clearHistory: document.getElementById('clearHistory')
        };
    }

    initializeEventListeners() {
        this.elements.recordBtn.addEventListener('click', () => this.toggleRecording());
        this.elements.speakBtn.addEventListener('click', () => this.speakResponse());
        this.elements.clearHistory.addEventListener('click', () => this.clearHistory());

        // TTS voices usually arrive asynchronously; refresh on change.
        this.synthesis.addEventListener('voiceschanged', () => this.loadVoices());
    }

    // Populate the voice <select> with all available TTS voices.
    loadVoices() {
        this.voices = this.synthesis.getVoices();
        this.elements.voiceSelect.innerHTML = '<option value="">Default Voice</option>';

        this.voices.forEach((voice, index) => {
            const option = document.createElement('option');
            option.value = index;
            option.textContent = `${voice.name} (${voice.lang})`;
            this.elements.voiceSelect.appendChild(option);
        });
    }

    async toggleRecording() {
        if (this.isRecording) {
            this.stopRecording();
        } else {
            this.startRecording();
        }
    }

    // Request the microphone, start capturing webm audio, update the UI.
    async startRecording() {
        try {
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

            // Setup MediaRecorder
            const mimeType = 'audio/webm';
            this.mediaRecorder = new MediaRecorder(stream, { mimeType });
            this.audioChunks = [];

            this.mediaRecorder.ondataavailable = (event) => {
                if (event.data.size > 0) {
                    this.audioChunks.push(event.data);
                }
            };

            this.mediaRecorder.onstop = async () => {
                const audioBlob = new Blob(this.audioChunks, { type: mimeType });
                await this.processAudio(audioBlob);
                // Release the microphone once processing is done.
                stream.getTracks().forEach(track => track.stop());
            };

            this.mediaRecorder.start();
            this.isRecording = true;

            // Update UI
            this.elements.recordBtn.classList.add('recording');
            this.elements.recordBtn.querySelector('.record-text').textContent = 'Stop Recording';
            this.elements.status.textContent = 'Recording... Speak now';
            this.elements.status.className = 'status recording';
            this.elements.visualizer.classList.remove('hidden');

            // Start visualizer
            this.startVisualizer(stream);

        } catch (error) {
            console.error('Error accessing microphone:', error);
            this.showError('Could not access microphone. Please check permissions.');
        }
    }

    stopRecording() {
        if (this.mediaRecorder && this.isRecording) {
            this.mediaRecorder.stop();
            this.isRecording = false;

            // Update UI
            this.elements.recordBtn.classList.remove('recording');
            this.elements.recordBtn.querySelector('.record-text').textContent = 'Start Recording';
            this.elements.status.textContent = 'Processing audio...';
            this.elements.status.className = 'status processing';
            this.elements.visualizer.classList.add('hidden');
        }
    }

    // Draw a live frequency-bar visualization of the mic stream until
    // recording stops.
    startVisualizer(stream) {
        const audioContext = new (window.AudioContext || window.webkitAudioContext)();
        const analyser = audioContext.createAnalyser();
        const microphone = audioContext.createMediaStreamSource(stream);
        const canvas = this.elements.waveform;
        const ctx = canvas.getContext('2d');

        analyser.fftSize = 256;
        microphone.connect(analyser);

        const bufferLength = analyser.frequencyBinCount;
        const dataArray = new Uint8Array(bufferLength);

        canvas.width = canvas.offsetWidth;
        canvas.height = canvas.offsetHeight;

        const draw = () => {
            // Stop the animation loop as soon as recording ends.
            if (!this.isRecording) return;

            requestAnimationFrame(draw);

            analyser.getByteFrequencyData(dataArray);

            ctx.fillStyle = '#f5f5f5';
            ctx.fillRect(0, 0, canvas.width, canvas.height);

            const barWidth = (canvas.width / bufferLength) * 2.5;
            let barHeight;
            let x = 0;

            for (let i = 0; i < bufferLength; i++) {
                barHeight = (dataArray[i] / 255) * canvas.height;

                const gradient = ctx.createLinearGradient(0, 0, 0, canvas.height);
                gradient.addColorStop(0, '#667eea');
                gradient.addColorStop(1, '#764ba2');

                ctx.fillStyle = gradient;
                ctx.fillRect(x, canvas.height - barHeight, barWidth, barHeight);

                x += barWidth + 1;
            }
        };

        draw();
    }

    // Upload the finished recording and render the backend's answer.
    async processAudio(audioBlob) {
        try {
            const formData = new FormData();
            formData.append('audio', audioBlob, 'recording.webm');

            const response = await fetch(`${this.backendUrl}/api/process-audio`, {
                method: 'POST',
                body: formData
            });

            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }

            const data = await response.json();

            if (data.success) {
                this.displayConversation(data);
                this.saveToHistory(data);

                if (this.elements.autoSpeak.checked) {
                    this.speakText(data.ai_response);
                }

                this.elements.status.textContent = 'Success! Response received';
                this.elements.status.className = 'status success';
            } else {
                throw new Error(data.error || 'Processing failed');
            }

        } catch (error) {
            console.error('Error processing audio:', error);
            this.showError(`Error: ${error.message}`);
        }
    }

    displayConversation(data) {
        this.currentSession = data;

        this.elements.userText.textContent = data.user_query;
        this.elements.aiText.textContent = data.ai_response;
        this.elements.currentConversation.classList.remove('hidden');

        // Display metadata
        this.elements.metadataContent.textContent = JSON.stringify({
            session_id: data.session_id,
            timestamp: data.timestamp,
            ...data.metadata
        }, null, 2);
        this.elements.metadata.classList.remove('hidden');
    }

    speakResponse() {
        if (this.currentSession) {
            this.speakText(this.currentSession.ai_response);
        }
    }

    // Speak *text* with the selected (or default) TTS voice.
    speakText(text) {
        // Cancel any ongoing speech
        this.synthesis.cancel();

        const utterance = new SpeechSynthesisUtterance(text);

        // Set voice if selected
        const selectedVoiceIndex = this.elements.voiceSelect.value;
        if (selectedVoiceIndex && this.voices[selectedVoiceIndex]) {
            utterance.voice = this.voices[selectedVoiceIndex];
        }

        // Set speech parameters
        utterance.rate = 0.9;
        utterance.pitch = 1;
        utterance.volume = 1;

        this.synthesis.speak(utterance);
    }

    saveToHistory(data) {
        // History lives server-side; just refresh the rendered list.
        this.loadHistory();
    }

    // Fetch recent sessions and rebuild the history list.
    // FIX: entries are built with createElement/textContent instead of
    // interpolating server-provided text into innerHTML — the original
    // was an XSS vector (transcribed speech / AI output rendered as
    // HTML).
    async loadHistory() {
        try {
            const response = await fetch(`${this.backendUrl}/api/history`);
            const data = await response.json();

            this.elements.historyList.innerHTML = '';

            data.sessions.reverse().forEach(session => {
                const item = document.createElement('div');
                item.className = 'history-item';

                const timestamp = document.createElement('div');
                timestamp.className = 'timestamp';
                timestamp.textContent = new Date(session.timestamp).toLocaleString();

                const query = document.createElement('div');
                query.className = 'query';
                const qLabel = document.createElement('strong');
                qLabel.textContent = 'Q:';
                query.appendChild(qLabel);
                query.appendChild(document.createTextNode(` ${session.user_query}`));

                const answer = document.createElement('div');
                answer.className = 'response';
                const aLabel = document.createElement('strong');
                aLabel.textContent = 'A:';
                answer.appendChild(aLabel);
                answer.appendChild(document.createTextNode(` ${session.ai_response}`));

                item.append(timestamp, query, answer);
                this.elements.historyList.appendChild(item);
            });
        } catch (error) {
            console.error('Error loading history:', error);
        }
    }

    async clearHistory() {
        if (confirm('Are you sure you want to clear all conversation history?')) {
            try {
                await fetch(`${this.backendUrl}/api/history`, {
                    method: 'DELETE'
                });
                this.elements.historyList.innerHTML = '';
            } catch (error) {
                console.error('Error clearing history:', error);
            }
        }
    }

    showError(message) {
        this.elements.status.textContent = message;
        this.elements.status.className = 'status error';
    }
}
706
+
// Boot the controller once the DOM is ready.
document.addEventListener('DOMContentLoaded', function () {
    new VoiceAIApp();
});
711
+ </script>
712
+ </body>
713
+ </html>
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ python-multipart==0.0.6
4
+ groq==0.11.0
5
+ python-dotenv==1.0.0
6
+ requests==2.31.0
7
+ numpy==1.24.3
8
+ pydantic==2.5.0