NitinBot001 committed on
Commit
7a832e3
·
verified ·
1 Parent(s): 64ac061

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +221 -221
app.py CHANGED
@@ -1,221 +1,221 @@
1
- import os
2
- import io
3
- import json
4
- import uuid
5
- import wave
6
- import tempfile
7
- from datetime import datetime
8
- from typing import Optional, Dict, Any
9
- from pathlib import Path
10
-
11
- from fastapi import FastAPI, File, UploadFile, HTTPException
12
- from fastapi.middleware.cors import CORSMiddleware
13
- from fastapi.responses import JSONResponse
14
- from pydantic import BaseModel
15
- import uvicorn
16
- import requests
17
- import numpy as np
18
- from groq import Groq
19
- import dotenv
20
-
21
- # Load environment variables
22
- dotenv.load_dotenv()
23
-
24
- app = FastAPI(title="Voice AI Backend")
25
-
26
- # Mount static files
27
- app.mount("/static", StaticFiles(directory="static"), name="static")
28
-
29
-
30
- # CORS configuration
31
- app.add_middleware(
32
- CORSMiddleware,
33
- allow_origins=["*"], # Configure appropriately for production
34
- allow_credentials=True,
35
- allow_methods=["*"],
36
- allow_headers=["*"],
37
- )
38
-
39
- # Configuration
40
- GROQ_MODEL = "whisper-large-v3-turbo"
41
- AI_API_ENDPOINT = "https://nitinbot001-crop-rag-api.hf.space/api/query"
42
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
43
-
44
- # Initialize Groq client
45
- groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
46
-
47
- # Store conversation history (in production, use a database)
48
- conversation_history = []
49
-
50
- class TranscriptionResponse(BaseModel):
51
- success: bool
52
- user_query: str
53
- ai_response: str
54
- metadata: Dict[str, Any]
55
- session_id: str
56
- timestamp: str
57
- error: Optional[str] = None
58
-
59
- class ConversationHistory(BaseModel):
60
- sessions: list
61
-
62
- @app.get("/", response_class=HTMLResponse)
63
- async def read_root():
64
- return FileResponse('index.html')
65
-
66
- @app.post("/api/process-audio", response_model=TranscriptionResponse)
67
- async def process_audio(audio: UploadFile = File(...)):
68
- """
69
- Process audio file: transcribe and get AI response
70
- """
71
- session_id = str(uuid.uuid4())
72
- timestamp = datetime.now().isoformat()
73
-
74
- try:
75
- # Validate file type
76
- if not audio.filename.endswith(('.wav', '.webm', '.mp3', '.m4a', '.ogg')):
77
- raise HTTPException(status_code=400, detail="Invalid audio format")
78
-
79
- # Read audio data
80
- audio_data = await audio.read()
81
-
82
- # Save temporary file for processing
83
- with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
84
- # If it's webm (from browser), we need to save it as-is
85
- # Groq can handle webm directly
86
- if audio.filename.endswith('.webm'):
87
- tmp_file.write(audio_data)
88
- tmp_path = tmp_file.name
89
- else:
90
- # For wav files, write directly
91
- tmp_file.write(audio_data)
92
- tmp_path = tmp_file.name
93
-
94
- # Transcribe with Groq
95
- user_query = await transcribe_audio(tmp_path, audio.filename)
96
-
97
- # Get AI response
98
- ai_response = await get_ai_response(user_query)
99
-
100
- # Create metadata
101
- metadata = {
102
- "audio_size": len(audio_data),
103
- "audio_format": audio.filename.split('.')[-1],
104
- "transcription_model": GROQ_MODEL,
105
- "ai_endpoint": AI_API_ENDPOINT,
106
- "processing_time": datetime.now().isoformat(),
107
- }
108
-
109
- # Store in history
110
- conversation_history.append({
111
- "session_id": session_id,
112
- "timestamp": timestamp,
113
- "user_query": user_query,
114
- "ai_response": ai_response,
115
- "metadata": metadata
116
- })
117
-
118
- # Clean up
119
- os.unlink(tmp_path)
120
-
121
- return TranscriptionResponse(
122
- success=True,
123
- user_query=user_query,
124
- ai_response=ai_response,
125
- metadata=metadata,
126
- session_id=session_id,
127
- timestamp=timestamp
128
- )
129
-
130
- except Exception as e:
131
- return TranscriptionResponse(
132
- success=False,
133
- user_query="",
134
- ai_response="",
135
- metadata={},
136
- session_id=session_id,
137
- timestamp=timestamp,
138
- error=str(e)
139
- )
140
-
141
- async def transcribe_audio(file_path: str, original_filename: str) -> str:
142
- """
143
- Transcribe audio using Groq Whisper
144
- """
145
- if not groq_client:
146
- raise HTTPException(status_code=500, detail="GROQ_API_KEY not configured")
147
-
148
- try:
149
- with open(file_path, "rb") as audio_file:
150
- transcription = groq_client.audio.transcriptions.create(
151
- file=(original_filename, audio_file.read()),
152
- model=GROQ_MODEL,
153
- response_format="text"
154
- )
155
-
156
- # Handle different response formats
157
- if hasattr(transcription, 'text'):
158
- text = transcription.text
159
- elif isinstance(transcription, dict):
160
- text = transcription.get('text', '')
161
- else:
162
- text = str(transcription)
163
-
164
- return text.strip()
165
-
166
- except Exception as e:
167
- raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
168
-
169
- async def get_ai_response(query: str) -> str:
170
- """
171
- Get response from AI API
172
- """
173
- try:
174
- headers = {"Content-Type": "application/json"}
175
- payload = {"query": query}
176
-
177
- response = requests.post(
178
- AI_API_ENDPOINT,
179
- json=payload,
180
- headers=headers,
181
- timeout=30
182
- )
183
- response.raise_for_status()
184
-
185
- result = response.json()
186
-
187
- # Extract text from response (adjust based on actual API response format)
188
- if isinstance(result, dict):
189
- # Try different possible response keys
190
- ai_text = result.get('response',
191
- result.get('answer',
192
- result.get('text',
193
- result.get('message', str(result)))))
194
- else:
195
- ai_text = str(result)
196
-
197
- return ai_text
198
-
199
- except requests.exceptions.Timeout:
200
- return "I'm sorry, the AI service is taking too long to respond. Please try again."
201
- except Exception as e:
202
- return f"I encountered an error while processing your request: {str(e)}"
203
-
204
- @app.get("/api/history", response_model=ConversationHistory)
205
- async def get_history():
206
- """
207
- Get conversation history
208
- """
209
- return ConversationHistory(sessions=conversation_history[-20:]) # Last 20 conversations
210
-
211
- @app.delete("/api/history")
212
- async def clear_history():
213
- """
214
- Clear conversation history
215
- """
216
- global conversation_history
217
- conversation_history = []
218
- return {"message": "History cleared"}
219
-
220
- if __name__ == "__main__":
221
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
1
import asyncio
import io
import json
import os
import tempfile
import uuid
import wave
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional

import dotenv
import numpy as np
import requests
import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from groq import Groq
from pydantic import BaseModel
20
+
21
# Load environment variables from a local .env file (e.g. GROQ_API_KEY).
dotenv.load_dotenv()

app = FastAPI(title="Voice AI Backend")

# Mount static files.
# NOTE(review): requires a "static" directory next to this file — startup
# fails if it is missing; confirm it exists in the deployment image.
app.mount("/static", StaticFiles(directory="static"), name="static")


# CORS configuration
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Configuration
GROQ_MODEL = "whisper-large-v3-turbo"  # Groq-hosted Whisper model for transcription
AI_API_ENDPOINT = "https://nitinbot001-crop-rag-api.hf.space/api/query"
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Initialize Groq client; None when no API key is configured, in which case
# transcription requests fail with HTTP 500 (see transcribe_audio).
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# Store conversation history (in production, use a database)
conversation_history = []
49
+
50
class TranscriptionResponse(BaseModel):
    """Response body for POST /api/process-audio."""

    success: bool  # False when any processing step raised; see `error`
    user_query: str  # transcript of the uploaded audio ("" on failure)
    ai_response: str  # answer from the downstream AI API ("" on failure)
    metadata: Dict[str, Any]  # audio size/format, model and endpoint used ({} on failure)
    session_id: str  # UUID generated per request
    timestamp: str  # ISO-8601 timestamp taken at request start
    error: Optional[str] = None  # error message when success is False
58
+
59
class ConversationHistory(BaseModel):
    """Response body for GET /api/history."""

    sessions: list  # recent conversation_history entries (dicts; newest last)
61
+
62
+ @app.get("/", response_class=HTMLResponse)
63
+ async def read_root():
64
+ return FileResponse('index.html')
65
+
66
+ @app.post("/api/process-audio", response_model=TranscriptionResponse)
67
+ async def process_audio(audio: UploadFile = File(...)):
68
+ """
69
+ Process audio file: transcribe and get AI response
70
+ """
71
+ session_id = str(uuid.uuid4())
72
+ timestamp = datetime.now().isoformat()
73
+
74
+ try:
75
+ # Validate file type
76
+ if not audio.filename.endswith(('.wav', '.webm', '.mp3', '.m4a', '.ogg')):
77
+ raise HTTPException(status_code=400, detail="Invalid audio format")
78
+
79
+ # Read audio data
80
+ audio_data = await audio.read()
81
+
82
+ # Save temporary file for processing
83
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
84
+ # If it's webm (from browser), we need to save it as-is
85
+ # Groq can handle webm directly
86
+ if audio.filename.endswith('.webm'):
87
+ tmp_file.write(audio_data)
88
+ tmp_path = tmp_file.name
89
+ else:
90
+ # For wav files, write directly
91
+ tmp_file.write(audio_data)
92
+ tmp_path = tmp_file.name
93
+
94
+ # Transcribe with Groq
95
+ user_query = await transcribe_audio(tmp_path, audio.filename)
96
+
97
+ # Get AI response
98
+ ai_response = await get_ai_response(user_query)
99
+
100
+ # Create metadata
101
+ metadata = {
102
+ "audio_size": len(audio_data),
103
+ "audio_format": audio.filename.split('.')[-1],
104
+ "transcription_model": GROQ_MODEL,
105
+ "ai_endpoint": AI_API_ENDPOINT,
106
+ "processing_time": datetime.now().isoformat(),
107
+ }
108
+
109
+ # Store in history
110
+ conversation_history.append({
111
+ "session_id": session_id,
112
+ "timestamp": timestamp,
113
+ "user_query": user_query,
114
+ "ai_response": ai_response,
115
+ "metadata": metadata
116
+ })
117
+
118
+ # Clean up
119
+ os.unlink(tmp_path)
120
+
121
+ return TranscriptionResponse(
122
+ success=True,
123
+ user_query=user_query,
124
+ ai_response=ai_response,
125
+ metadata=metadata,
126
+ session_id=session_id,
127
+ timestamp=timestamp
128
+ )
129
+
130
+ except Exception as e:
131
+ return TranscriptionResponse(
132
+ success=False,
133
+ user_query="",
134
+ ai_response="",
135
+ metadata={},
136
+ session_id=session_id,
137
+ timestamp=timestamp,
138
+ error=str(e)
139
+ )
140
+
141
async def transcribe_audio(file_path: str, original_filename: str) -> str:
    """
    Transcribe the audio at *file_path* using Groq Whisper.

    Raises HTTPException(500) when the Groq client is unconfigured or the
    transcription call fails for any reason.
    """
    if not groq_client:
        raise HTTPException(status_code=500, detail="GROQ_API_KEY not configured")

    try:
        with open(file_path, "rb") as audio_file:
            payload = audio_file.read()

        result = groq_client.audio.transcriptions.create(
            file=(original_filename, payload),
            model=GROQ_MODEL,
            response_format="text",
        )

        # Normalise the SDK's possible return shapes to a plain string.
        if hasattr(result, 'text'):
            text = result.text
        elif isinstance(result, dict):
            text = result.get('text', '')
        else:
            text = str(result)

        return text.strip()

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
168
+
169
async def get_ai_response(query: str) -> str:
    """
    Send *query* to the downstream AI API and return its answer as text.

    Network/HTTP failures are returned in-band as user-facing strings
    rather than raised, so the caller always gets something presentable.
    """
    try:
        headers = {"Content-Type": "application/json"}
        payload = {"query": query}

        # Fix: requests.post is blocking; calling it directly inside this
        # async function stalls the whole event loop for up to 30 s. Run it
        # in a worker thread instead (stdlib asyncio.to_thread).
        response = await asyncio.to_thread(
            requests.post,
            AI_API_ENDPOINT,
            json=payload,
            headers=headers,
            timeout=30,
        )
        response.raise_for_status()

        result = response.json()

        # Extract text from response (adjust based on actual API response format)
        if isinstance(result, dict):
            # Try the known keys in priority order; fall back to the raw dict.
            for key in ('response', 'answer', 'text', 'message'):
                if key in result:
                    return result[key]
            return str(result)
        return str(result)

    except requests.exceptions.Timeout:
        return "I'm sorry, the AI service is taking too long to respond. Please try again."
    except Exception as e:
        return f"I encountered an error while processing your request: {str(e)}"
203
+
204
+ @app.get("/api/history", response_model=ConversationHistory)
205
+ async def get_history():
206
+ """
207
+ Get conversation history
208
+ """
209
+ return ConversationHistory(sessions=conversation_history[-20:]) # Last 20 conversations
210
+
211
+ @app.delete("/api/history")
212
+ async def clear_history():
213
+ """
214
+ Clear conversation history
215
+ """
216
+ global conversation_history
217
+ conversation_history = []
218
+ return {"message": "History cleared"}
219
+
220
+ if __name__ == "__main__":
221
+ uvicorn.run(app, host="0.0.0.0", port=7860)