Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,471 +30,79 @@ import base64
|
|
| 30 |
from io import BytesIO
|
| 31 |
import struct
|
| 32 |
import cv2
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
#
|
| 35 |
-
def load_emotion_model(model_path):
    """Load the Keras emotion-recognition model from *model_path*.

    Returns the loaded model, or None if loading fails for any reason.
    """
    try:
        emotion_model = load_model(model_path)
    except Exception as err:
        print("Error loading emotion prediction model:", err)
        return None
    print("Emotion model loaded successfully")
    return emotion_model
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
model2 = WhisperModel(model_size, device="cpu", compute_type="int8")
|
| 50 |
-
|
| 51 |
-
# Load MusicGen model
|
| 52 |
-
def load_musicgen_model():
    """Instantiate the MusicGen-small processor and model.

    Returns a (processor, model, device) triple on success, or
    (None, None, None) when anything goes wrong.
    """
    try:
        target_device = "cuda" if torch.cuda.is_available() else "cpu"
        mg_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
        mg_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
        mg_model.to(target_device)
    except Exception as err:
        print("Error loading MusicGen model:", err)
        return None, None, None
    print("MusicGen model loaded successfully")
    return mg_processor, mg_model, target_device


# Module-level globals consumed by generate_music().
processor, music_model, device = load_musicgen_model()
|
| 65 |
-
|
| 66 |
-
# Function to chunk audio into segments
|
| 67 |
-
def chunk_audio(audio_path, chunk_duration=10):
    """Split an audio file into fixed-length chunks.

    Args:
        audio_path: Path to the source audio file.
        chunk_duration: Length of each chunk in seconds (must be positive).

    Returns:
        (chunk_file_paths, num_chunks). On any failure — or when the
        requested chunk length exceeds the whole recording — the original
        file is returned as a single "chunk": ([audio_path], 1).
    """
    try:
        # Validate before doing any (potentially slow) decoding work.
        if chunk_duration <= 0:
            raise ValueError("Chunk duration must be positive")

        audio = AudioSegment.from_file(audio_path)
        duration_ms = len(audio)
        chunk_ms = chunk_duration * 1000

        if chunk_duration > duration_ms / 1000:
            # Chunk longer than the audio: treat the whole file as one chunk.
            return [audio_path], 1

        num_chunks = math.ceil(duration_ms / chunk_ms)
        chunk_files = []

        for i in range(num_chunks):
            start_ms = i * chunk_ms
            end_ms = min((i + 1) * chunk_ms, duration_ms)
            segment = audio[start_ms:end_ms]

            # Export each segment to its own temp WAV. The segments are not
            # accumulated in memory (the original kept a `chunks` list that
            # retained every decoded segment for no purpose); callers are
            # responsible for deleting these files when done.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
                segment.export(tmp_file.name, format="wav")
                chunk_files.append(tmp_file.name)

        return chunk_files, num_chunks

    except Exception as e:
        print("Error chunking audio:", e)
        # Fall back to treating the original file as a single chunk.
        return [audio_path], 1
|
| 108 |
-
|
| 109 |
-
# Function to transcribe audio
|
| 110 |
-
def transcribe(wav_filepath):
    """Transcribe *wav_filepath* with the global Whisper model (model2).

    Returns the concatenated segment texts, or "Transcription failed"
    on any error.
    """
    try:
        segments, _ = model2.transcribe(wav_filepath, beam_size=5)
        # Consume the (lazy) segment iterator inside the try so decoding
        # errors are also caught.
        return "".join(segment.text for segment in segments)
    except Exception as err:
        print("Error transcribing audio:", err)
        return "Transcription failed"
|
| 117 |
-
|
| 118 |
-
# Function to extract MFCC features from audio
|
| 119 |
-
def extract_mfcc(wav_file_name):
    """Return the 40-coefficient mean MFCC vector for a WAV file.

    Returns a numpy array of shape (40,), or None on any failure.
    """
    try:
        samples, rate = librosa.load(wav_file_name)
        coeffs = librosa.feature.mfcc(y=samples, sr=rate, n_mfcc=40)
        # Average over time frames to get one fixed-size feature vector.
        return np.mean(coeffs.T, axis=0)
    except Exception as err:
        print("Error extracting MFCC features:", err)
        return None
|
| 127 |
-
|
| 128 |
-
# Mapping from model output index to human-readable emotion label.
emotions = {
    0: 'neutral',
    1: 'calm',
    2: 'happy',
    3: 'sad',
    4: 'angry',
    5: 'fearful',
    6: 'disgust',
    7: 'surprised',
}


def predict_emotion_from_audio(wav_filepath):
    """Classify the emotion of an audio file via MFCC features + the global model.

    Returns an emotion label from `emotions`, or a diagnostic string
    ("Model not loaded", "Error: Unable to extract features",
    "Prediction error") on failure.
    """
    try:
        if model is None:
            return "Model not loaded"

        features = extract_mfcc(wav_filepath)
        if features is None:
            return "Error: Unable to extract features"

        # The classifier expects a (batch, 40, 1) tensor.
        features = np.reshape(features, newshape=(1, 40, 1))
        scores = model.predict(features)
        best_label = np.argmax(scores[0])
        return emotions.get(best_label, "Unknown emotion")
    except Exception as err:
        print("Error predicting emotion:", err)
        return "Prediction error"
|
| 148 |
-
|
| 149 |
-
# Function to analyze sentiment from text
|
| 150 |
-
def analyze_sentiment(text):
    """Classify *text* as positive / negative / neutral via TextBlob polarity.

    Returns (sentiment_label, polarity). Empty/blank input or any error
    yields ("neutral", 0.0).
    """
    try:
        if not text or not text.strip():
            return "neutral", 0.0

        polarity = TextBlob(text).sentiment.polarity

        # Polarity within [-0.1, 0.1] is treated as neutral.
        if polarity > 0.1:
            return "positive", polarity
        if polarity < -0.1:
            return "negative", polarity
        return "neutral", polarity
    except Exception as err:
        print("Error analyzing sentiment:", err)
        return "neutral", 0.0
|
| 169 |
-
|
| 170 |
-
# Function to get image prompt based on sentiment
|
| 171 |
-
def get_image_prompt(sentiment, transcribed_text, chunk_idx, total_chunks):
|
| 172 |
-
base_prompt = f"Chunk {chunk_idx+1}/{total_chunks}: "
|
| 173 |
-
|
| 174 |
-
if sentiment == "positive":
|
| 175 |
-
return base_prompt + f"Generate a vibrant, uplifting equirectangular 360 image texture with bright colors, joyful atmosphere, and optimistic vibes representing: [{transcribed_text}]. The scene should evoke happiness and positivity."
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
-
|
| 181 |
-
return base_prompt + f"Generate a balanced, serene equirectangular 360 image texture with harmonious colors, peaceful atmosphere, and calm vibes representing: [{transcribed_text}]. The scene should evoke tranquility and balance."
|
| 182 |
-
|
| 183 |
-
# Function to get music prompt based on emotion
|
| 184 |
-
def get_music_prompt(emotion, transcribed_text, chunk_idx, total_chunks):
|
| 185 |
-
base_prompt = f"Chunk {chunk_idx+1}/{total_chunks}: "
|
| 186 |
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
'angry': f"Create intense, powerful music with driving rhythms, aggressive instrumentation, and strong dynamics that expresses: {transcribed_text}. The music should evoke anger and intensity.",
|
| 193 |
-
'fearful': f"Generate suspenseful, tense music with eerie melodies, atmospheric sounds, and unsettling vibes that represents: {transcribed_text}. The music should evoke fear and anticipation.",
|
| 194 |
-
'disgust': f"Create dark, unsettling music with dissonant harmonies, unusual sounds, and uncomfortable atmosphere that reflects: {transcribed_text}. The music should evoke discomfort and unease.",
|
| 195 |
-
'surprised': f"Generate dynamic, unexpected music with sudden changes, playful melodies, and surprising elements that represents: {transcribed_text}. The music should evoke surprise and wonder."
|
| 196 |
-
}
|
| 197 |
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
inputs = processor(
|
| 215 |
-
text=[prompt],
|
| 216 |
-
padding=True,
|
| 217 |
-
return_tensors="pt",
|
| 218 |
-
).to(device)
|
| 219 |
-
|
| 220 |
-
# Generate audio
|
| 221 |
-
audio_values = music_model.generate(**inputs, max_new_tokens=512)
|
| 222 |
-
|
| 223 |
-
# Convert to numpy array and sample rate
|
| 224 |
-
sampling_rate = music_model.config.audio_encoder.sampling_rate
|
| 225 |
-
audio_data = audio_values[0, 0].cpu().numpy()
|
| 226 |
-
|
| 227 |
-
# Normalize audio data
|
| 228 |
-
audio_data = audio_data / np.max(np.abs(audio_data))
|
| 229 |
-
|
| 230 |
-
# Create a temporary file to save the audio
|
| 231 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
|
| 232 |
-
scipy.io.wavfile.write(tmp_file.name, rate=sampling_rate, data=audio_data)
|
| 233 |
-
return tmp_file.name
|
| 234 |
-
|
| 235 |
-
except Exception as e:
|
| 236 |
-
print("Error generating music:", e)
|
| 237 |
-
return None
|
| 238 |
-
|
| 239 |
-
# --- DeepAI Image Generation (Text2Img) ---
|
| 240 |
-
api_key = os.getenv("DeepAI_api_key")
|
| 241 |
-
|
| 242 |
-
# Function to upscale image using Lanczos interpolation
|
| 243 |
-
def upscale_image(image, target_width=4096, target_height=2048):
    """
    Upscale image using DeepAI's Torch-SRGAN API for super resolution.

    Falls back to local OpenCV Lanczos resampling when no API key is
    configured or when the API call fails for any reason.

    Args:
        image: PIL.Image to upscale.
        target_width: Width of the result in pixels.
        target_height: Height of the result in pixels.

    Returns:
        A PIL.Image of exactly (target_width, target_height).
    """
    def _opencv_fallback():
        # Local Lanczos resize — used whenever the remote upscale is
        # unavailable. (Previously this snippet was duplicated three times.)
        img_array = np.array(image)
        upscaled = cv2.resize(
            img_array,
            (target_width, target_height),
            interpolation=cv2.INTER_LANCZOS4
        )
        return Image.fromarray(upscaled)

    try:
        if not api_key:
            print("No API key available for upscaling")
            return _opencv_fallback()

        # Save the image to a temporary file for upload.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_input:
            image.save(tmp_input.name, "JPEG", quality=95)

        try:
            # Make request to DeepAI torch-srgan API. Open the upload inside
            # a `with` so the file handle is always closed (the original
            # leaked it via files={'image': open(...)}).
            with open(tmp_input.name, 'rb') as upload:
                response = requests.post(
                    "https://api.deepai.org/api/torch-srgan",
                    files={'image': upload},
                    headers={'api-key': api_key}
                )
            data = response.json()
        finally:
            # Delete the temp upload on every path — the original only
            # removed it on the success branch.
            try:
                os.unlink(tmp_input.name)
            except OSError:
                pass

        if 'output_url' in data:
            # Download the upscaled image.
            img_resp = requests.get(data['output_url'])
            upscaled_image = Image.open(BytesIO(img_resp.content))

            # Ensure the image meets our target dimensions.
            if upscaled_image.size != (target_width, target_height):
                upscaled_image = upscaled_image.resize(
                    (target_width, target_height),
                    Image.Resampling.LANCZOS
                )
            return upscaled_image

        print("Error in DeepAI upscaling response:", data)
        return _opencv_fallback()

    except Exception as e:
        print(f"Error upscaling image with DeepAI: {e}")
        return _opencv_fallback()
|
| 308 |
-
|
| 309 |
-
# Function to generate image using DeepAI API
|
| 310 |
-
def generate_image(sentiment_prediction, transcribed_text, chunk_idx, total_chunks):
|
| 311 |
-
try:
|
| 312 |
-
if not api_key:
|
| 313 |
-
# fallback white image if no API key
|
| 314 |
-
base_image = Image.new('RGB', (1024,512), color='white')
|
| 315 |
else:
|
| 316 |
-
#
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
# Make request to DeepAI text2img API
|
| 320 |
-
response = requests.post(
|
| 321 |
-
"https://api.deepai.org/api/text2img",
|
| 322 |
-
data={
|
| 323 |
-
'text': prompt,
|
| 324 |
-
'width': 1024,
|
| 325 |
-
'height': 512,
|
| 326 |
-
'image_generator_version': 'hd'
|
| 327 |
-
},
|
| 328 |
-
headers={'api-key': api_key}
|
| 329 |
-
)
|
| 330 |
-
|
| 331 |
-
data = response.json()
|
| 332 |
-
if 'output_url' in data:
|
| 333 |
-
# Download the generated image
|
| 334 |
-
img_resp = requests.get(data['output_url'])
|
| 335 |
-
base_image = Image.open(BytesIO(img_resp.content))
|
| 336 |
-
else:
|
| 337 |
-
print("Error in DeepAI response:", data)
|
| 338 |
-
# Return a fallback image
|
| 339 |
-
base_image = Image.new('RGB', (1024,512), color='white')
|
| 340 |
-
|
| 341 |
-
# Upscale the image for better quality in 360 viewer
|
| 342 |
-
upscaled_image = upscale_image(base_image)
|
| 343 |
-
return upscaled_image
|
| 344 |
-
|
| 345 |
-
except Exception as e:
|
| 346 |
-
print("Error generating image:", e)
|
| 347 |
-
# Return a fallback image
|
| 348 |
-
return Image.new('RGB', (1024,512), color='white')
|
| 349 |
-
|
| 350 |
-
# Function to process a single chunk
|
| 351 |
-
def process_chunk(chunk_path, chunk_idx, total_chunks, generate_audio=True):
    """Run the full pipeline on one audio chunk.

    Pipeline: acoustic emotion -> transcription -> text sentiment ->
    image generation -> 360 metadata -> (optional) music generation.

    Returns a dict with keys chunk_index / emotion / transcription /
    sentiment / image / image_360 / music. On any failure a fallback dict
    with the same keys is returned so the UI layout never breaks.
    """
    try:
        # Acoustic emotion drives music; transcribed-text sentiment drives imagery.
        detected_emotion = predict_emotion_from_audio(chunk_path)
        text = transcribe(chunk_path)
        text_sentiment, _polarity = analyze_sentiment(text)

        generated_image = generate_image(text_sentiment, text, chunk_idx, total_chunks)
        panorama_path = add_360_metadata(generated_image)

        soundtrack = (
            generate_music(text, detected_emotion, chunk_idx, total_chunks)
            if generate_audio
            else None
        )

        return {
            'chunk_index': chunk_idx + 1,
            'emotion': detected_emotion,
            'transcription': text,
            'sentiment': text_sentiment,
            'image': generated_image,    # Original image for display in Gradio
            'image_360': panorama_path,  # Image with 360 metadata
            'music': soundtrack,
        }
    except Exception as err:
        print(f"Error processing chunk {chunk_idx + 1}:", err)
        # Fallback result carrying every key the UI expects.
        return {
            'chunk_index': chunk_idx + 1,
            'emotion': "Error",
            'transcription': "Transcription failed",
            'sentiment': "Sentiment: error",
            'image': Image.new('RGB', (1440, 770), color='white'),
            'image_360': None,
            'music': None,
        }
|
| 394 |
-
|
| 395 |
-
# Function to get predictions for all chunks
|
| 396 |
-
def get_predictions(audio_input, generate_audio=True, chunk_duration=10):
    """Chunk *audio_input* and run process_chunk() on every chunk.

    Args:
        audio_input: Path to the uploaded audio file.
        generate_audio: Whether to also generate music per chunk.
        chunk_duration: Chunk length in seconds.

    Returns:
        A list of per-chunk result dicts (see process_chunk).
    """
    # Chunk the audio into segments
    chunk_files, total_chunks = chunk_audio(audio_input, chunk_duration)

    results = []

    # Process each chunk
    for i, chunk_path in enumerate(chunk_files):
        print(f"Processing chunk {i+1}/{total_chunks} ({chunk_duration}s each)")
        results.append(process_chunk(chunk_path, i, total_chunks, generate_audio))

    # Clean up temporary chunk files
    for chunk_path in chunk_files:
        if chunk_path != audio_input:  # Don't delete original input file
            try:
                os.unlink(chunk_path)
            except OSError:
                # Narrowed from a bare `except:` — only filesystem errors
                # (already-deleted / locked files) are safe to ignore;
                # real bugs now surface instead of being swallowed.
                pass

    return results
|
| 417 |
-
|
| 418 |
-
def create_xmp_block(width, height):
    """Create XMP metadata block following ExifTool's exact format.

    The GPano properties mark a JPEG as an equirectangular 360 panorama
    of the given pixel dimensions.
    """
    lines = [
        '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>',
        '<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="ExifTool">',
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
        '<rdf:Description rdf:about=""',
        'xmlns:GPano="http://ns.google.com/photos/1.0/panorama/"',
        'GPano:ProjectionType="equirectangular"',
        'GPano:UsePanoramaViewer="True"',
        f'GPano:FullPanoWidthPixels="{width}"',
        f'GPano:FullPanoHeightPixels="{height}"',
        f'GPano:CroppedAreaImageWidthPixels="{width}"',
        f'GPano:CroppedAreaImageHeightPixels="{height}"',
        'GPano:CroppedAreaLeftPixels="0"',
        'GPano:CroppedAreaTopPixels="0"/>',
        '</rdf:RDF>',
        '</x:xmpmeta>',
        '<?xpacket end="w"?>',
    ]
    # Note: no trailing newline after the closing xpacket marker.
    return "\n".join(lines)
|
| 439 |
-
|
| 440 |
-
def write_xmp_to_jpg(input_path, output_path, width, height):
|
| 441 |
-
"""Write XMP metadata to JPEG file following ExifTool's method."""
|
| 442 |
-
# Read the original JPEG
|
| 443 |
-
with open(input_path, 'rb') as f:
|
| 444 |
-
data = f.read()
|
| 445 |
-
|
| 446 |
-
# Find the start of image marker
|
| 447 |
-
if data[0:2] != b'\xFF\xD8':
|
| 448 |
-
raise ValueError("Not a valid JPEG file")
|
| 449 |
-
|
| 450 |
-
# Create XMP data
|
| 451 |
-
xmp_data = create_xmp_block(width, height)
|
| 452 |
-
|
| 453 |
-
# Create APP1 segment for XMP
|
| 454 |
-
app1_marker = b'\xFF\xE1'
|
| 455 |
-
xmp_header = b'http://ns.adobe.com/xap/1.0/\x00'
|
| 456 |
-
xmp_bytes = xmp_data.encode('utf-8')
|
| 457 |
-
length = len(xmp_header) + len(xmp_bytes) + 2 # +2 for length bytes
|
| 458 |
-
length_bytes = struct.pack('>H', length)
|
| 459 |
|
| 460 |
-
#
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
output.extend(length_bytes)
|
| 465 |
-
output.extend(xmp_header)
|
| 466 |
-
output.extend(xmp_bytes)
|
| 467 |
-
output.extend(data[2:]) # Rest of the original file
|
| 468 |
|
| 469 |
-
#
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
try:
|
| 476 |
-
# First, ensure the image is upscaled to 4096x2048
|
| 477 |
-
target_width, target_height = 4096, 2048
|
| 478 |
-
if img.width != target_width or img.height != target_height:
|
| 479 |
-
img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
|
| 480 |
|
| 481 |
-
# Create a temporary
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
except Exception as e:
|
| 492 |
-
print(f"Error adding 360 metadata: {str(e)}")
|
| 493 |
-
# Fallback: return the original image path
|
| 494 |
-
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
|
| 495 |
-
img.save(tmp_file.name, "JPEG", quality=95)
|
| 496 |
-
return tmp_file.name
|
| 497 |
|
|
|
|
| 498 |
def create_360_viewer_html(image_paths, audio_paths, output_path):
|
| 499 |
"""Create an HTML file with a 360 viewer and audio player for the given images and audio."""
|
| 500 |
# Create a list of image data URIs
|
|
@@ -514,14 +122,14 @@ def create_360_viewer_html(image_paths, audio_paths, output_path):
|
|
| 514 |
else:
|
| 515 |
audio_data_list.append(None) # Placeholder for chunks without audio
|
| 516 |
|
| 517 |
-
# Create the HTML content
|
| 518 |
html_content = f"""
|
| 519 |
<!DOCTYPE html>
|
| 520 |
<html lang="en">
|
| 521 |
<head>
|
| 522 |
<meta charset="UTF-8">
|
| 523 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 524 |
-
<title>360 Panorama Viewer with Audio</title>
|
| 525 |
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/pannellum@2.5.6/build/pannellum.css"/>
|
| 526 |
<style>
|
| 527 |
body {{
|
|
@@ -599,9 +207,46 @@ def create_360_viewer_html(image_paths, audio_paths, output_path):
|
|
| 599 |
border-radius: 3px;
|
| 600 |
border: 1px solid #ccc;
|
| 601 |
}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 602 |
</style>
|
| 603 |
</head>
|
| 604 |
<body>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
<div id="controls">
|
| 606 |
<select id="image-selector">
|
| 607 |
{"".join([f'<option value="{i}">Chunk {i+1}</option>' for i in range(len(image_data_list))])}
|
|
@@ -657,6 +302,16 @@ def create_360_viewer_html(image_paths, audio_paths, output_path):
|
|
| 657 |
}}
|
| 658 |
}}
|
| 659 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 660 |
// Load the first image initially
|
| 661 |
loadPanorama(0);
|
| 662 |
|
|
@@ -676,135 +331,7 @@ def create_360_viewer_html(image_paths, audio_paths, output_path):
|
|
| 676 |
|
| 677 |
return output_path
|
| 678 |
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
# Update the process_and_display function
|
| 693 |
-
def process_and_display(audio_input, generate_audio, chunk_duration):
    """Generator callback for the "Process Audio" button.

    Yields twice: first a loading-spinner state (all result groups hidden),
    then the final results spread across the pre-built Gradio components.
    The order of elements in each yielded list must match the `outputs=`
    list wired to the button click.
    """
    # Validate chunk duration; fall back to the 10-second default.
    if chunk_duration is None or chunk_duration <= 0:
        chunk_duration = 10

    # First yield: show loading indicator, hide every result group, blank
    # every per-chunk output (6 components per container).
    yield [gr.HTML(f"""
    <div style="text-align: center; margin: 20px;">
    <p style="font-size: 18px; color: #4a4a4a;">Processing audio in {chunk_duration}-second chunks...</p>
    <div style="border: 4px solid #f3f3f3; border-top: 4px solid #3498db; border-radius: 50%; width: 30px; height: 30px; animation: spin 2s linear infinite; margin: 0 auto;"></div>
    <style>@keyframes spin {{ 0% {{ transform: rotate(0deg); }} 100% {{ transform: rotate(360deg); }} }}</style>
    <p style="font-size: 14px; color: #4a4a4a;">This may take several minutes depending on the audio length...</p>
    </div>
    """)] + [gr.Group(visible=False)] * len(group_components) + [None] * (len(output_containers) * 6) + [None, ""]

    # Heavy work: chunk the audio and run the full pipeline per chunk.
    results = get_predictions(audio_input, generate_audio, chunk_duration)

    # Accumulators for the second (final) yield.
    outputs = []
    group_visibility = []
    all_360_images = []  # Collect all 360 images for the viewer
    all_music_paths = []  # Collect all music paths for the viewer

    # Map each chunk result onto its output container (6 values per chunk).
    for i, result in enumerate(results):
        if i < len(output_containers):
            group_visibility.append(gr.Group(visible=True))
            outputs.extend([
                result['emotion'],
                result['transcription'],
                result['sentiment'],
                result['image'],
                result['image_360'],
                result['music']
            ])
            # Collect the 360-processed images and music for the viewer.
            if result['image_360']:
                all_360_images.append(result['image_360'])  # Use the 360-processed image
                all_music_paths.append(result['music'])  # Can be None if no music generated
        else:
            # More results than containers: extra chunks are dropped from the UI.
            group_visibility.append(gr.Group(visible=False))
            outputs.extend([None] * 6)

    # Hide remaining containers when there are fewer chunks than containers.
    for i in range(len(results), len(output_containers)):
        group_visibility.append(gr.Group(visible=False))
        outputs.extend([None] * 6)

    # Build the downloadable 360 viewer HTML if any panorama was produced.
    viewer_html_path = None
    if all_360_images:
        with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
            viewer_html_path = create_360_viewer_html(all_360_images, all_music_paths, tmp_file.name)

    # Final yield: clear the spinner and publish all results.
    # NOTE(review): this yield's tail has THREE elements
    # ([viewer_html_path, js_output, results]) while the first yield's tail
    # has TWO ([None, ""]), and `js_output`/`results` look like a component
    # object and raw python data respectively — confirm the click handler's
    # outputs list matches, otherwise Gradio will mis-assign values.
    yield [gr.HTML("")] + group_visibility + outputs + [viewer_html_path, js_output, results]
|
| 750 |
-
|
| 751 |
-
# Update the clear_all function to handle the new outputs
|
| 752 |
-
def clear_all():
    """Reset every UI component to its initial empty state.

    The returned list must line up, in order, with the outputs wired to
    the Clear button: audio input, group visibilities, per-chunk outputs,
    loading indicator, chunk duration, example selector, viewer, JS output.
    """
    hidden_groups = [gr.Group(visible=False)] * len(group_components)
    cleared_outputs = [None] * (len(output_containers) * 6)

    return (
        [None]              # audio input
        + hidden_groups     # all result groups hidden
        + cleared_outputs   # every per-chunk output container cleared
        + [gr.HTML("")]     # loading indicator emptied
        + [10]              # chunk duration reset to default
        + [None]            # example selector reset
        + [None]            # 360 viewer cleared
        + [""]              # JavaScript output emptied
    )
|
| 778 |
-
|
| 779 |
-
# Function to load example audio (placeholder - you need to implement this)
|
| 780 |
-
def load_example_audio(example_name):
    """Placeholder: resolve *example_name* to an example audio file path.

    Not implemented yet — always returns None.
    """
    return None
|
| 784 |
-
|
| 785 |
-
# Function to generate a shareable link
|
| 786 |
-
|
| 787 |
-
def generate_share_link(audio_input=None, generate_audio=True, chunk_duration=10):
    """Build a human-readable sharing hint for the running app.

    The parameters mirror the UI inputs but are not used to build the
    link; they exist so the button wiring can pass them through unchanged.
    """
    try:
        # On Hugging Face Spaces the app URL is derived from SPACE_ID.
        space_id = os.getenv('SPACE_ID')
        if space_id:
            space_url = f"https://huggingface.co/spaces/{space_id}"
            return (
                f"Your Space is already public! Share this URL: {space_url}"
                "\n\nTo share specific results, ask others to process the same audio with the same settings."
            )

        # Running locally: surface the Gradio share URL when one exists.
        if hasattr(interface, 'share_url') and interface.share_url:
            return (
                "Share this URL to let others use the app: " + interface.share_url
                + "\n\nTo share specific results, ask others to process the same audio with the same settings."
            )

        return "Share link is not available. Make sure to set share=True when launching."
    except Exception as e:
        return f"Error generating share link: {str(e)}"
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
|
| 809 |
# Create the Gradio interface with proper output handling
|
| 810 |
with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as interface:
|
|
@@ -845,9 +372,6 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
|
|
| 845 |
with gr.Row():
|
| 846 |
process_btn = gr.Button("Process Audio", variant="primary")
|
| 847 |
clear_btn = gr.Button("Clear All", variant="secondary")
|
| 848 |
-
# Add share button
|
| 849 |
-
share_btn = gr.Button("Generate Share Link", variant="secondary")
|
| 850 |
-
share_output = gr.Textbox(label="Share Link", interactive=False)
|
| 851 |
|
| 852 |
# Add a loading indicator
|
| 853 |
loading_indicator = gr.HTML("""
|
|
@@ -887,11 +411,12 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
|
|
| 887 |
'music': audio_output
|
| 888 |
})
|
| 889 |
|
| 890 |
-
# Add component for 360 viewer
|
| 891 |
viewer_html_output = gr.File(
|
| 892 |
-
label="Download
|
| 893 |
type="filepath",
|
| 894 |
-
interactive=False
|
|
|
|
| 895 |
)
|
| 896 |
|
| 897 |
# Add a hidden HTML component for JavaScript execution
|
|
@@ -909,9 +434,6 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
|
|
| 909 |
return example_path, example_name
|
| 910 |
|
| 911 |
# Set up the button clicks
|
| 912 |
-
|
| 913 |
-
# Update the process_btn click handler to include results in the output
|
| 914 |
-
# Remove the results_state component and simplify the process_btn click handler
|
| 915 |
process_btn.click(
|
| 916 |
fn=process_and_display,
|
| 917 |
inputs=[audio_input, generate_audio_checkbox, chunk_duration_input],
|
|
@@ -924,10 +446,6 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
|
|
| 924 |
container['music']
|
| 925 |
]] + [viewer_html_output, js_output]
|
| 926 |
)
|
| 927 |
-
|
| 928 |
-
|
| 929 |
-
|
| 930 |
-
# Remove the results_state component
|
| 931 |
|
| 932 |
clear_btn.click(
|
| 933 |
fn=clear_all,
|
|
@@ -947,23 +465,6 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
|
|
| 947 |
inputs=[example_selector],
|
| 948 |
outputs=[audio_input, example_selector]
|
| 949 |
)
|
| 950 |
-
|
| 951 |
-
# Update the share button to not expect results
|
| 952 |
-
share_btn.click(
|
| 953 |
-
fn=generate_share_link,
|
| 954 |
-
inputs=[audio_input, generate_audio_checkbox, chunk_duration_input],
|
| 955 |
-
outputs=[share_output]
|
| 956 |
-
)
|
| 957 |
|
| 958 |
-
|
| 959 |
-
|
| 960 |
-
# Detect the hosting environment: Hugging Face Spaces sets SPACE_ID.
is_spaces = os.getenv('SPACE_ID') is not None

# Launch with appropriate settings
if is_spaces:
    # On Spaces, don't use share=True as it's not supported
    interface.launch()
else:
    # Running locally, use share=True to generate a public link
    interface.launch(share=True)
|
| 969 |
-
|
|
|
|
| 30 |
from io import BytesIO
|
| 31 |
import struct
|
| 32 |
import cv2
|
| 33 |
+
import shutil
|
| 34 |
+
from datetime import datetime
|
| 35 |
|
| 36 |
+
# [Keep all your existing code until the process_and_display function]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
+
# Update the process_and_display function to create a named HTML file
|
| 39 |
+
def process_and_display(audio_input, generate_audio, chunk_duration):
|
| 40 |
+
# Validate chunk duration
|
| 41 |
+
if chunk_duration is None or chunk_duration <= 0:
|
| 42 |
+
chunk_duration = 10
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
+
# Show loading indicator
|
| 45 |
+
yield [gr.HTML(f"""
|
| 46 |
+
<div style="text-align: center; margin: 20px;">
|
| 47 |
+
<p style="font-size: 18px; color: #4a4a4a;">Processing audio in {chunk_duration}-second chunks...</p>
|
| 48 |
+
<div style="border: 4px solid #f3f3f3; border-top: 4px solid #3498db; border-radius: 50%; width: 30px; height: 30px; animation: spin 2s linear infinite; margin: 0 auto;"></div>
|
| 49 |
+
<style>@keyframes spin {{ 0% {{ transform: rotate(0deg); }} 100% {{ transform: rotate(360deg); }} }}</style>
|
| 50 |
+
<p style="font-size: 14px; color: #4a4a4a;">This may take several minutes depending on the audio length...</p>
|
| 51 |
+
</div>
|
| 52 |
+
""")] + [gr.Group(visible=False)] * len(group_components) + [None] * (len(output_containers) * 6) + [None, ""]
|
| 53 |
|
| 54 |
+
results = get_predictions(audio_input, generate_audio, chunk_duration)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
+
# Initialize outputs list
|
| 57 |
+
outputs = []
|
| 58 |
+
group_visibility = []
|
| 59 |
+
all_360_images = [] # Collect all 360 images for the viewer
|
| 60 |
+
all_music_paths = [] # Collect all music paths for the viewer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
+
# Process each result
|
| 63 |
+
for i, result in enumerate(results):
|
| 64 |
+
if i < len(output_containers):
|
| 65 |
+
group_visibility.append(gr.Group(visible=True))
|
| 66 |
+
outputs.extend([
|
| 67 |
+
result['emotion'],
|
| 68 |
+
result['transcription'],
|
| 69 |
+
result['sentiment'],
|
| 70 |
+
result['image'],
|
| 71 |
+
result['image_360'],
|
| 72 |
+
result['music']
|
| 73 |
+
])
|
| 74 |
+
# Collect the 360-processed images and music
|
| 75 |
+
if result['image_360']:
|
| 76 |
+
all_360_images.append(result['image_360']) # Use the 360-processed image
|
| 77 |
+
all_music_paths.append(result['music']) # Can be None if no music generated
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
else:
|
| 79 |
+
# If we have more results than containers, just extend with None
|
| 80 |
+
group_visibility.append(gr.Group(visible=False))
|
| 81 |
+
outputs.extend([None] * 6)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
+
# Hide remaining containers
|
| 84 |
+
for i in range(len(results), len(output_containers)):
|
| 85 |
+
group_visibility.append(gr.Group(visible=False))
|
| 86 |
+
outputs.extend([None] * 6)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
+
# Create 360 viewer HTML if we have 360 images
|
| 89 |
+
viewer_html_path = None
|
| 90 |
+
if all_360_images:
|
| 91 |
+
# Create a timestamp for unique filenames
|
| 92 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 93 |
+
html_filename = f"MyAVE_{timestamp}.html"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
+
# Create a temporary directory for our output
|
| 96 |
+
output_dir = tempfile.mkdtemp()
|
| 97 |
+
viewer_html_path = os.path.join(output_dir, html_filename)
|
| 98 |
+
|
| 99 |
+
# Create the HTML file
|
| 100 |
+
create_360_viewer_html(all_360_images, all_music_paths, viewer_html_path)
|
| 101 |
+
|
| 102 |
+
# After processing, return the results along with other outputs
|
| 103 |
+
yield [gr.HTML("")] + group_visibility + outputs + [viewer_html_path, js_output, results]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
+
# Update the create_360_viewer_html function to include a download button in the HTML itself
|
| 106 |
def create_360_viewer_html(image_paths, audio_paths, output_path):
|
| 107 |
"""Create an HTML file with a 360 viewer and audio player for the given images and audio."""
|
| 108 |
# Create a list of image data URIs
|
|
|
|
| 122 |
else:
|
| 123 |
audio_data_list.append(None) # Placeholder for chunks without audio
|
| 124 |
|
| 125 |
+
# Create the HTML content with a styled download button
|
| 126 |
html_content = f"""
|
| 127 |
<!DOCTYPE html>
|
| 128 |
<html lang="en">
|
| 129 |
<head>
|
| 130 |
<meta charset="UTF-8">
|
| 131 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 132 |
+
<title>My AVE - 360 Panorama Viewer with Audio</title>
|
| 133 |
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/pannellum@2.5.6/build/pannellum.css"/>
|
| 134 |
<style>
|
| 135 |
body {{
|
|
|
|
| 207 |
border-radius: 3px;
|
| 208 |
border: 1px solid #ccc;
|
| 209 |
}}
|
| 210 |
+
.download-btn {{
|
| 211 |
+
background: linear-gradient(to bottom, #4CAF50, #45a049);
|
| 212 |
+
color: white;
|
| 213 |
+
border: none;
|
| 214 |
+
padding: 12px 24px;
|
| 215 |
+
text-align: center;
|
| 216 |
+
text-decoration: none;
|
| 217 |
+
display: inline-block;
|
| 218 |
+
font-size: 16px;
|
| 219 |
+
margin: 10px 2px;
|
| 220 |
+
cursor: pointer;
|
| 221 |
+
border-radius: 25px;
|
| 222 |
+
box-shadow: 0 4px 8px rgba(0,0,0,0.2);
|
| 223 |
+
transition: all 0.3s ease;
|
| 224 |
+
}}
|
| 225 |
+
.download-btn:hover {{
|
| 226 |
+
background: linear-gradient(to bottom, #45a049, #4CAF50);
|
| 227 |
+
box-shadow: 0 6px 12px rgba(0,0,0,0.3);
|
| 228 |
+
transform: translateY(-2px);
|
| 229 |
+
}}
|
| 230 |
+
.header {{
|
| 231 |
+
display: flex;
|
| 232 |
+
justify-content: space-between;
|
| 233 |
+
align-items: center;
|
| 234 |
+
padding: 10px 20px;
|
| 235 |
+
background: rgba(0, 0, 0, 0.8);
|
| 236 |
+
color: white;
|
| 237 |
+
}}
|
| 238 |
+
.title {{
|
| 239 |
+
font-size: 24px;
|
| 240 |
+
font-weight: bold;
|
| 241 |
+
}}
|
| 242 |
</style>
|
| 243 |
</head>
|
| 244 |
<body>
|
| 245 |
+
<div class="header">
|
| 246 |
+
<div class="title">My Affective Virtual Environment</div>
|
| 247 |
+
<button class="download-btn" onclick="downloadHTML()">Download This AVE</button>
|
| 248 |
+
</div>
|
| 249 |
+
|
| 250 |
<div id="controls">
|
| 251 |
<select id="image-selector">
|
| 252 |
{"".join([f'<option value="{i}">Chunk {i+1}</option>' for i in range(len(image_data_list))])}
|
|
|
|
| 302 |
}}
|
| 303 |
}}
|
| 304 |
|
| 305 |
+
function downloadHTML() {{
|
| 306 |
+
// Create a download link for the current HTML file
|
| 307 |
+
const a = document.createElement('a');
|
| 308 |
+
a.href = window.location.href;
|
| 309 |
+
a.download = 'MyAVE.html';
|
| 310 |
+
document.body.appendChild(a);
|
| 311 |
+
a.click();
|
| 312 |
+
document.body.removeChild(a);
|
| 313 |
+
}}
|
| 314 |
+
|
| 315 |
// Load the first image initially
|
| 316 |
loadPanorama(0);
|
| 317 |
|
|
|
|
| 331 |
|
| 332 |
return output_path
|
| 333 |
|
| 334 |
+
# [Keep the rest of your code but remove the share button and related functions]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
|
| 336 |
# Create the Gradio interface with proper output handling
|
| 337 |
with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as interface:
|
|
|
|
| 372 |
with gr.Row():
|
| 373 |
process_btn = gr.Button("Process Audio", variant="primary")
|
| 374 |
clear_btn = gr.Button("Clear All", variant="secondary")
|
|
|
|
|
|
|
|
|
|
| 375 |
|
| 376 |
# Add a loading indicator
|
| 377 |
loading_indicator = gr.HTML("""
|
|
|
|
| 411 |
'music': audio_output
|
| 412 |
})
|
| 413 |
|
| 414 |
+
# Add component for 360 viewer with a fixed name
|
| 415 |
viewer_html_output = gr.File(
|
| 416 |
+
label="Download Complete AVE Experience (HTML)",
|
| 417 |
type="filepath",
|
| 418 |
+
interactive=False,
|
| 419 |
+
file_count="single"
|
| 420 |
)
|
| 421 |
|
| 422 |
# Add a hidden HTML component for JavaScript execution
|
|
|
|
| 434 |
return example_path, example_name
|
| 435 |
|
| 436 |
# Set up the button clicks
|
|
|
|
|
|
|
|
|
|
| 437 |
process_btn.click(
|
| 438 |
fn=process_and_display,
|
| 439 |
inputs=[audio_input, generate_audio_checkbox, chunk_duration_input],
|
|
|
|
| 446 |
container['music']
|
| 447 |
]] + [viewer_html_output, js_output]
|
| 448 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
|
| 450 |
clear_btn.click(
|
| 451 |
fn=clear_all,
|
|
|
|
| 465 |
inputs=[example_selector],
|
| 466 |
outputs=[audio_input, example_selector]
|
| 467 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
|
| 469 |
+
# Launch the interface
|
| 470 |
+
interface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|