Spaces:
Sleeping
Sleeping
| import os | |
| import requests | |
| import uuid | |
| import subprocess | |
| import time | |
def extract_audio_from_video(video_path, output_format="mp3"):
    """Extract the audio track of a video into a new file using ffmpeg.

    ffmpeg is invoked with video disabled (``-vn``), one audio channel
    (``-ac 1``), a 12000 Hz sample rate (``-ar 12000``) and audio quality
    ``-q:a 9``.

    Args:
        video_path: Path of the input video. A falsy value short-circuits.
        output_format: File extension of the audio file to produce.

    Returns:
        The path of the generated ``audio_<id>.<output_format>`` file
        (relative to the current working directory), or ``None`` when
        ``video_path`` is falsy.

    Raises:
        Exception: If ffmpeg cannot be started, exits with a non-zero
            status, or produces no output file. The message always starts
            with ``"Error extracting audio: "``.
    """
    if not video_path:
        return None

    output_path = f"audio_{uuid.uuid4().hex[:6]}.{output_format}"

    cmd = ["ffmpeg", "-i", video_path, "-vn"]
    if output_format == "mp3":
        cmd += ["-c:a", "libmp3lame"]
    # For every other format let ffmpeg pick the encoder from the output
    # extension: a container/extension name such as "wav" or "ogg" is not a
    # valid encoder name and would make ffmpeg abort.
    cmd += ["-q:a", "9", "-ac", "1", "-ar", "12000", "-y", output_path]

    try:
        completed = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        if completed.returncode != 0:
            # Surface the last line ffmpeg printed; it normally names the cause.
            stderr_lines = (
                completed.stderr.decode("utf-8", errors="replace")
                .strip()
                .splitlines()
            )
            detail = stderr_lines[-1].strip() if stderr_lines else "no error output"
            raise Exception(
                f"Audio extraction failed (ffmpeg exit code "
                f"{completed.returncode}): {detail}"
            )
        if not os.path.exists(output_path):
            raise Exception("Audio extraction failed")
        return output_path
    except Exception as e:
        # Do not leave a truncated file behind from a failed run.
        if os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError:
                pass
        raise Exception(f"Error extracting audio: {str(e)}") from e
def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
    """Transcribe an audio file with the ElevenLabs speech-to-text endpoint.

    The audio file is uploaded as multipart form data together with the
    model id. On success the transcript text is also written to a new
    ``transcript_<id>.txt`` file in the current working directory.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: ElevenLabs API key, sent in the ``xi-api-key`` header.
        model_id: Identifier of the transcription model to request.

    Returns:
        A tuple ``(transcript_text, transcript_file, status_message)``.

    Raises:
        Exception: ``"API key required"`` when ``api_key`` is falsy;
            otherwise any failure (missing file, network error, non-200
            response, unparsable body) is re-raised with a message starting
            with ``"Transcription failed: "``.
    """
    if not api_key:
        raise Exception("API key required")

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}

    try:
        if not audio_path:
            # Fail with a readable message instead of the TypeError that
            # open(None) would produce.
            raise ValueError("no audio file provided")

        with open(audio_path, "rb") as file:
            response = requests.post(
                url,
                headers=headers,
                files={"file": file, "model_id": (None, model_id)},
                timeout=120,
            )

        if response.status_code != 200:
            message = f"API error: {response.status_code}"
            # Include a bounded excerpt of the body so the cause is visible.
            detail = (response.text or "").strip()[:200]
            if detail:
                message = f"{message} - {detail}"
            raise Exception(message)

        result = response.json()
        # Treat a missing or null "text" field as an empty transcript.
        transcript_text = result.get("text") or ""

        # Save transcript to file
        transcript_file = f"transcript_{uuid.uuid4().hex[:6]}.txt"
        with open(transcript_file, "w", encoding="utf-8") as f:
            f.write(transcript_text)

        return transcript_text, transcript_file, "Transcription completed successfully"
    except Exception as e:
        raise Exception(f"Transcription failed: {str(e)}") from e
def process_video_file(video_path, audio_format, elevenlabs_api_key, model_id, gemini_api_key, language, content_type):
    """Run the full pipeline: extract audio, transcribe it, analyze the text.

    Any exception raised along the way is caught and reported through the
    returned tuple rather than propagated.

    Args:
        video_path: Path of the video to process.
        audio_format: Audio format passed to ``extract_audio_from_video``.
        elevenlabs_api_key: API key passed to ``transcribe_audio``.
        model_id: Transcription model id passed to ``transcribe_audio``.
        gemini_api_key: API key passed to ``analyze_document``.
        language: Passed through to ``analyze_document``.
        content_type: Passed through to ``analyze_document``.

    Returns:
        A 7-tuple ``(audio_path, status_message, transcript_path,
        transcription_status, formatted_output, txt_path, json_path)``.
        On an empty transcription only ``audio_path``, the status message and
        ``transcription_status`` are set. On any error the result is
        ``(None, error, None, error, error, None, None)``.
    """
    try:
        if not video_path:
            # Without this guard the None audio path would reach the
            # transcription step and surface as a misleading error.
            raise ValueError("No video file provided")

        print("Starting video processing...")
        start = time.time()

        audio_path = extract_audio_from_video(video_path, audio_format)
        print(f"Audio extracted in {time.time() - start:.2f}s. Transcribing...")

        transcription, transcript_path, transcription_status = transcribe_audio(
            audio_path,
            elevenlabs_api_key,
            model_id,
        )
        if not transcription:
            return audio_path, "Audio extracted, but transcription failed", None, transcription_status, None, None, None
        print(f"Transcription completed in {time.time() - start:.2f}s. Analyzing content...")

        # Generate summary or quiz from transcription.
        # NOTE(review): analyze_document is defined outside this view; it is
        # unpacked here as (formatted_output, json_path, txt_path) - confirm.
        formatted_output, json_path, txt_path = analyze_document(
            transcription,
            gemini_api_key,
            language,
            content_type,
        )
        print(f"Total processing time: {time.time() - start:.2f}s")

        # The return order is txt_path then json_path, unlike the unpacking above.
        return audio_path, "Processing completed successfully", transcript_path, transcription_status, formatted_output, txt_path, json_path
    except Exception as e:
        error_message = f"Error processing video: {str(e)}"
        return None, error_message, None, error_message, error_message, None, None