Spaces:
Sleeping
Sleeping
Testing youtube transcription for youtube videos
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ from dotenv import load_dotenv
|
|
| 6 |
from deepgram import DeepgramClient, PrerecordedOptions
|
| 7 |
import tempfile
|
| 8 |
import json
|
|
|
|
| 9 |
|
| 10 |
import warnings
|
| 11 |
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
|
|
@@ -32,31 +33,20 @@ if not DEEPGRAM_API_KEY:
|
|
| 32 |
GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
|
| 33 |
GEMINI_API_KEY = API_KEY
|
| 34 |
|
| 35 |
-
# Load Whisper AI model at startup
|
| 36 |
-
# print("Loading Whisper AI model..., ANIKET")
|
| 37 |
-
# whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
|
| 38 |
-
# print("Whisper AI model loaded successfully, ANIKET")
|
| 39 |
-
|
| 40 |
-
|
| 41 |
@app.route("/", methods=["GET"])
|
| 42 |
def health_check():
|
| 43 |
return jsonify({"status": "success", "message": "API is running successfully!"}), 200
|
| 44 |
|
| 45 |
|
| 46 |
-
@app.route("/mbsa")
|
| 47 |
-
def mbsa():
|
| 48 |
-
return render_template("mbsa.html")
|
| 49 |
-
|
| 50 |
-
|
| 51 |
@app.route('/process-audio', methods=['POST'])
|
| 52 |
def process_audio():
|
| 53 |
-
print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
|
| 54 |
|
| 55 |
if 'audio' not in request.files:
|
| 56 |
return jsonify({"error": "No audio file provided"}), 400
|
| 57 |
|
| 58 |
audio_file = request.files['audio']
|
| 59 |
-
print("AUDIO FILE NAME: ", audio_file)
|
| 60 |
|
| 61 |
temp_audio_path = None
|
| 62 |
try:
|
|
@@ -67,23 +57,23 @@ def process_audio():
|
|
| 67 |
temp_audio_path = temp_audio_file.name # Get the file path
|
| 68 |
temp_audio_file.write(audio_file.read()) # Write the uploaded audio to the temp file
|
| 69 |
|
| 70 |
-
print(f"Temporary audio file saved at: {temp_audio_path}")
|
| 71 |
|
| 72 |
# Step 2: Transcribe the uploaded audio file synchronously
|
| 73 |
transcription = transcribe_audio(temp_audio_path)
|
| 74 |
|
| 75 |
-
print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
|
| 76 |
|
| 77 |
if not transcription:
|
| 78 |
return jsonify({"error": "Audio transcription failed"}), 500
|
| 79 |
|
| 80 |
-
print("GOT THE transcription")
|
| 81 |
|
| 82 |
# Step 3: Generate structured recipe information using Gemini API synchronously
|
| 83 |
-
print("Starting the GEMINI REQUEST TO STRUCTURE IT")
|
| 84 |
structured_data = query_gemini_api(transcription)
|
| 85 |
|
| 86 |
-
print("GOT THE STRUCTURED DATA", structured_data)
|
| 87 |
# Step 4: Return the structured data
|
| 88 |
return jsonify(structured_data)
|
| 89 |
|
|
@@ -97,6 +87,41 @@ def process_audio():
|
|
| 97 |
print(f"Temporary WAV file deleted: {temp_audio_path}")
|
| 98 |
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
def transcribe_audio(wav_file_path):
|
| 102 |
"""
|
|
@@ -127,7 +152,7 @@ def transcribe_audio(wav_file_path):
|
|
| 127 |
|
| 128 |
# Check if the response is valid
|
| 129 |
if response:
|
| 130 |
-
print("Request successful! Processing response.")
|
| 131 |
|
| 132 |
# Convert response to JSON string
|
| 133 |
try:
|
|
@@ -152,7 +177,7 @@ def transcribe_audio(wav_file_path):
|
|
| 152 |
transcript_file_path = "transcript_from_transcribe_audio.txt"
|
| 153 |
with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
|
| 154 |
transcript_file.write(transcript)
|
| 155 |
-
print(f"Transcript saved to file: {transcript_file_path}")
|
| 156 |
|
| 157 |
return transcript
|
| 158 |
else:
|
|
@@ -176,7 +201,6 @@ def query_gemini_api(transcription):
|
|
| 176 |
try:
|
| 177 |
# Define the structured prompt
|
| 178 |
prompt = (
|
| 179 |
-
"Print the transcription in the response as well"
|
| 180 |
"Analyze the provided cooking video transcription and extract the following structured information:\n"
|
| 181 |
"1. Recipe Name: Identify the name of the dish being prepared.\n"
|
| 182 |
"2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
|
|
@@ -207,7 +231,6 @@ def query_gemini_api(transcription):
|
|
| 207 |
f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
|
| 208 |
json=payload,
|
| 209 |
headers=headers,
|
| 210 |
-
|
| 211 |
)
|
| 212 |
|
| 213 |
# Raise error if response code is not 200
|
|
|
|
| 6 |
from deepgram import DeepgramClient, PrerecordedOptions
|
| 7 |
import tempfile
|
| 8 |
import json
|
| 9 |
+
from youtube_transcript_api import YouTubeTranscriptApi, YouTubeTranscriptApiException
|
| 10 |
|
| 11 |
import warnings
|
| 12 |
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
|
|
|
|
| 33 |
GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
|
| 34 |
GEMINI_API_KEY = API_KEY
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
@app.route("/", methods=["GET"])
def health_check():
    """Liveness probe: confirm the API process is up and responding."""
    payload = {"status": "success", "message": "API is running successfully!"}
    return jsonify(payload), 200
|
| 39 |
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
@app.route('/process-audio', methods=['POST'])
|
| 42 |
def process_audio():
|
| 43 |
+
# print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
|
| 44 |
|
| 45 |
if 'audio' not in request.files:
|
| 46 |
return jsonify({"error": "No audio file provided"}), 400
|
| 47 |
|
| 48 |
audio_file = request.files['audio']
|
| 49 |
+
# print("AUDIO FILE NAME: ", audio_file)
|
| 50 |
|
| 51 |
temp_audio_path = None
|
| 52 |
try:
|
|
|
|
| 57 |
temp_audio_path = temp_audio_file.name # Get the file path
|
| 58 |
temp_audio_file.write(audio_file.read()) # Write the uploaded audio to the temp file
|
| 59 |
|
| 60 |
+
# print(f"Temporary audio file saved at: {temp_audio_path}")
|
| 61 |
|
| 62 |
# Step 2: Transcribe the uploaded audio file synchronously
|
| 63 |
transcription = transcribe_audio(temp_audio_path)
|
| 64 |
|
| 65 |
+
# print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
|
| 66 |
|
| 67 |
if not transcription:
|
| 68 |
return jsonify({"error": "Audio transcription failed"}), 500
|
| 69 |
|
| 70 |
+
# print("GOT THE transcription")
|
| 71 |
|
| 72 |
# Step 3: Generate structured recipe information using Gemini API synchronously
|
| 73 |
+
# print("Starting the GEMINI REQUEST TO STRUCTURE IT")
|
| 74 |
structured_data = query_gemini_api(transcription)
|
| 75 |
|
| 76 |
+
# print("GOT THE STRUCTURED DATA", structured_data)
|
| 77 |
# Step 4: Return the structured data
|
| 78 |
return jsonify(structured_data)
|
| 79 |
|
|
|
|
| 87 |
print(f"Temporary WAV file deleted: {temp_audio_path}")
|
| 88 |
|
| 89 |
|
| 90 |
+
@app.route('/process-youtube', methods=['POST'])
def process_youtube():
    """
    Accept a JSON body containing 'youtube_url', fetch the video's
    transcript via YouTubeTranscriptApi, and return structured recipe
    data produced by query_gemini_api().

    Returns:
        400 JSON error if no URL was provided.
        500 JSON error if the transcript could not be fetched or any
            other exception occurred.
        Otherwise, the structured data from the Gemini API as JSON.
    """
    youtube_url = request.json.get('youtube_url')

    if not youtube_url:
        return jsonify({"error": "No YouTube URL provided"}), 400

    try:
        # Extract the YouTube video ID from the URL.
        # Handles "...watch?v=<id>" and strips any trailing "&" params.
        video_id = youtube_url.split("v=")[-1].split("&")[0]

        # BUG FIX: previously 'transcript' was left unbound when the
        # fetch below raised, so the 'if not transcript' check crashed
        # with a NameError (surfacing as a confusing 500) instead of
        # returning the intended "Unable to fetch transcript" error.
        transcript = None
        try:
            # Fetch the transcript segments for the video.
            transcript_data = YouTubeTranscriptApi.get_transcript(video_id)

            # Concatenate the text of each segment into one transcript.
            transcript = " ".join(segment['text'] for segment in transcript_data)

        except YouTubeTranscriptApiException as e:
            # NOTE(review): confirm 'YouTubeTranscriptApiException' exists
            # in the installed youtube_transcript_api version — current
            # releases expose CouldNotRetrieveTranscript (and subclasses
            # like TranscriptsDisabled / NoTranscriptFound) instead.
            print(f"Error fetching transcript for video ID {video_id}: {e}")

        if not transcript:
            return jsonify({"error": "Unable to fetch transcript from YouTube"}), 500

        # Send the transcript to the Gemini API for structuring.
        structured_data = query_gemini_api(transcript)

        # Return the structured data to the caller.
        return jsonify(structured_data)

    except Exception as e:
        # Catch-all boundary: report the failure as a JSON 500.
        return jsonify({"error": str(e)}), 500
|
| 123 |
+
|
| 124 |
+
|
| 125 |
|
| 126 |
def transcribe_audio(wav_file_path):
|
| 127 |
"""
|
|
|
|
| 152 |
|
| 153 |
# Check if the response is valid
|
| 154 |
if response:
|
| 155 |
+
# print("Request successful! Processing response.")
|
| 156 |
|
| 157 |
# Convert response to JSON string
|
| 158 |
try:
|
|
|
|
| 177 |
transcript_file_path = "transcript_from_transcribe_audio.txt"
|
| 178 |
with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
|
| 179 |
transcript_file.write(transcript)
|
| 180 |
+
# print(f"Transcript saved to file: {transcript_file_path}")
|
| 181 |
|
| 182 |
return transcript
|
| 183 |
else:
|
|
|
|
| 201 |
try:
|
| 202 |
# Define the structured prompt
|
| 203 |
prompt = (
|
|
|
|
| 204 |
"Analyze the provided cooking video transcription and extract the following structured information:\n"
|
| 205 |
"1. Recipe Name: Identify the name of the dish being prepared.\n"
|
| 206 |
"2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
|
|
|
|
| 231 |
f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
|
| 232 |
json=payload,
|
| 233 |
headers=headers,
|
|
|
|
| 234 |
)
|
| 235 |
|
| 236 |
# Raise error if response code is not 200
|