Spaces:
Sleeping
Sleeping
Testing youtube transcription for youtube videos
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ from dotenv import load_dotenv
|
|
| 6 |
from deepgram import DeepgramClient, PrerecordedOptions
|
| 7 |
import tempfile
|
| 8 |
import json
|
|
|
|
| 9 |
|
| 10 |
import warnings
|
| 11 |
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
|
|
@@ -32,31 +33,20 @@ if not DEEPGRAM_API_KEY:
|
|
| 32 |
GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
|
| 33 |
GEMINI_API_KEY = API_KEY
|
| 34 |
|
| 35 |
-
# Load Whisper AI model at startup
|
| 36 |
-
# print("Loading Whisper AI model..., ANIKET")
|
| 37 |
-
# whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
|
| 38 |
-
# print("Whisper AI model loaded successfully, ANIKET")
|
| 39 |
-
|
| 40 |
-
|
| 41 |
@app.route("/", methods=["GET"])
|
| 42 |
def health_check():
|
| 43 |
return jsonify({"status": "success", "message": "API is running successfully!"}), 200
|
| 44 |
|
| 45 |
|
| 46 |
-
@app.route("/mbsa")
|
| 47 |
-
def mbsa():
|
| 48 |
-
return render_template("mbsa.html")
|
| 49 |
-
|
| 50 |
-
|
| 51 |
@app.route('/process-audio', methods=['POST'])
|
| 52 |
def process_audio():
|
| 53 |
-
print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
|
| 54 |
|
| 55 |
if 'audio' not in request.files:
|
| 56 |
return jsonify({"error": "No audio file provided"}), 400
|
| 57 |
|
| 58 |
audio_file = request.files['audio']
|
| 59 |
-
print("AUDIO FILE NAME: ", audio_file)
|
| 60 |
|
| 61 |
temp_audio_path = None
|
| 62 |
try:
|
|
@@ -67,23 +57,23 @@ def process_audio():
|
|
| 67 |
temp_audio_path = temp_audio_file.name # Get the file path
|
| 68 |
temp_audio_file.write(audio_file.read()) # Write the uploaded audio to the temp file
|
| 69 |
|
| 70 |
-
print(f"Temporary audio file saved at: {temp_audio_path}")
|
| 71 |
|
| 72 |
# Step 2: Transcribe the uploaded audio file synchronously
|
| 73 |
transcription = transcribe_audio(temp_audio_path)
|
| 74 |
|
| 75 |
-
print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
|
| 76 |
|
| 77 |
if not transcription:
|
| 78 |
return jsonify({"error": "Audio transcription failed"}), 500
|
| 79 |
|
| 80 |
-
print("GOT THE transcription")
|
| 81 |
|
| 82 |
# Step 3: Generate structured recipe information using Gemini API synchronously
|
| 83 |
-
print("Starting the GEMINI REQUEST TO STRUCTURE IT")
|
| 84 |
structured_data = query_gemini_api(transcription)
|
| 85 |
|
| 86 |
-
print("GOT THE STRUCTURED DATA", structured_data)
|
| 87 |
# Step 4: Return the structured data
|
| 88 |
return jsonify(structured_data)
|
| 89 |
|
|
@@ -97,6 +87,41 @@ def process_audio():
|
|
| 97 |
print(f"Temporary WAV file deleted: {temp_audio_path}")
|
| 98 |
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
def transcribe_audio(wav_file_path):
|
| 102 |
"""
|
|
@@ -127,7 +152,7 @@ def transcribe_audio(wav_file_path):
|
|
| 127 |
|
| 128 |
# Check if the response is valid
|
| 129 |
if response:
|
| 130 |
-
print("Request successful! Processing response.")
|
| 131 |
|
| 132 |
# Convert response to JSON string
|
| 133 |
try:
|
|
@@ -152,7 +177,7 @@ def transcribe_audio(wav_file_path):
|
|
| 152 |
transcript_file_path = "transcript_from_transcribe_audio.txt"
|
| 153 |
with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
|
| 154 |
transcript_file.write(transcript)
|
| 155 |
-
print(f"Transcript saved to file: {transcript_file_path}")
|
| 156 |
|
| 157 |
return transcript
|
| 158 |
else:
|
|
@@ -176,7 +201,6 @@ def query_gemini_api(transcription):
|
|
| 176 |
try:
|
| 177 |
# Define the structured prompt
|
| 178 |
prompt = (
|
| 179 |
-
"Print the transcription in the response as well"
|
| 180 |
"Analyze the provided cooking video transcription and extract the following structured information:\n"
|
| 181 |
"1. Recipe Name: Identify the name of the dish being prepared.\n"
|
| 182 |
"2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
|
|
@@ -207,7 +231,6 @@ def query_gemini_api(transcription):
|
|
| 207 |
f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
|
| 208 |
json=payload,
|
| 209 |
headers=headers,
|
| 210 |
-
|
| 211 |
)
|
| 212 |
|
| 213 |
# Raise error if response code is not 200
|
|
|
|
| 6 |
from deepgram import DeepgramClient, PrerecordedOptions
|
| 7 |
import tempfile
|
| 8 |
import json
|
| 9 |
+
from youtube_transcript_api import YouTubeTranscriptApi, YouTubeTranscriptApiException
|
| 10 |
|
| 11 |
import warnings
|
| 12 |
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
|
|
|
|
| 33 |
GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
|
| 34 |
GEMINI_API_KEY = API_KEY
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
@app.route("/", methods=["GET"])
def health_check():
    """Liveness probe: confirm the API process is up and responding."""
    payload = {"status": "success", "message": "API is running successfully!"}
    return jsonify(payload), 200
|
| 39 |
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
@app.route('/process-audio', methods=['POST'])
|
| 42 |
def process_audio():
|
| 43 |
+
# print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
|
| 44 |
|
| 45 |
if 'audio' not in request.files:
|
| 46 |
return jsonify({"error": "No audio file provided"}), 400
|
| 47 |
|
| 48 |
audio_file = request.files['audio']
|
| 49 |
+
# print("AUDIO FILE NAME: ", audio_file)
|
| 50 |
|
| 51 |
temp_audio_path = None
|
| 52 |
try:
|
|
|
|
| 57 |
temp_audio_path = temp_audio_file.name # Get the file path
|
| 58 |
temp_audio_file.write(audio_file.read()) # Write the uploaded audio to the temp file
|
| 59 |
|
| 60 |
+
# print(f"Temporary audio file saved at: {temp_audio_path}")
|
| 61 |
|
| 62 |
# Step 2: Transcribe the uploaded audio file synchronously
|
| 63 |
transcription = transcribe_audio(temp_audio_path)
|
| 64 |
|
| 65 |
+
# print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
|
| 66 |
|
| 67 |
if not transcription:
|
| 68 |
return jsonify({"error": "Audio transcription failed"}), 500
|
| 69 |
|
| 70 |
+
# print("GOT THE transcription")
|
| 71 |
|
| 72 |
# Step 3: Generate structured recipe information using Gemini API synchronously
|
| 73 |
+
# print("Starting the GEMINI REQUEST TO STRUCTURE IT")
|
| 74 |
structured_data = query_gemini_api(transcription)
|
| 75 |
|
| 76 |
+
# print("GOT THE STRUCTURED DATA", structured_data)
|
| 77 |
# Step 4: Return the structured data
|
| 78 |
return jsonify(structured_data)
|
| 79 |
|
|
|
|
| 87 |
print(f"Temporary WAV file deleted: {temp_audio_path}")
|
| 88 |
|
| 89 |
|
| 90 |
+
@app.route('/process-youtube', methods=['POST'])
def process_youtube():
    """
    Accept a JSON body containing 'youtube_url', fetch the video's
    transcript via YouTubeTranscriptApi, and return structured recipe
    data produced by query_gemini_api().

    Returns:
        400 JSON error if no URL was provided.
        500 JSON error if the transcript could not be fetched or any
            other exception occurred.
        Otherwise, the structured data from the Gemini API as JSON.
    """
    youtube_url = request.json.get('youtube_url')

    if not youtube_url:
        return jsonify({"error": "No YouTube URL provided"}), 400

    try:
        # Extract the YouTube video ID from the URL.
        # Handles "...watch?v=<id>" and strips any trailing "&" params.
        video_id = youtube_url.split("v=")[-1].split("&")[0]

        # BUG FIX: previously 'transcript' was left unbound when the
        # fetch below raised, so the 'if not transcript' check crashed
        # with a NameError (surfacing as a confusing 500) instead of
        # returning the intended "Unable to fetch transcript" error.
        transcript = None
        try:
            # Fetch the transcript segments for the video.
            transcript_data = YouTubeTranscriptApi.get_transcript(video_id)

            # Concatenate the text of each segment into one transcript.
            transcript = " ".join(segment['text'] for segment in transcript_data)

        except YouTubeTranscriptApiException as e:
            # NOTE(review): confirm 'YouTubeTranscriptApiException' exists
            # in the installed youtube_transcript_api version — current
            # releases expose CouldNotRetrieveTranscript (and subclasses
            # like TranscriptsDisabled / NoTranscriptFound) instead.
            print(f"Error fetching transcript for video ID {video_id}: {e}")

        if not transcript:
            return jsonify({"error": "Unable to fetch transcript from YouTube"}), 500

        # Send the transcript to the Gemini API for structuring.
        structured_data = query_gemini_api(transcript)

        # Return the structured data to the caller.
        return jsonify(structured_data)

    except Exception as e:
        # Catch-all boundary: report the failure as a JSON 500.
        return jsonify({"error": str(e)}), 500
|
| 123 |
+
|
| 124 |
+
|
| 125 |
|
| 126 |
def transcribe_audio(wav_file_path):
|
| 127 |
"""
|
|
|
|
| 152 |
|
| 153 |
# Check if the response is valid
|
| 154 |
if response:
|
| 155 |
+
# print("Request successful! Processing response.")
|
| 156 |
|
| 157 |
# Convert response to JSON string
|
| 158 |
try:
|
|
|
|
| 177 |
transcript_file_path = "transcript_from_transcribe_audio.txt"
|
| 178 |
with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
|
| 179 |
transcript_file.write(transcript)
|
| 180 |
+
# print(f"Transcript saved to file: {transcript_file_path}")
|
| 181 |
|
| 182 |
return transcript
|
| 183 |
else:
|
|
|
|
| 201 |
try:
|
| 202 |
# Define the structured prompt
|
| 203 |
prompt = (
|
|
|
|
| 204 |
"Analyze the provided cooking video transcription and extract the following structured information:\n"
|
| 205 |
"1. Recipe Name: Identify the name of the dish being prepared.\n"
|
| 206 |
"2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
|
|
|
|
| 231 |
f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
|
| 232 |
json=payload,
|
| 233 |
headers=headers,
|
|
|
|
| 234 |
)
|
| 235 |
|
| 236 |
# Raise error if response code is not 200
|