Spaces:

Sayiqa
/

deployment

Sleeping

App Files Community

Sayiqa committed on Dec 30, 2024

Commit

d271517

verified ·

1 Parent(s): 985d66e

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -26

app.py CHANGED Viewed

@@ -383,6 +383,7 @@ def process_youtube_video(url="", keywords=""):
         summary = ""
         sentiment_label = "N/A"
         recommendations = ""
         if not url.strip():
             return thumbnail, "Please enter a YouTube URL", sentiment_label, recommendations
@@ -393,36 +394,49 @@ def process_youtube_video(url="", keywords=""):
         thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
         try:
-            # Method 1: Direct transcript fetch
             try:
-                transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
-                text = " ".join([t['text'] for t in transcript])
-            except:
-                # Method 2: Try list_transcripts
-                try:
-                    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
-                    # Try multiple language variants
-                    for lang_code in ['en', 'en-US', 'en-GB', 'a.en']:
-                        try:
-                            transcript = transcript_list.find_transcript([lang_code])
                             text = " ".join([t['text'] for t in transcript.fetch()])
                             break
-                        except:
-                            continue
-                    # If no English transcript found, try auto-generated
-                    if 'text' not in locals():
                         transcript = transcript_list.find_generated_transcript(['en'])
                         text = " ".join([t['text'] for t in transcript.fetch()])
-                except:
-                    # Method 3: Try translation
-                    available_transcripts = transcript_list.find_manually_created_transcript()
-                    translated = available_transcripts.translate('en')
-                    text = " ".join([t['text'] for t in translated.fetch()])
-            # Clean and process text
             cleaned_text = re.sub(r'[^\w\s.]', '', text)
             cleaned_text = ' '.join(cleaned_text.split())
@@ -442,8 +456,7 @@ def process_youtube_video(url="", keywords=""):
             summary = model.generate_content(f"Summarize this content: {cleaned_text[:4000]}").text
         except Exception as e:
-            print(f"Debug - Transcript Error: {str(e)}")  # Debug logging
-            return thumbnail, f"⚠️ Unable to process video: {str(e)}", "N/A", recommendations
         # Get recommendations
         if keywords.strip():
@@ -452,7 +465,7 @@ def process_youtube_video(url="", keywords=""):
         return thumbnail, summary, sentiment_label, recommendations
     except Exception as e:
-        print(f"Debug - Main Error: {str(e)}")  # Debug logging
         return None, f"Error: {str(e)}", "N/A", ""

         summary = ""
         sentiment_label = "N/A"
         recommendations = ""
+        text = ""
         if not url.strip():
             return thumbnail, "Please enter a YouTube URL", sentiment_label, recommendations
         thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
+        # Method 1: Direct transcript fetch
         try:
+            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
+            text = " ".join([t['text'] for t in transcript])
+        except:
+            # Method 2: List available transcripts
             try:
+                transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+                transcript = None
+                # Try English variants
+                for lang_code in ['en', 'en-US', 'en-GB', 'a.en']:
+                    try:
+                        transcript = transcript_list.find_transcript([lang_code])
+                        if transcript:
                             text = " ".join([t['text'] for t in transcript.fetch()])
                             break
+                    except:
+                        continue
+                # Try auto-generated if no manual transcript
+                if not text:
+                    try:
                         transcript = transcript_list.find_generated_transcript(['en'])
                         text = " ".join([t['text'] for t in transcript.fetch()])
+                    except:
+                        # Try translation as last resort
+                        try:
+                            manual_transcript = transcript_list.find_manually_created_transcript()
+                            translated = manual_transcript.translate('en')
+                            text = " ".join([t['text'] for t in translated.fetch()])
+                        except:
+                            raise Exception("No available transcripts found")
+            except Exception as e:
+                return thumbnail, f"⚠️ No transcripts available: {str(e)}", sentiment_label, recommendations
+        if not text:
+            return thumbnail, "⚠️ Could not extract transcript text", sentiment_label, recommendations
+        # Process valid transcript
+        try:
+            # Clean text
             cleaned_text = re.sub(r'[^\w\s.]', '', text)
             cleaned_text = ' '.join(cleaned_text.split())
             summary = model.generate_content(f"Summarize this content: {cleaned_text[:4000]}").text
         except Exception as e:
+            return thumbnail, f"⚠️ Error processing content: {str(e)}", sentiment_label, recommendations
         # Get recommendations
         if keywords.strip():
         return thumbnail, summary, sentiment_label, recommendations
     except Exception as e:
+        print(f"Debug - Main Error: {str(e)}")
         return None, f"Error: {str(e)}", "N/A", ""