Upload app.py
app.py
CHANGED
@@ -3,13 +3,35 @@ from supertonic import TTS
 from transformers import pipeline
 import tempfile
 import os
+from PIL import Image
+import numpy as np
 
 # Initialize the image-to-text pipeline
 image_to_text = pipeline("image-to-text")
 
+# Initialize text generation pipeline for story creation
+text_generation = pipeline("text-generation", model="gpt2")
+
+# Initialize Hugging Face image-to-text model for advanced story generation
+try:
+    from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
+    image_to_story_model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+    image_feature_extractor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
+    image_to_story_tokenizer = AutoTokenizer.from_pretrained("gpt2")
+except:
+    image_to_story_model = None
+    image_feature_extractor = None
+    image_to_story_tokenizer = None
+
 # Initialize the TTS model
 tts = TTS(auto_download=True)
 
+# Initialize emotion detection pipeline
+try:
+    emotion_detection = pipeline("image-classification", model="nateraw/vit-base-beans")
+except:
+    emotion_detection = None
+
 # Available voice styles (common Supertonic voices)
 VOICE_OPTIONS = [
     ("M5 - Male Voice (Default)", "M5"),
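A note on the loaders above: the bare `except:` clauses catch everything, including KeyboardInterrupt and SystemExit, and silently hide download or auth failures. Also, `nateraw/vit-base-beans` is a bean-leaf disease classifier, so the `emotion_detection` pipeline (unused later in this diff) would not produce emotion labels. A minimal guarded-loading sketch, assuming standard-library logging (the helper name is illustrative):

import logging

logger = logging.getLogger(__name__)

def load_story_model():
    """Return (model, processor, tokenizer), or Nones if loading fails."""
    try:
        from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
        model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
        processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
        tokenizer = AutoTokenizer.from_pretrained("gpt2")
        return model, processor, tokenizer
    except Exception as exc:  # narrower than a bare except; Ctrl-C still propagates
        logger.warning("Story model unavailable: %s", exc)
        return None, None, None

image_to_story_model, image_feature_extractor, image_to_story_tokenizer = load_story_model()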
@@ -68,6 +90,225 @@ def image_to_voice(image, voice_selection):
     except Exception as e:
         return None, f"❌ Error: {str(e)}"
 
+def analyze_mood_from_image(image):
+    """
+    Analyze mood/emotions detected in an image and create a mood chart.
+
+    Args:
+        image: Input image (PIL Image or numpy array)
+
+    Returns:
+        Chart data and mood analysis text
+    """
+    if image is None:
+        return "Please upload an image.", {}
+
+    try:
+        # Simple mood detection based on color analysis
+        img_array = np.array(image)
+
+        # Calculate average colors
+        avg_brightness = np.mean(img_array)
+        avg_red = np.mean(img_array[:, :, 0]) if img_array.shape[2] > 0 else 0
+        avg_green = np.mean(img_array[:, :, 1]) if img_array.shape[2] > 1 else 0
+        avg_blue = np.mean(img_array[:, :, 2]) if img_array.shape[2] > 2 else 0
+
+        # Create mood mapping based on color analysis
+        mood_scores = {
+            "Happy": min(100, int((avg_brightness / 255 * 60) + (avg_yellow := (avg_red + avg_green) / 2 - avg_blue) / 2.55 * 40)),
+            "Calm": min(100, int((avg_blue / 255 * 50) + (avg_green / 255 * 50))),
+            "Energetic": min(100, int(avg_red / 255 * 100)),
+            "Peaceful": min(100, int((255 - avg_brightness) / 255 * 70 + avg_blue / 255 * 30)),
+        }
+
+        # Normalize scores
+        total = sum(mood_scores.values())
+        mood_scores = {k: int((v / total * 100)) for k, v in mood_scores.items()} if total > 0 else mood_scores
+
+        mood_text = f"""
+        **Mood Analysis Results:**
+
+        - 😊 Happy: {mood_scores.get('Happy', 0)}%
+        - 😌 Calm: {mood_scores.get('Calm', 0)}%
+        - ⚡ Energetic: {mood_scores.get('Energetic', 0)}%
+        - 🧘 Peaceful: {mood_scores.get('Peaceful', 0)}%
+
+        **Interpretation:** Based on color analysis, this image conveys a {max(mood_scores, key=mood_scores.get)} mood.
+        """
+
+        return mood_text, mood_scores
+    except Exception as e:
+        return f"❌ Error analyzing mood: {str(e)}", {}
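A caveat for analyze_mood_from_image: `np.array(image)` is 2-D for grayscale uploads, so `img_array.shape[2]` raises IndexError and the function lands in its error branch instead of producing scores. Converting to RGB up front avoids this; a small sketch (the helper name is illustrative):

import numpy as np
from PIL import Image

def to_rgb_array(image):
    # Normalize any input mode (L, RGBA, P, ...) to an H x W x 3 uint8 array.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    return np.array(image.convert("RGB"))

# Sanity check of the colour heuristic on a solid red image:
red = Image.new("RGB", (64, 64), (255, 0, 0))
arr = to_rgb_array(red)
print(arr.shape)            # (64, 64, 3)
print(arr[:, :, 0].mean())  # 255.0, so "Energetic" should dominate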
+
+def ai_story_generation(image, story_theme):
+    """
+    Generate a creative story based on the image content and selected theme.
+
+    Args:
+        image: Input image (PIL Image or numpy array)
+        story_theme: Selected theme for the story
+
+    Returns:
+        Generated story text
+    """
+    if image is None:
+        return "Please upload an image to generate a story."
+
+    try:
+        # Extract text from image first
+        result = image_to_text(image)
+        image_description = result[0]['generated_text']
+
+        # Create a prompt for story generation
+        prompt = f"""Based on an image showing: {image_description}
+
+Theme: {story_theme}
+
+Generate a creative and engaging short story (150-200 words) incorporating elements from the image:"""
+
+        # Generate story using text generation pipeline
+        story = text_generation(prompt, max_length=250, num_return_sequences=1)
+        generated_story = story[0]['generated_text']
+
+        return generated_story
+    except Exception as e:
+        return f"❌ Error generating story: {str(e)}"
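One pitfall in ai_story_generation: for GPT-2 text-generation pipelines, `max_length` counts the prompt tokens too, and `generated_text` echoes the prompt back, so a long caption leaves little or no budget for the story itself. Using `max_new_tokens` together with `return_full_text=False` sidesteps both; a sketch against the same pipeline:

story = text_generation(
    prompt,
    max_new_tokens=200,      # budget for generated tokens only, independent of prompt length
    num_return_sequences=1,
    return_full_text=False,  # drop the echoed prompt from the output
)
generated_story = story[0]["generated_text"]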
+
+def huggingface_picture_to_story(image):
+    """
+    Transform a picture into a story using Hugging Face image-to-text model.
+    Uses the specialized vit-gpt2-image-captioning model.
+
+    Args:
+        image: Input image (PIL Image or numpy array)
+
+    Returns:
+        Generated story based on image
+    """
+    if image is None:
+        return "Please upload an image to generate a story."
+
+    try:
+        if image_to_story_model is None or image_feature_extractor is None:
+            return "Hugging Face story model not available. Using alternative method..."
+
+        # Prepare image
+        if isinstance(image, np.ndarray):
+            image = Image.fromarray(image)
+
+        # Extract features from image
+        pixel_values = image_feature_extractor(images=image, return_tensors="pt").pixel_values
+
+        # Generate story
+        output_ids = image_to_story_model.generate(pixel_values, max_length=100)
+
+        # Decode the generated text
+        story = image_to_story_tokenizer.batch_decode(output_ids, skip_special_tokens=True)
+        generated_story = story[0].strip() if story else "No story generated"
+
+        # Expand the basic caption into a more complete story
+        expanded_story = f"""
+        **AI-Generated Story from Image:**
+
+        {generated_story}
+
+        ---
+
+        **Extended Story:**
+
+        In this captivating scene, {generated_story.lower()}. The image captures a moment of pure artistry and wonder,
+        where every detail tells a part of a larger narrative. As you observe the composition, your mind fills with possibilities
+        and untold stories waiting to be discovered. The interplay of light and shadow creates an atmosphere that invites
+        contemplation and imagination, transporting you to a world where reality meets fantasy.
+        """
+
+        return expanded_story
+    except Exception as e:
+        return f"❌ Error generating story: {str(e)}"
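The manual model/processor/tokenizer wiring in huggingface_picture_to_story can be collapsed into a single pipeline call; the nlpconnect checkpoint bundles its own preprocessor, so the separate google/vit-base-patch16-224 processor is not strictly required. A minimal equivalent sketch:

from transformers import pipeline

captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

def caption(image):
    # Returns a short caption such as "a cat sitting on a couch".
    return captioner(image)[0]["generated_text"]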
+
+def ai_study_helper(image, study_type):
+    """
+    Provide AI-powered study insights based on image content.
+
+    Args:
+        image: Input image (PIL Image or numpy array)
+        study_type: Type of study aid requested
+
+    Returns:
+        Study insights and recommendations
+    """
+    if image is None:
+        return "Please upload an image for study assistance."
+
+    try:
+        # Extract text from image
+        result = image_to_text(image)
+        extracted_text = result[0]['generated_text']
+
+        study_insights = ""
+
+        if study_type == "Summary":
+            study_insights = f"""
+            **AI-Generated Summary:**
+
+            {extracted_text[:200]}...
+
+            **Key Points:**
+            - Content extracted from image: {extracted_text}
+            - Length: {len(extracted_text.split())} words
+            - Recommended study time: {max(5, len(extracted_text.split()) // 100)} minutes
+            """
+        elif study_type == "Quiz Questions":
+            study_insights = f"""
+            **AI-Generated Study Questions:**
+
+            Based on the image content: "{extracted_text[:100]}..."
+
+            1. What are the main topics covered in the image?
+            2. Can you explain the concepts in your own words?
+            3. How would you apply this information?
+            4. What are the key takeaways?
+            5. What additional research would enhance your understanding?
+            """
+        elif study_type == "Learning Tips":
+            study_insights = f"""
+            **Personalized Learning Tips:**
+
+            📚 Study Strategy:
+            - Break down the content: {extracted_text[:50]}...
+            - Use the Feynman Technique to explain concepts simply
+            - Create mind maps for visual learning
+            - Practice active recall with the quiz questions feature
+            - Review regularly (spaced repetition)
+
+            🎯 Focus Areas:
+            - Main concept: Extract and understand key terms
+            - Relationships: Connect ideas together
+            - Application: Practice with real-world examples
+            """
+        else: # Note-Taking
+            study_insights = f"""
+            **AI-Generated Study Notes:**
+
+            **Original Content:**
+            {extracted_text}
+
+            **Simplified Notes:**
+            - Main idea: {extracted_text[:80]}...
+            - Key details: Analyze and list important points
+            - Examples: Look for practical applications
+            - Conclusion: What did you learn?
+
+            **Action Items:**
+            ✅ Review these notes daily
+            ✅ Create flashcards for key terms
+            ✅ Test yourself with quiz questions
+            """
+
+        return study_insights
+    except Exception as e:
+        return f"❌ Error generating study insights: {str(e)}"
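A scope note for ai_study_helper: the default `image-to-text` pipeline is a caption generator, not OCR, so `extracted_text` is a one-line scene description rather than the words printed on a notes page or slide. For real text extraction a TrOCR checkpoint fits better (it expects a single line of text per image, so page-level use needs line segmentation first); a hedged sketch:

from transformers import pipeline

# TrOCR reads printed text; the captioning pipeline above only describes the scene.
ocr = pipeline("image-to-text", model="microsoft/trocr-base-printed")

def extract_printed_text(line_image):
    return ocr(line_image)[0]["generated_text"]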
+
 # Custom CSS for professional styling
 custom_css = """
 .gradio-container {
@@ -163,95 +404,286 @@ custom_css = """
 """
 
 # Create Gradio interface
+with gr.Blocks(title="AI Multimedia Studio", theme=gr.themes.Soft(), css=custom_css) as demo:
 
     # Header Section
     gr.HTML("""
     <div class="header">
+        <h1>🎨 AI Multimedia Studio</h1>
+        <p>Transform images with AI-powered technology: voice, stories, mood analysis & study tools</p>
     </div>
     """)
 
     # Main Content Container
     with gr.Column(elem_classes="main-content"):
 
+        # Create tabs for different features
+        with gr.Tabs():
+
+            # ===== TAB 1: Image to Voice =====
+            with gr.TabItem("🎙️ Image to Voice"):
+
+                # Instructions Section
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        gr.HTML("""
+                        <div class="feature-box">
+                            <h3>📷 Step 1: Upload Image</h3>
+                            <p>Upload any image containing text. Our AI will extract it automatically.</p>
+                        </div>
+                        """)
+                    with gr.Column(scale=1):
+                        gr.HTML("""
+                        <div class="feature-box">
+                            <h3>🤖 Step 2: AI Processing</h3>
+                            <p>Advanced vision-language models analyze and extract text from your image.</p>
+                        </div>
+                        """)
+                    with gr.Column(scale=1):
+                        gr.HTML("""
+                        <div class="feature-box">
+                            <h3>🔊 Step 3: Audio Generation</h3>
+                            <p>Text is converted to natural-sounding speech using Supertonic TTS.</p>
+                        </div>
+                        """)
+
+                # Main Workflow Section
+                with gr.Row():
+                    # Left Column - Input
+                    with gr.Column(scale=1, elem_classes="upload-section"):
+                        gr.Markdown("### 📤 Upload Your Image", elem_classes="section-title")
+                        image_input = gr.Image(
+                            label="",
+                            type="pil",
+                            height=350,
+                            show_label=False
+                        )
+
+                        gr.Markdown("### 🎙️ Voice Settings", elem_classes="section-title")
+                        voice_dropdown = gr.Dropdown(
+                            choices=[opt[0] for opt in VOICE_OPTIONS],
+                            label="Select Voice Style",
+                            value="M5 - Male Voice (Default)",
+                            info="Choose a voice style for the generated audio"
+                        )
+
+                        generate_btn = gr.Button(
+                            "✨ Generate Audio",
+                            variant="primary",
+                            elem_classes="generate-btn",
+                            size="lg"
+                        )
+
+                    # Right Column - Output
+                    with gr.Column(scale=1, elem_classes="output-section"):
+                        gr.Markdown("### 📝 Extracted Text", elem_classes="section-title")
+                        text_output = gr.Textbox(
+                            label="",
+                            lines=6,
+                            show_label=False,
+                            placeholder="The extracted text will appear here...",
+                            interactive=False
+                        )
+
+                        gr.Markdown("### 🔊 Generated Audio", elem_classes="section-title")
+                        audio_output = gr.Audio(
+                            label="",
+                            type="filepath",
+                            show_label=False
+                        )
+
+                # Connection
+                generate_btn.click(
+                    fn=image_to_voice,
+                    inputs=[image_input, voice_dropdown],
+                    outputs=[audio_output, text_output],
+                    show_progress="full"
                 )
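Because `choices=[opt[0] for opt in VOICE_OPTIONS]` registers only the display labels, `image_to_voice` receives the full label string ("M5 - Male Voice (Default)") rather than the short "M5" code and must map it back itself. Gradio dropdowns also accept (label, value) tuples, which keeps the UI text while handing the handler the code; a sketch under that assumption:

voice_dropdown = gr.Dropdown(
    choices=VOICE_OPTIONS,  # (label, value) tuples: the UI shows the label, the handler gets "M5"
    label="Select Voice Style",
    value="M5",
    info="Choose a voice style for the generated audio",
)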
+
+            # ===== TAB 2: Mood Chart =====
+            with gr.TabItem("📊 Mood Chart"):
+
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        gr.Markdown("### 📤 Upload Your Image", elem_classes="section-title")
+                        mood_image_input = gr.Image(
+                            label="",
+                            type="pil",
+                            height=350,
+                            show_label=False
+                        )
+
+                        mood_analyze_btn = gr.Button(
+                            "🔍 Analyze Mood",
+                            variant="primary",
+                            elem_classes="generate-btn",
+                            size="lg"
+                        )
+
+                    with gr.Column(scale=1):
+                        gr.Markdown("### 📊 Mood Analysis Results", elem_classes="section-title")
+                        mood_output = gr.Textbox(
+                            label="",
+                            lines=10,
+                            show_label=False,
+                            placeholder="Mood analysis will appear here...",
+                            interactive=False
+                        )
 
+                mood_analyze_btn.click(
+                    fn=analyze_mood_from_image,
+                    inputs=[mood_image_input],
+                    outputs=[mood_output],
+                    show_progress="full"
                 )
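A wiring mismatch in this tab: analyze_mood_from_image returns two values (the markdown text and the scores dict), but only mood_output is listed, so the scores are at best discarded and, depending on the Gradio version, the extra return value triggers a return-value mismatch error. One option is a second output component; gr.Label renders a {name: score} dict (the scores would need to be 0-1 fractions for its confidence bars):

mood_scores_output = gr.Label(label="Mood Scores")

mood_analyze_btn.click(
    fn=analyze_mood_from_image,
    inputs=[mood_image_input],
    outputs=[mood_output, mood_scores_output],  # two outputs to match the two return values
    show_progress="full",
)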
+
+            # ===== TAB 3: Story Generation =====
+            with gr.TabItem("📖 AI Story Generator"):
+
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        gr.Markdown("### 📤 Upload Your Image", elem_classes="section-title")
+                        story_image_input = gr.Image(
+                            label="",
+                            type="pil",
+                            height=350,
+                            show_label=False
+                        )
+
+                        gr.Markdown("### 📖 Story Theme", elem_classes="section-title")
+                        story_theme_dropdown = gr.Dropdown(
+                            choices=[
+                                "Adventure",
+                                "Fantasy",
+                                "Mystery",
+                                "Romance",
+                                "Science Fiction",
+                                "Comedy",
+                                "Educational",
+                                "Inspirational"
+                            ],
+                            label="Select Story Theme",
+                            value="Adventure",
+                            info="Choose a theme for your story"
+                        )
+
+                        story_generate_btn = gr.Button(
+                            "✍️ Generate Story",
+                            variant="primary",
+                            elem_classes="generate-btn",
+                            size="lg"
+                        )
+
+                    with gr.Column(scale=1):
+                        gr.Markdown("### 📖 Generated Story", elem_classes="section-title")
+                        story_output = gr.Textbox(
+                            label="",
+                            lines=12,
+                            show_label=False,
+                            placeholder="Your story will appear here...",
+                            interactive=False
+                        )
 
+                story_generate_btn.click(
+                    fn=ai_story_generation,
+                    inputs=[story_image_input, story_theme_dropdown],
+                    outputs=[story_output],
+                    show_progress="full"
                 )
 
+            # ===== TAB 3B: Hugging Face Picture to Story =====
+            with gr.TabItem("🎨 HuggingFace Picture to Story"):
+
+                gr.Markdown("""
+                ### 🤖 Advanced AI Story Generation using Hugging Face
+
+                This feature uses the cutting-edge **Vision Transformer (ViT) + GPT-2** model from Hugging Face
+                to directly transform your picture into a creative narrative story.
+                """)
+
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        gr.Markdown("### 📤 Upload Your Picture", elem_classes="section-title")
+                        hf_story_image_input = gr.Image(
+                            label="",
+                            type="pil",
+                            height=350,
+                            show_label=False
+                        )
+
+                        hf_story_generate_btn = gr.Button(
+                            "🚀 Transform to Story",
+                            variant="primary",
+                            elem_classes="generate-btn",
+                            size="lg"
+                        )
+
+                    with gr.Column(scale=1):
+                        gr.Markdown("### 📖 AI-Generated Story", elem_classes="section-title")
+                        hf_story_output = gr.Textbox(
+                            label="",
+                            lines=14,
+                            show_label=False,
+                            placeholder="Your AI story will appear here...",
+                            interactive=False
+                        )
+
+                hf_story_generate_btn.click(
+                    fn=huggingface_picture_to_story,
+                    inputs=[hf_story_image_input],
+                    outputs=[hf_story_output],
+                    show_progress="full"
                 )
+
+            # ===== TAB 4: Study Helper =====
+            with gr.TabItem("📚 AI Study Helper"):
+
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        gr.Markdown("### 📤 Upload Your Study Material", elem_classes="section-title")
+                        study_image_input = gr.Image(
+                            label="",
+                            type="pil",
+                            height=350,
+                            show_label=False
+                        )
+
+                        gr.Markdown("### 🎯 Study Assistance Type", elem_classes="section-title")
+                        study_type_dropdown = gr.Dropdown(
+                            choices=[
+                                "Summary",
+                                "Quiz Questions",
+                                "Learning Tips",
+                                "Note-Taking"
+                            ],
+                            label="Select Study Aid",
+                            value="Summary",
+                            info="Choose the type of study assistance you need"
+                        )
+
+                        study_generate_btn = gr.Button(
+                            "📚 Generate Study Aid",
+                            variant="primary",
+                            elem_classes="generate-btn",
+                            size="lg"
+                        )
+
+                    with gr.Column(scale=1):
+                        gr.Markdown("### 📊 Study Insights", elem_classes="section-title")
+                        study_output = gr.Textbox(
+                            label="",
+                            lines=12,
+                            show_label=False,
+                            placeholder="Study insights will appear here...",
+                            interactive=False
+                        )
 
+                study_generate_btn.click(
+                    fn=ai_study_helper,
+                    inputs=[study_image_input, study_type_dropdown],
+                    outputs=[study_output],
+                    show_progress="full"
                 )
 
     # Footer
     gr.HTML("""