Spaces:

danielrosehill
/

Context-Cruncher

Sleeping

File size: 10,409 Bytes

59c7f4b

"""
Context Cruncher - Gradio Application
Extract structured context data from voice recordings using Gemini AI.
"""
import gradio as gr
import os
from pathlib import Path
import tempfile
from dotenv import load_dotenv
from gemini_processor import (
    process_audio_with_gemini,
    create_markdown_file,
    create_json_file
)

# Load environment variables
load_dotenv()


def process_audio(
    audio_input,
    uploaded_file,
    api_key: str,
    user_identification: str,
    user_name: str = ""
) -> tuple:
    """
    Process audio from either recording or upload.

    Args:
        audio_input: Audio from microphone recording
        uploaded_file: Uploaded audio file
        api_key: Gemini API key
        user_identification: "name" or "user"
        user_name: User's name if using name identification

    Returns:
        Tuple of (markdown_content, markdown_file, json_file, status_message)
    """
    try:
        # Validate API key
        if not api_key or api_key.strip() == "":
            return (
                "",
                None,
                None,
                "Error: Please provide a Gemini API key"
            )

        # Determine which audio source to use
        audio_path = None
        if audio_input is not None:
            audio_path = audio_input
        elif uploaded_file is not None:
            audio_path = uploaded_file.name

        if audio_path is None:
            return (
                "",
                None,
                None,
                "Error: Please record audio or upload an audio file"
            )

        # Determine user reference
        user_ref = None
        if user_identification == "name":
            if not user_name or user_name.strip() == "":
                return (
                    "",
                    None,
                    None,
                    "Error: Please provide your name when using name identification"
                )
            user_ref = user_name.strip()

        # Process with Gemini
        status_msg = "Processing audio with Gemini API..."
        context_markdown, human_readable_name, snake_case_filename = process_audio_with_gemini(
            audio_path,
            api_key,
            user_ref
        )

        # Create output files
        md_filename, md_content = create_markdown_file(
            context_markdown,
            human_readable_name,
            snake_case_filename
        )

        json_filename, json_content = create_json_file(
            context_markdown,
            human_readable_name,
            snake_case_filename
        )

        # Write files to temp directory for download
        temp_dir = tempfile.mkdtemp()
        md_path = Path(temp_dir) / md_filename
        json_path = Path(temp_dir) / json_filename

        with open(md_path, 'w') as f:
            f.write(md_content)

        with open(json_path, 'w') as f:
            f.write(json_content)

        return (
            md_content,
            str(md_path),
            str(json_path),
            f"Success! Context extracted: {human_readable_name}"
        )

    except Exception as e:
        return (
            "",
            None,
            None,
            f"Error: {str(e)}"
        )


# Custom CSS for better styling
custom_css = """
.gradio-container {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
}
.main-header {
    text-align: center;
    margin-bottom: 1.5rem;
    padding-bottom: 1rem;
    border-bottom: 2px solid #e5e7eb;
}
.main-header h1 {
    font-size: 2rem;
    font-weight: 600;
    color: #1f2937;
    margin-bottom: 0.5rem;
}
.main-header p {
    color: #6b7280;
    font-size: 1rem;
}
.section-header {
    font-weight: 600;
    color: #374151;
    margin-bottom: 1rem;
}
"""

# Create Gradio interface
with gr.Blocks(css=custom_css, title="Context Cruncher") as demo:
    gr.Markdown(
        """
        # Context Cruncher

        Extract structured context data from voice recordings using AI
        """,
        elem_classes="main-header"
    )

    with gr.Tabs():
        with gr.Tab("Extract"):
            with gr.Row():
                with gr.Column(scale=1):
                    with gr.Accordion("Configuration", open=True):
                        api_key_input = gr.Textbox(
                            label="Gemini API Key",
                            placeholder="Enter your Gemini API key",
                            type="password",
                            value=os.getenv("GEMINI_API", ""),
                            info="Get your API key from https://ai.google.dev/"
                        )

                        user_identification = gr.Radio(
                            choices=["user", "name"],
                            value="user",
                            label="User Identification",
                            info="How should you be referred to in the context data?"
                        )

                        user_name_input = gr.Textbox(
                            label="Your Name",
                            placeholder="Enter your name",
                            visible=False,
                            info="Used when 'name' is selected above"
                        )

                    gr.Markdown("### Audio Input", elem_classes="section-header")

                    audio_recording = gr.Audio(
                        sources=["microphone"],
                        type="filepath",
                        label="Record Audio"
                    )

                    gr.Markdown("**OR**")

                    audio_upload = gr.File(
                        label="Upload Audio File",
                        file_types=["audio"],
                        type="filepath"
                    )

                    process_btn = gr.Button("Extract Context", variant="primary", size="lg")

                with gr.Column(scale=1):
                    gr.Markdown("### Results", elem_classes="section-header")

                    status_output = gr.Textbox(
                        label="Status",
                        interactive=False,
                        show_label=True
                    )

                    context_display = gr.Textbox(
                        label="Context Data (Markdown)",
                        lines=18,
                        interactive=False,
                        show_copy_button=True
                    )

                    with gr.Row():
                        markdown_download = gr.File(label="Download Markdown")
                        json_download = gr.File(label="Download JSON")

        with gr.Tab("About"):
            gr.Markdown(
                """
                ## What is Context Cruncher?

                Context Cruncher transforms casual voice recordings into clean, structured context data
                that AI systems can use for personalization.

                **Context data** refers to specific information about users that grounds AI inference
                for more personalized results.

                ## How It Works

                1. **Configure** - Enter your Gemini API key and choose how you want to be identified
                2. **Input Audio** - Either record directly in your browser or upload an audio file (MP3, WAV, OPUS)
                3. **Extract** - Click the button and let AI clean up your recording into structured context data
                4. **Download** - Get your context data as Markdown or JSON, or copy directly from the text area

                ## What Gets Extracted

                This tool processes your audio by:

                - Removing irrelevant information and tangents
                - Eliminating duplicates and redundancy
                - Reformatting from first person to third person
                - Organizing information hierarchically
                - Outputting both Markdown and JSON formats

                ## Example Transformation

                **Raw Audio:**
                > "Okay so... let's document my health problems... I've had asthma since I was a kid.
                > I take a daily inhaler called Relvar for that. Oh hey Jay! What's up!
                > Okay, where was I... I also take Vyvanse for ADHD."

                **Structured Output:**
                ```markdown
                ## Medical Conditions

                - the user has had asthma since childhood
                - the user has adult ADHD

                ## Medication List

                - the user takes Relvar, daily, for asthma
                - the user takes Vyvanse for ADHD
                ```

                ## Privacy Notice

                Your audio is processed using the Gemini API. Review Google's privacy policies
                before using this tool with sensitive information.

                ## Technical Details

                - **AI Model**: Gemini 2.0 Flash (multimodal audio understanding)
                - **Processing**: Direct audio file upload to Gemini API
                - **Output Formats**: Markdown and JSON

                ## Use Cases

                - AI assistant personalization
                - Knowledge management
                - Preference mapping
                - Medical history documentation (note privacy considerations)
                - Project context capture
                """
            )

    # Show/hide name input based on identification method
    def toggle_name_input(identification_choice):
        return gr.update(visible=identification_choice == "name")

    user_identification.change(
        fn=toggle_name_input,
        inputs=[user_identification],
        outputs=[user_name_input]
    )

    # Process button click
    process_btn.click(
        fn=process_audio,
        inputs=[
            audio_recording,
            audio_upload,
            api_key_input,
            user_identification,
            user_name_input
        ],
        outputs=[
            context_display,
            markdown_download,
            json_download,
            status_output
        ]
    )


if __name__ == "__main__":
    # For Hugging Face Spaces, share should be False
    # Set server_name to 0.0.0.0 for Spaces compatibility
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )