TarSh8654 committed on
Commit
9c8e6cc
·
verified ·
1 Parent(s): 2828f18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +201 -155
app.py CHANGED
@@ -3,10 +3,18 @@ import os
3
  import tempfile
4
  import logging
5
  import json
 
6
 
7
  # Import your dispatcher class from the local summarizer_tool.py file
8
  from summarizer_tool import AllInOneDispatcher
9
 
 
 
 
 
 
 
 
10
  # Configure logging for the Gradio app
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
 
@@ -18,183 +26,220 @@ try:
18
  logging.info("AllInOneDispatcher initialized successfully for Gradio app.")
19
  except Exception as e:
20
  logging.error(f"Failed to initialize AllInOneDispatcher: {e}")
21
- # If dispatcher fails to initialize, the app might not work.
22
- # Raise a runtime error to make the Space fail gracefully with a clear message.
23
  raise RuntimeError(f"Failed to initialize AI models. Check logs for details: {e}") from e
24
 
25
- # --- Gradio Interface Functions ---
26
-
27
- # Function to handle Text Processing
28
- def process_text_task(text_input: str, task_name: str, max_summary_len: int, min_summary_len: int, max_gen_tokens: int, num_gen_sequences: int, tts_lang: str):
29
- """Handles various text-based AI tasks."""
30
- if not text_input.strip():
31
- return "Please enter some text.", None # Return None for audio output
 
 
 
 
32
 
33
- kwargs = {}
34
- if task_name == "summarization":
35
- kwargs["max_length"] = max_summary_len
36
- kwargs["min_length"] = min_summary_len
37
- elif task_name == "text-generation":
38
- kwargs["max_new_tokens"] = max_gen_tokens
39
- kwargs["num_return_sequences"] = num_gen_sequences
40
- elif task_name == "tts":
41
- kwargs["lang"] = tts_lang
42
 
43
  try:
44
- logging.info(f"Processing text with task: {task_name}")
45
- result = dispatcher.process(text_input, task=task_name, **kwargs)
46
 
47
- if task_name == "tts":
48
- # For TTS, dispatcher.process returns a file path
49
- if os.path.exists(result):
50
- return "Speech generated successfully!", result # Return text message and audio file path
51
- else:
52
- return "TTS failed to generate audio.", None
53
- else:
54
- # For other text tasks, return the JSON representation of the result
55
- return json.dumps(result, indent=2), None
56
- except Exception as e:
57
- logging.error(f"Error processing text: {e}")
58
- return f"An error occurred: {e}", None
59
 
60
- # Function to handle File Processing
61
- def process_file_task(file_obj, task_name: str):
62
- """Handles image, audio, PDF, and limited video processing."""
63
- if file_obj is None:
64
- return "Please upload a file."
65
-
66
- # Gradio passes the file path directly for type="filepath"
67
- file_path = file_obj
68
-
69
- try:
70
- logging.info(f"Processing file '{file_path}' with task: {task_name}")
71
- result = dispatcher.process(file_path, task=task_name)
72
-
73
- if task_name == "automatic-speech-recognition":
74
- return result.get('text', 'No transcription found.')
75
- elif task_name == "video":
76
- # Video analysis returns a dict with image and audio results
77
- return f"Video Analysis Result:\nImage Analysis: {json.dumps(result.get('image_analysis'), indent=2)}\nAudio Analysis: {json.dumps(result.get('audio_analysis'), indent=2)}"
78
  else:
79
- return json.dumps(result, indent=2) # Use json.dumps for structured output
80
-
81
- except NotImplementedError as e:
82
- logging.error(f"Task not implemented: {e}")
83
- return f"Task not fully implemented: {e}. Video processing is complex and requires system-level ffmpeg."
84
- except ValueError as e:
85
- logging.error(f"Value error processing file: {e}")
86
- return f"Error processing file: {e}. Ensure the file type matches the selected task."
87
  except Exception as e:
88
- logging.error(f"An unexpected error occurred during file processing: {e}")
89
  return f"An unexpected error occurred: {e}"
90
 
91
- # NEW: Function to handle Dataset Processing
92
- def process_dataset_task(dataset_name: str, subset_name: str, split: str, column_to_process: str, task: str, num_samples: int):
93
  """
94
- Processes a specified column from a Hugging Face dataset using the dispatcher.
95
  """
96
- if not dataset_name.strip() or not column_to_process.strip():
97
- return "Please provide a Dataset Name and Column to Process."
98
-
99
- # Define allowed tasks for dataset processing (based on what your dispatcher can handle)
100
- allowed_dataset_tasks = [
101
- "sentiment-analysis", "summarization", "text-generation", "image-classification",
102
- "object-detection", "automatic-speech-recognition", "translation_en_to_fr"
103
- # "pdf" and "video" are file-specific, not direct dataset column tasks
104
- ]
105
- if task not in allowed_dataset_tasks:
106
- return f"Selected task '{task}' is not supported for dataset processing. Choose from: {', '.join(allowed_dataset_tasks)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  try:
109
- logging.info(f"Starting dataset processing for {dataset_name}/{subset_name} on column '{column_to_process}' with task '{task}'.")
110
- results = dispatcher.process_dataset_from_hub(
111
- dataset_name=dataset_name,
112
- subset_name=subset_name,
113
- split=split,
114
- column_to_process=column_to_process,
115
- task=task,
116
- num_samples=num_samples
117
- )
118
- return json.dumps(results, indent=2) # Return results as pretty-printed JSON
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  except Exception as e:
120
- logging.error(f"Error during dataset processing: {e}")
121
- return f"An error occurred during dataset processing: {e}"
 
 
 
 
 
 
 
122
 
123
 
124
  # --- Gradio Interface Definition ---
125
 
126
- # Text Processing Tab
127
- text_tab_inputs = [
128
- gr.Textbox(lines=8, label="Enter Text", placeholder="Type your text here for summarization, sentiment analysis, etc."),
129
- gr.Dropdown(
130
- ["sentiment-analysis", "summarization", "text-generation", "tts", "translation_en_to_fr"],
131
- label="Select Text Task",
132
- value="sentiment-analysis"
133
- ),
134
- gr.Slider(minimum=10, maximum=200, value=50, step=1, label="Max Summary Length (for Summarization)"),
135
- gr.Slider(minimum=5, maximum=100, value=10, step=1, label="Min Summary Length (for Summarization)"),
136
- gr.Slider(minimum=10, maximum=200, value=50, step=1, label="Max Generated Tokens (for Text Generation)"),
137
- gr.Slider(minimum=1, maximum=3, value=1, step=1, label="Number of Sequences (for Text Generation)"),
138
- gr.Dropdown(["en", "fr", "es"], label="TTS Language", value="en")
139
- ]
140
- text_tab_outputs = [
141
- gr.Textbox(label="Analysis Result / Generated Text"),
142
- gr.Audio(label="Generated Speech (for TTS)", type="filepath")
143
  ]
144
- text_interface = gr.Interface(
145
- fn=process_text_task,
146
- inputs=text_tab_inputs,
147
- outputs=text_tab_outputs,
148
- title="📝 Text Processing",
149
- description="Perform various NLP tasks like sentiment analysis, summarization, text generation, and text-to-speech."
150
- )
151
 
152
- # File Processing Tab
153
- file_tab_inputs = [
154
- gr.File(label="Upload File", type="filepath", file_types=[".pdf", ".mp3", ".wav", ".jpg", ".jpeg", ".png", ".mov", ".mp4", ".avi", ".mkv"]),
155
- gr.Dropdown(
156
- ["image-classification", "object-detection", "automatic-speech-recognition", "pdf", "video"],
157
- label="Select File Task",
158
- value="image-classification"
159
- )
160
- ]
161
- file_tab_outputs = gr.Textbox(label="File Processing Result")
162
- file_interface = gr.Interface(
163
- fn=process_file_task,
164
- inputs=file_tab_inputs,
165
- outputs=file_tab_outputs,
166
- title="📁 File Processing",
167
- description="Upload an image, audio, PDF, or video file for AI analysis."
168
- )
169
-
170
- # Dataset Processing Tab
171
- dataset_tab_inputs = [
172
- gr.Textbox(label="Hugging Face Dataset Name", placeholder="e.g., 'glue', 'mnist', 'common_voice'"),
173
- gr.Textbox(label="Dataset Subset (Optional)", placeholder="e.g., 'sst2' for 'glue', 'en' for 'common_voice'"),
174
- gr.Dropdown(["train", "validation", "test"], label="Dataset Split", value="train"),
175
- gr.Textbox(label="Column to Process", placeholder="e.g., 'sentence', 'image', 'audio'"),
176
- gr.Dropdown(
177
- ["sentiment-analysis", "summarization", "text-generation", "image-classification",
178
- "object-detection", "automatic-speech-recognition", "translation_en_to_fr"],
179
- label="AI Task for Dataset Column",
180
- value="sentiment-analysis"
181
- ),
182
- gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Samples to Process (max 20 for demo)"),
183
- ]
184
- dataset_tab_outputs = gr.Textbox(label="Dataset Processing Results (JSON)")
185
- dataset_interface = gr.Interface(
186
- fn=process_dataset_task,
187
- inputs=dataset_tab_inputs,
188
- outputs=dataset_tab_outputs,
189
- title="📊 Dataset Processing",
190
- description="Load a dataset from Hugging Face Hub and apply an AI task to a specified column (processes a limited number of samples)."
191
- )
192
-
193
-
194
- # Combine all interfaces into a Tabbed Interface
195
- demo = gr.TabbedInterface(
196
- [text_interface, file_interface, dataset_interface], # Include all three interfaces
197
- ["Text Analyzer", "File Analyzer", "Dataset Analyzer"] # Tab titles
198
  )
199
 
200
  # --- Launch the Gradio App ---
@@ -202,3 +247,4 @@ if __name__ == "__main__":
202
  # For local testing, use demo.launch()
203
  # For Hugging Face Spaces, ensure all dependencies are in requirements.txt
204
  demo.launch(share=True) # share=True creates a public link for easy sharing (temporary)
 
 
3
  import tempfile
4
  import logging
5
  import json
6
+ import requests # For Gemini API calls
7
 
8
  # Import your dispatcher class from the local summarizer_tool.py file
9
  from summarizer_tool import AllInOneDispatcher
10
 
11
+ # --- Gemini API Configuration ---
12
+ # The API key will be automatically provided by the Canvas environment at runtime
13
+ # if left as an empty string. DO NOT hardcode your API key here.
14
+ GEMINI_API_KEY = "" # Leave as empty string for Canvas environment
15
+ GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
16
+
17
+
18
  # Configure logging for the Gradio app
19
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
20
 
 
26
  logging.info("AllInOneDispatcher initialized successfully for Gradio app.")
27
  except Exception as e:
28
  logging.error(f"Failed to initialize AllInOneDispatcher: {e}")
 
 
29
  raise RuntimeError(f"Failed to initialize AI models. Check logs for details: {e}") from e
30
 
31
+ # --- Helper Function for Gemini API Call ---
32
+ def call_gemini_api(prompt: str) -> str:
33
+ """
34
+ Calls the Gemini API with the given prompt and returns the generated text.
35
+ """
36
+ headers = {
37
+ 'Content-Type': 'application/json',
38
+ }
39
+ payload = {
40
+ "contents": [{"role": "user", "parts": [{"text": prompt}]}],
41
+ }
42
 
43
+ full_api_url = f"{GEMINI_API_URL}?key={GEMINI_API_KEY}" if GEMINI_API_KEY else GEMINI_API_URL
 
 
 
 
 
 
 
 
44
 
45
  try:
46
+ response = requests.post(full_api_url, headers=headers, data=json.dumps(payload))
47
+ response.raise_for_status() # Raise an exception for HTTP errors
48
 
49
+ result = response.json()
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ if result.get("candidates") and len(result["candidates"]) > 0 and \
52
+ result["candidates"][0].get("content") and \
53
+ result["candidates"][0]["content"].get("parts") and \
54
+ len(result["candidates"][0]["content"]["parts"]) > 0:
55
+ return result["candidates"][0]["content"]["parts"][0]["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  else:
57
+ return "I couldn't generate a response for that."
58
+ except requests.exceptions.RequestException as e:
59
+ logging.error(f"Gemini API Call Error: {e}")
60
+ return f"An error occurred while connecting to the AI: {e}"
61
+ except json.JSONDecodeError:
62
+ logging.error(f"Gemini API Response Error: Could not decode JSON. Response: {response.text}")
63
+ return "An error occurred while processing the AI's response."
 
64
  except Exception as e:
65
+ logging.error(f"An unexpected error occurred during Gemini API call: {e}")
66
  return f"An unexpected error occurred: {e}"
67
 
68
+ # --- Main Chat Function for Gradio ---
69
+ async def chat_with_ai(message: str, history: list, selected_task: str, uploaded_file):
70
  """
71
+ Processes user messages, selected tasks, and uploaded files.
72
  """
73
+ response_text = ""
74
+ file_path = None
75
+
76
+ # Handle file upload first, if any
77
+ if uploaded_file is not None:
78
+ file_path = uploaded_file # Gradio passes the path directly for type="filepath"
79
+ logging.info(f"Received file: {file_path} for task: {selected_task}")
80
+
81
+ # Determine file type for task mapping
82
+ file_extension = os.path.splitext(file_path)[1].lower()
83
+
84
+ if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
85
+ if selected_task not in ["Image Classification", "Object Detection"]:
86
+ return "Please select 'Image Classification' or 'Object Detection' for image files."
87
+ elif file_extension in ['.mp3', '.wav', '.ogg', '.flac', '.m4a']:
88
+ if selected_task != "Automatic Speech Recognition":
89
+ return "Please select 'Automatic Speech Recognition' for audio files."
90
+ elif file_extension in ['.mp4', '.mov', '.avi', '.mkv']:
91
+ if selected_task != "Video Analysis":
92
+ return "Please select 'Video Analysis' for video files."
93
+ elif file_extension == '.pdf':
94
+ if selected_task != "PDF Summarization (RAG)":
95
+ return "Please select 'PDF Summarization (RAG)' for PDF files."
96
+ else:
97
+ return f"Unsupported file type: {file_extension}. Please upload a supported file or select 'General Chat'."
98
+
99
 
100
  try:
101
+ if selected_task == "General Chat":
102
+ # Use Gemini for general chat
103
+ prompt = f"User: {message}\nAI:"
104
+ response_text = call_gemini_api(prompt)
105
+ return response_text
106
+
107
+ elif selected_task == "Summarize Text":
108
+ if not message.strip(): return "Please provide text to summarize."
109
+ result = dispatcher.process(message, task="summarization", max_length=150, min_length=30)
110
+ response_text = f"Here's a summary of your text:\n\n{json.dumps(result, indent=2)}"
111
+ return response_text
112
+
113
+ elif selected_task == "Sentiment Analysis":
114
+ if not message.strip(): return "Please provide text for sentiment analysis."
115
+ result = dispatcher.process(message, task="sentiment-analysis")
116
+ response_text = f"The sentiment of your text is: {json.dumps(result, indent=2)}"
117
+ return response_text
118
+
119
+ elif selected_task == "Text Generation":
120
+ if not message.strip(): return "Please provide a prompt for text generation."
121
+ result = dispatcher.process(message, task="text-generation", max_new_tokens=100, num_return_sequences=1)
122
+ generated_text = result[0]['generated_text'] if result and isinstance(result, list) and result[0].get('generated_text') else str(result)
123
+ response_text = f"Here's the generated text:\n\n{generated_text}"
124
+ return response_text
125
+
126
+ elif selected_task == "Text-to-Speech (TTS)":
127
+ if not message.strip(): return "Please provide text for speech generation."
128
+ audio_path = dispatcher.process(message, task="tts", lang="en") # Default to English
129
+ if os.path.exists(audio_path):
130
+ # Gradio ChatInterface can return audio directly
131
+ return (f"Here's the audio for your text:", gr.Audio(audio_path, label="Generated Speech", autoplay=True))
132
+ else:
133
+ return "Failed to generate speech."
134
+
135
+ elif selected_task == "Translation (EN to FR)":
136
+ if not message.strip(): return "Please provide text to translate."
137
+ result = dispatcher.process(message, task="translation_en_to_fr")
138
+ translated_text = result[0]['translation_text'] if result and isinstance(result, list) and result[0].get('translation_text') else str(result)
139
+ response_text = f"Here's the English to French translation:\n\n{translated_text}"
140
+ return response_text
141
+
142
+ elif selected_task == "Image Classification":
143
+ if not file_path: return "Please upload an image file for classification."
144
+ result = dispatcher.process(file_path, task="image-classification")
145
+ response_text = f"Image Classification Result:\n\n{json.dumps(result, indent=2)}"
146
+ return response_text
147
+
148
+ elif selected_task == "Object Detection":
149
+ if not file_path: return "Please upload an image file for object detection."
150
+ result = dispatcher.process(file_path, task="object-detection")
151
+ response_text = f"Object Detection Result:\n\n{json.dumps(result, indent=2)}"
152
+ return response_text
153
+
154
+ elif selected_task == "Automatic Speech Recognition":
155
+ if not file_path: return "Please upload an audio file for transcription."
156
+ result = dispatcher.process(file_path, task="automatic-speech-recognition")
157
+ transcription = result.get('text', 'No transcription found.')
158
+ response_text = f"Audio Transcription:\n\n{transcription}"
159
+ return response_text
160
+
161
+ elif selected_task == "Video Analysis":
162
+ if not file_path: return "Please upload a video file for analysis."
163
+ result = dispatcher.process(file_path, task="video")
164
+ image_analysis = json.dumps(result.get('image_analysis'), indent=2)
165
+ audio_analysis = json.dumps(result.get('audio_analysis'), indent=2)
166
+ response_text = f"Video Analysis Result:\n\nImage Analysis:\n{image_analysis}\n\nAudio Analysis:\n{audio_analysis}"
167
+ return response_text
168
+
169
+ elif selected_task == "PDF Summarization (RAG)":
170
+ if not file_path: return "Please upload a PDF file for summarization."
171
+ result = dispatcher.process(file_path, task="pdf")
172
+ response_text = f"PDF Summary:\n\n{result}"
173
+ return response_text
174
+
175
+ elif selected_task == "Process Dataset":
176
+ # This task requires more specific parameters (dataset name, column, etc.)
177
+ # It's not directly compatible with a single chat message input.
178
+ # We'll guide the user to a separate interface for this, or simplify.
179
+ # For now, let's keep it simple: user provides dataset_name, subset, split, column in message.
180
+ # A more robust solution would involve a separate Gradio component for this.
181
+ return "For 'Process Dataset', please use the dedicated 'Dataset Analyzer' tab if it were available, or provide all parameters in your message like: 'dataset: glue, subset: sst2, split: train, column: sentence, task: sentiment-analysis, samples: 2'."
182
+ # Example of parsing:
183
+ # parts = message.split(',')
184
+ # params = {p.split(':')[0].strip(): p.split(':')[1].strip() for p in parts if ':' in p}
185
+ # dataset_name = params.get('dataset')
186
+ # subset_name = params.get('subset', '')
187
+ # split = params.get('split', 'train')
188
+ # column = params.get('column')
189
+ # task_for_dataset = params.get('task')
190
+ # num_samples = int(params.get('samples', 2))
191
+ # if not all([dataset_name, column, task_for_dataset]):
192
+ # return "Please provide dataset name, column, and task for dataset processing."
193
+ # result = dispatcher.process_dataset_from_hub(dataset_name, subset_name, split, column, task_for_dataset, num_samples)
194
+ # return f"Dataset Processing Results:\n\n{json.dumps(result, indent=2)}"
195
+
196
+ else:
197
+ return "Please select a valid task from the dropdown."
198
+
199
  except Exception as e:
200
+ logging.error(f"An error occurred in chat_with_ai: {e}")
201
+ return f"An unexpected error occurred during processing: {e}"
202
+ finally:
203
+ # Clean up temporary file if it was uploaded and processed
204
+ if file_path and os.path.exists(file_path):
205
+ # Gradio handles temp file cleanup for gr.File(type="filepath")
206
+ # However, if you manually copy/save, ensure cleanup.
207
+ # For this setup, Gradio should handle it.
208
+ pass
209
 
210
 
211
  # --- Gradio Interface Definition ---
212
 
213
+ # Define the choices for the task dropdown
214
+ task_choices = [
215
+ "General Chat",
216
+ "Summarize Text",
217
+ "Sentiment Analysis",
218
+ "Text Generation",
219
+ "Text-to-Speech (TTS)",
220
+ "Translation (EN to FR)",
221
+ "Image Classification",
222
+ "Object Detection",
223
+ "Automatic Speech Recognition",
224
+ "Video Analysis",
225
+ "PDF Summarization (RAG)",
226
+ # "Process Dataset" - Removed for now as it needs more complex input than a simple chat
 
 
 
227
  ]
 
 
 
 
 
 
 
228
 
229
+ # Create the ChatInterface
230
+ demo = gr.ChatInterface(
231
+ fn=chat_with_ai,
232
+ textbox=gr.Textbox(placeholder="Ask me anything or provide text/files for analysis...", container=False, scale=7),
233
+ chatbot=gr.Chatbot(height=500),
234
+ # Add a file upload component
235
+ additional_inputs=[
236
+ gr.Dropdown(task_choices, label="Select Task", value="General Chat", container=True),
237
+ gr.File(label="Upload File (Optional)", type="filepath", file_types=[
238
+ ".pdf", ".mp3", ".wav", ".jpg", ".jpeg", ".png", ".mov", ".mp4", ".avi", ".mkv"
239
+ ])
240
+ ],
241
+ title="💬 Multimodal AI Assistant (Chat Interface)",
242
+ description="Interact with various AI models. Select a task and provide your input (text or file)."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  )
244
 
245
  # --- Launch the Gradio App ---
 
247
  # For local testing, use demo.launch()
248
  # For Hugging Face Spaces, ensure all dependencies are in requirements.txt
249
  demo.launch(share=True) # share=True creates a public link for easy sharing (temporary)
250
+