TarSh8654 committed on
Commit
9c8e6cc
·
verified ·
1 Parent(s): 2828f18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +201 -155
app.py CHANGED
@@ -3,10 +3,18 @@ import os
3
  import tempfile
4
  import logging
5
  import json
 
6
 
7
  # Import your dispatcher class from the local summarizer_tool.py file
8
  from summarizer_tool import AllInOneDispatcher
9
 
 
 
 
 
 
 
 
10
  # Configure logging for the Gradio app
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
 
@@ -18,183 +26,220 @@ try:
18
  logging.info("AllInOneDispatcher initialized successfully for Gradio app.")
19
  except Exception as e:
20
  logging.error(f"Failed to initialize AllInOneDispatcher: {e}")
21
- # If dispatcher fails to initialize, the app might not work.
22
- # Raise a runtime error to make the Space fail gracefully with a clear message.
23
  raise RuntimeError(f"Failed to initialize AI models. Check logs for details: {e}") from e
24
 
25
- # --- Gradio Interface Functions ---
26
-
27
- # Function to handle Text Processing
28
- def process_text_task(text_input: str, task_name: str, max_summary_len: int, min_summary_len: int, max_gen_tokens: int, num_gen_sequences: int, tts_lang: str):
29
- """Handles various text-based AI tasks."""
30
- if not text_input.strip():
31
- return "Please enter some text.", None # Return None for audio output
 
 
 
 
32
 
33
- kwargs = {}
34
- if task_name == "summarization":
35
- kwargs["max_length"] = max_summary_len
36
- kwargs["min_length"] = min_summary_len
37
- elif task_name == "text-generation":
38
- kwargs["max_new_tokens"] = max_gen_tokens
39
- kwargs["num_return_sequences"] = num_gen_sequences
40
- elif task_name == "tts":
41
- kwargs["lang"] = tts_lang
42
 
43
  try:
44
- logging.info(f"Processing text with task: {task_name}")
45
- result = dispatcher.process(text_input, task=task_name, **kwargs)
46
 
47
- if task_name == "tts":
48
- # For TTS, dispatcher.process returns a file path
49
- if os.path.exists(result):
50
- return "Speech generated successfully!", result # Return text message and audio file path
51
- else:
52
- return "TTS failed to generate audio.", None
53
- else:
54
- # For other text tasks, return the JSON representation of the result
55
- return json.dumps(result, indent=2), None
56
- except Exception as e:
57
- logging.error(f"Error processing text: {e}")
58
- return f"An error occurred: {e}", None
59
 
60
- # Function to handle File Processing
61
- def process_file_task(file_obj, task_name: str):
62
- """Handles image, audio, PDF, and limited video processing."""
63
- if file_obj is None:
64
- return "Please upload a file."
65
-
66
- # Gradio passes the file path directly for type="filepath"
67
- file_path = file_obj
68
-
69
- try:
70
- logging.info(f"Processing file '{file_path}' with task: {task_name}")
71
- result = dispatcher.process(file_path, task=task_name)
72
-
73
- if task_name == "automatic-speech-recognition":
74
- return result.get('text', 'No transcription found.')
75
- elif task_name == "video":
76
- # Video analysis returns a dict with image and audio results
77
- return f"Video Analysis Result:\nImage Analysis: {json.dumps(result.get('image_analysis'), indent=2)}\nAudio Analysis: {json.dumps(result.get('audio_analysis'), indent=2)}"
78
  else:
79
- return json.dumps(result, indent=2) # Use json.dumps for structured output
80
-
81
- except NotImplementedError as e:
82
- logging.error(f"Task not implemented: {e}")
83
- return f"Task not fully implemented: {e}. Video processing is complex and requires system-level ffmpeg."
84
- except ValueError as e:
85
- logging.error(f"Value error processing file: {e}")
86
- return f"Error processing file: {e}. Ensure the file type matches the selected task."
87
  except Exception as e:
88
- logging.error(f"An unexpected error occurred during file processing: {e}")
89
  return f"An unexpected error occurred: {e}"
90
 
91
- # NEW: Function to handle Dataset Processing
92
- def process_dataset_task(dataset_name: str, subset_name: str, split: str, column_to_process: str, task: str, num_samples: int):
93
  """
94
- Processes a specified column from a Hugging Face dataset using the dispatcher.
95
  """
96
- if not dataset_name.strip() or not column_to_process.strip():
97
- return "Please provide a Dataset Name and Column to Process."
98
-
99
- # Define allowed tasks for dataset processing (based on what your dispatcher can handle)
100
- allowed_dataset_tasks = [
101
- "sentiment-analysis", "summarization", "text-generation", "image-classification",
102
- "object-detection", "automatic-speech-recognition", "translation_en_to_fr"
103
- # "pdf" and "video" are file-specific, not direct dataset column tasks
104
- ]
105
- if task not in allowed_dataset_tasks:
106
- return f"Selected task '{task}' is not supported for dataset processing. Choose from: {', '.join(allowed_dataset_tasks)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  try:
109
- logging.info(f"Starting dataset processing for {dataset_name}/{subset_name} on column '{column_to_process}' with task '{task}'.")
110
- results = dispatcher.process_dataset_from_hub(
111
- dataset_name=dataset_name,
112
- subset_name=subset_name,
113
- split=split,
114
- column_to_process=column_to_process,
115
- task=task,
116
- num_samples=num_samples
117
- )
118
- return json.dumps(results, indent=2) # Return results as pretty-printed JSON
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  except Exception as e:
120
- logging.error(f"Error during dataset processing: {e}")
121
- return f"An error occurred during dataset processing: {e}"
 
 
 
 
 
 
 
122
 
123
 
124
  # --- Gradio Interface Definition ---
125
 
126
- # Text Processing Tab
127
- text_tab_inputs = [
128
- gr.Textbox(lines=8, label="Enter Text", placeholder="Type your text here for summarization, sentiment analysis, etc."),
129
- gr.Dropdown(
130
- ["sentiment-analysis", "summarization", "text-generation", "tts", "translation_en_to_fr"],
131
- label="Select Text Task",
132
- value="sentiment-analysis"
133
- ),
134
- gr.Slider(minimum=10, maximum=200, value=50, step=1, label="Max Summary Length (for Summarization)"),
135
- gr.Slider(minimum=5, maximum=100, value=10, step=1, label="Min Summary Length (for Summarization)"),
136
- gr.Slider(minimum=10, maximum=200, value=50, step=1, label="Max Generated Tokens (for Text Generation)"),
137
- gr.Slider(minimum=1, maximum=3, value=1, step=1, label="Number of Sequences (for Text Generation)"),
138
- gr.Dropdown(["en", "fr", "es"], label="TTS Language", value="en")
139
- ]
140
- text_tab_outputs = [
141
- gr.Textbox(label="Analysis Result / Generated Text"),
142
- gr.Audio(label="Generated Speech (for TTS)", type="filepath")
143
  ]
144
- text_interface = gr.Interface(
145
- fn=process_text_task,
146
- inputs=text_tab_inputs,
147
- outputs=text_tab_outputs,
148
- title="📝 Text Processing",
149
- description="Perform various NLP tasks like sentiment analysis, summarization, text generation, and text-to-speech."
150
- )
151
 
152
- # File Processing Tab
153
- file_tab_inputs = [
154
- gr.File(label="Upload File", type="filepath", file_types=[".pdf", ".mp3", ".wav", ".jpg", ".jpeg", ".png", ".mov", ".mp4", ".avi", ".mkv"]),
155
- gr.Dropdown(
156
- ["image-classification", "object-detection", "automatic-speech-recognition", "pdf", "video"],
157
- label="Select File Task",
158
- value="image-classification"
159
- )
160
- ]
161
- file_tab_outputs = gr.Textbox(label="File Processing Result")
162
- file_interface = gr.Interface(
163
- fn=process_file_task,
164
- inputs=file_tab_inputs,
165
- outputs=file_tab_outputs,
166
- title="📁 File Processing",
167
- description="Upload an image, audio, PDF, or video file for AI analysis."
168
- )
169
-
170
- # Dataset Processing Tab
171
- dataset_tab_inputs = [
172
- gr.Textbox(label="Hugging Face Dataset Name", placeholder="e.g., 'glue', 'mnist', 'common_voice'"),
173
- gr.Textbox(label="Dataset Subset (Optional)", placeholder="e.g., 'sst2' for 'glue', 'en' for 'common_voice'"),
174
- gr.Dropdown(["train", "validation", "test"], label="Dataset Split", value="train"),
175
- gr.Textbox(label="Column to Process", placeholder="e.g., 'sentence', 'image', 'audio'"),
176
- gr.Dropdown(
177
- ["sentiment-analysis", "summarization", "text-generation", "image-classification",
178
- "object-detection", "automatic-speech-recognition", "translation_en_to_fr"],
179
- label="AI Task for Dataset Column",
180
- value="sentiment-analysis"
181
- ),
182
- gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Samples to Process (max 20 for demo)"),
183
- ]
184
- dataset_tab_outputs = gr.Textbox(label="Dataset Processing Results (JSON)")
185
- dataset_interface = gr.Interface(
186
- fn=process_dataset_task,
187
- inputs=dataset_tab_inputs,
188
- outputs=dataset_tab_outputs,
189
- title="📊 Dataset Processing",
190
- description="Load a dataset from Hugging Face Hub and apply an AI task to a specified column (processes a limited number of samples)."
191
- )
192
-
193
-
194
- # Combine all interfaces into a Tabbed Interface
195
- demo = gr.TabbedInterface(
196
- [text_interface, file_interface, dataset_interface], # Include all three interfaces
197
- ["Text Analyzer", "File Analyzer", "Dataset Analyzer"] # Tab titles
198
  )
199
 
200
  # --- Launch the Gradio App ---
@@ -202,3 +247,4 @@ if __name__ == "__main__":
202
  # For local testing, use demo.launch()
203
  # For Hugging Face Spaces, ensure all dependencies are in requirements.txt
204
  demo.launch(share=True) # share=True creates a public link for easy sharing (temporary)
 
 
3
  import tempfile
4
  import logging
5
  import json
6
+ import requests # For Gemini API calls
7
 
8
  # Import your dispatcher class from the local summarizer_tool.py file
9
  from summarizer_tool import AllInOneDispatcher
10
 
11
+ # --- Gemini API Configuration ---
12
+ # The API key will be automatically provided by the Canvas environment at runtime
13
+ # if left as an empty string. DO NOT hardcode your API key here.
14
+ GEMINI_API_KEY = "" # Leave as empty string for Canvas environment
15
+ GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
16
+
17
+
18
  # Configure logging for the Gradio app
19
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
20
 
 
26
  logging.info("AllInOneDispatcher initialized successfully for Gradio app.")
27
  except Exception as e:
28
  logging.error(f"Failed to initialize AllInOneDispatcher: {e}")
 
 
29
  raise RuntimeError(f"Failed to initialize AI models. Check logs for details: {e}") from e
30
 
31
+ # --- Helper Function for Gemini API Call ---
32
+ def call_gemini_api(prompt: str) -> str:
33
+ """
34
+ Calls the Gemini API with the given prompt and returns the generated text.
35
+ """
36
+ headers = {
37
+ 'Content-Type': 'application/json',
38
+ }
39
+ payload = {
40
+ "contents": [{"role": "user", "parts": [{"text": prompt}]}],
41
+ }
42
 
43
+ full_api_url = f"{GEMINI_API_URL}?key={GEMINI_API_KEY}" if GEMINI_API_KEY else GEMINI_API_URL
 
 
 
 
 
 
 
 
44
 
45
  try:
46
+ response = requests.post(full_api_url, headers=headers, data=json.dumps(payload))
47
+ response.raise_for_status() # Raise an exception for HTTP errors
48
 
49
+ result = response.json()
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ if result.get("candidates") and len(result["candidates"]) > 0 and \
52
+ result["candidates"][0].get("content") and \
53
+ result["candidates"][0]["content"].get("parts") and \
54
+ len(result["candidates"][0]["content"]["parts"]) > 0:
55
+ return result["candidates"][0]["content"]["parts"][0]["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  else:
57
+ return "I couldn't generate a response for that."
58
+ except requests.exceptions.RequestException as e:
59
+ logging.error(f"Gemini API Call Error: {e}")
60
+ return f"An error occurred while connecting to the AI: {e}"
61
+ except json.JSONDecodeError:
62
+ logging.error(f"Gemini API Response Error: Could not decode JSON. Response: {response.text}")
63
+ return "An error occurred while processing the AI's response."
 
64
  except Exception as e:
65
+ logging.error(f"An unexpected error occurred during Gemini API call: {e}")
66
  return f"An unexpected error occurred: {e}"
67
 
68
+ # --- Main Chat Function for Gradio ---
69
+ async def chat_with_ai(message: str, history: list, selected_task: str, uploaded_file):
70
  """
71
+ Processes user messages, selected tasks, and uploaded files.
72
  """
73
+ response_text = ""
74
+ file_path = None
75
+
76
+ # Handle file upload first, if any
77
+ if uploaded_file is not None:
78
+ file_path = uploaded_file # Gradio passes the path directly for type="filepath"
79
+ logging.info(f"Received file: {file_path} for task: {selected_task}")
80
+
81
+ # Determine file type for task mapping
82
+ file_extension = os.path.splitext(file_path)[1].lower()
83
+
84
+ if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
85
+ if selected_task not in ["Image Classification", "Object Detection"]:
86
+ return "Please select 'Image Classification' or 'Object Detection' for image files."
87
+ elif file_extension in ['.mp3', '.wav', '.ogg', '.flac', '.m4a']:
88
+ if selected_task != "Automatic Speech Recognition":
89
+ return "Please select 'Automatic Speech Recognition' for audio files."
90
+ elif file_extension in ['.mp4', '.mov', '.avi', '.mkv']:
91
+ if selected_task != "Video Analysis":
92
+ return "Please select 'Video Analysis' for video files."
93
+ elif file_extension == '.pdf':
94
+ if selected_task != "PDF Summarization (RAG)":
95
+ return "Please select 'PDF Summarization (RAG)' for PDF files."
96
+ else:
97
+ return f"Unsupported file type: {file_extension}. Please upload a supported file or select 'General Chat'."
98
+
99
 
100
  try:
101
+ if selected_task == "General Chat":
102
+ # Use Gemini for general chat
103
+ prompt = f"User: {message}\nAI:"
104
+ response_text = call_gemini_api(prompt)
105
+ return response_text
106
+
107
+ elif selected_task == "Summarize Text":
108
+ if not message.strip(): return "Please provide text to summarize."
109
+ result = dispatcher.process(message, task="summarization", max_length=150, min_length=30)
110
+ response_text = f"Here's a summary of your text:\n\n{json.dumps(result, indent=2)}"
111
+ return response_text
112
+
113
+ elif selected_task == "Sentiment Analysis":
114
+ if not message.strip(): return "Please provide text for sentiment analysis."
115
+ result = dispatcher.process(message, task="sentiment-analysis")
116
+ response_text = f"The sentiment of your text is: {json.dumps(result, indent=2)}"
117
+ return response_text
118
+
119
+ elif selected_task == "Text Generation":
120
+ if not message.strip(): return "Please provide a prompt for text generation."
121
+ result = dispatcher.process(message, task="text-generation", max_new_tokens=100, num_return_sequences=1)
122
+ generated_text = result[0]['generated_text'] if result and isinstance(result, list) and result[0].get('generated_text') else str(result)
123
+ response_text = f"Here's the generated text:\n\n{generated_text}"
124
+ return response_text
125
+
126
+ elif selected_task == "Text-to-Speech (TTS)":
127
+ if not message.strip(): return "Please provide text for speech generation."
128
+ audio_path = dispatcher.process(message, task="tts", lang="en") # Default to English
129
+ if os.path.exists(audio_path):
130
+ # Gradio ChatInterface can return audio directly
131
+ return (f"Here's the audio for your text:", gr.Audio(audio_path, label="Generated Speech", autoplay=True))
132
+ else:
133
+ return "Failed to generate speech."
134
+
135
+ elif selected_task == "Translation (EN to FR)":
136
+ if not message.strip(): return "Please provide text to translate."
137
+ result = dispatcher.process(message, task="translation_en_to_fr")
138
+ translated_text = result[0]['translation_text'] if result and isinstance(result, list) and result[0].get('translation_text') else str(result)
139
+ response_text = f"Here's the English to French translation:\n\n{translated_text}"
140
+ return response_text
141
+
142
+ elif selected_task == "Image Classification":
143
+ if not file_path: return "Please upload an image file for classification."
144
+ result = dispatcher.process(file_path, task="image-classification")
145
+ response_text = f"Image Classification Result:\n\n{json.dumps(result, indent=2)}"
146
+ return response_text
147
+
148
+ elif selected_task == "Object Detection":
149
+ if not file_path: return "Please upload an image file for object detection."
150
+ result = dispatcher.process(file_path, task="object-detection")
151
+ response_text = f"Object Detection Result:\n\n{json.dumps(result, indent=2)}"
152
+ return response_text
153
+
154
+ elif selected_task == "Automatic Speech Recognition":
155
+ if not file_path: return "Please upload an audio file for transcription."
156
+ result = dispatcher.process(file_path, task="automatic-speech-recognition")
157
+ transcription = result.get('text', 'No transcription found.')
158
+ response_text = f"Audio Transcription:\n\n{transcription}"
159
+ return response_text
160
+
161
+ elif selected_task == "Video Analysis":
162
+ if not file_path: return "Please upload a video file for analysis."
163
+ result = dispatcher.process(file_path, task="video")
164
+ image_analysis = json.dumps(result.get('image_analysis'), indent=2)
165
+ audio_analysis = json.dumps(result.get('audio_analysis'), indent=2)
166
+ response_text = f"Video Analysis Result:\n\nImage Analysis:\n{image_analysis}\n\nAudio Analysis:\n{audio_analysis}"
167
+ return response_text
168
+
169
+ elif selected_task == "PDF Summarization (RAG)":
170
+ if not file_path: return "Please upload a PDF file for summarization."
171
+ result = dispatcher.process(file_path, task="pdf")
172
+ response_text = f"PDF Summary:\n\n{result}"
173
+ return response_text
174
+
175
+ elif selected_task == "Process Dataset":
176
+ # This task requires more specific parameters (dataset name, column, etc.)
177
+ # It's not directly compatible with a single chat message input.
178
+ # We'll guide the user to a separate interface for this, or simplify.
179
+ # For now, let's keep it simple: user provides dataset_name, subset, split, column in message.
180
+ # A more robust solution would involve a separate Gradio component for this.
181
+ return "For 'Process Dataset', please use the dedicated 'Dataset Analyzer' tab if it were available, or provide all parameters in your message like: 'dataset: glue, subset: sst2, split: train, column: sentence, task: sentiment-analysis, samples: 2'."
182
+ # Example of parsing:
183
+ # parts = message.split(',')
184
+ # params = {p.split(':')[0].strip(): p.split(':')[1].strip() for p in parts if ':' in p}
185
+ # dataset_name = params.get('dataset')
186
+ # subset_name = params.get('subset', '')
187
+ # split = params.get('split', 'train')
188
+ # column = params.get('column')
189
+ # task_for_dataset = params.get('task')
190
+ # num_samples = int(params.get('samples', 2))
191
+ # if not all([dataset_name, column, task_for_dataset]):
192
+ # return "Please provide dataset name, column, and task for dataset processing."
193
+ # result = dispatcher.process_dataset_from_hub(dataset_name, subset_name, split, column, task_for_dataset, num_samples)
194
+ # return f"Dataset Processing Results:\n\n{json.dumps(result, indent=2)}"
195
+
196
+ else:
197
+ return "Please select a valid task from the dropdown."
198
+
199
  except Exception as e:
200
+ logging.error(f"An error occurred in chat_with_ai: {e}")
201
+ return f"An unexpected error occurred during processing: {e}"
202
+ finally:
203
+ # Clean up temporary file if it was uploaded and processed
204
+ if file_path and os.path.exists(file_path):
205
+ # Gradio handles temp file cleanup for gr.File(type="filepath")
206
+ # However, if you manually copy/save, ensure cleanup.
207
+ # For this setup, Gradio should handle it.
208
+ pass
209
 
210
 
211
  # --- Gradio Interface Definition ---
212
 
213
+ # Define the choices for the task dropdown
214
+ task_choices = [
215
+ "General Chat",
216
+ "Summarize Text",
217
+ "Sentiment Analysis",
218
+ "Text Generation",
219
+ "Text-to-Speech (TTS)",
220
+ "Translation (EN to FR)",
221
+ "Image Classification",
222
+ "Object Detection",
223
+ "Automatic Speech Recognition",
224
+ "Video Analysis",
225
+ "PDF Summarization (RAG)",
226
+ # "Process Dataset" - Removed for now as it needs more complex input than a simple chat
 
 
 
227
  ]
 
 
 
 
 
 
 
228
 
229
+ # Create the ChatInterface
230
+ demo = gr.ChatInterface(
231
+ fn=chat_with_ai,
232
+ textbox=gr.Textbox(placeholder="Ask me anything or provide text/files for analysis...", container=False, scale=7),
233
+ chatbot=gr.Chatbot(height=500),
234
+ # Add a file upload component
235
+ additional_inputs=[
236
+ gr.Dropdown(task_choices, label="Select Task", value="General Chat", container=True),
237
+ gr.File(label="Upload File (Optional)", type="filepath", file_types=[
238
+ ".pdf", ".mp3", ".wav", ".jpg", ".jpeg", ".png", ".mov", ".mp4", ".avi", ".mkv"
239
+ ])
240
+ ],
241
+ title="💬 Multimodal AI Assistant (Chat Interface)",
242
+ description="Interact with various AI models. Select a task and provide your input (text or file)."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  )
244
 
245
  # --- Launch the Gradio App ---
 
247
  # For local testing, use demo.launch()
248
  # For Hugging Face Spaces, ensure all dependencies are in requirements.txt
249
  demo.launch(share=True) # share=True creates a public link for easy sharing (temporary)
250
+