CamiloVega committed on
Commit
a6f5353
·
verified ·
1 Parent(s): 22888c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -157
app.py CHANGED
@@ -4,31 +4,27 @@ import whisper
4
  import tempfile
5
  import gradio as gr
6
  from pydub import AudioSegment
7
- import fitz # PyMuPDF para manejar PDFs
8
- import docx # Para manejar archivos .docx
9
- import pandas as pd # Para manejar archivos .xlsx y .csv
10
- #from google.colab import userdata # Importa userdata de google.colab
11
  import requests
12
  from bs4 import BeautifulSoup
13
  from moviepy.editor import VideoFileClip
14
  import yt_dlp
15
  import logging
16
 
17
- # Configurar logging
18
  logging.basicConfig(level=logging.INFO)
19
  logger = logging.getLogger(__name__)
20
 
21
- # Configura tu clave API de OpenAI usando Google Colab userdata
22
- #openai.api_key = userdata.get('OPENAI_API_KEY')
23
-
24
- # Cargar las variables de entorno desde el entorno de Hugging Face
25
  openai.api_key = os.getenv("OPENAI_API_KEY")
26
 
27
- # Cargar el modelo Whisper de mayor calidad una vez
28
  model = whisper.load_model("large")
29
 
30
  def download_social_media_video(url):
31
- """Descarga un video de redes sociales."""
32
  ydl_opts = {
33
  'format': 'bestaudio/best',
34
  'postprocessors': [{
@@ -42,256 +38,272 @@ def download_social_media_video(url):
42
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
43
  info_dict = ydl.extract_info(url, download=True)
44
  audio_file = f"{info_dict['id']}.mp3"
45
- logger.info(f"Video descargado exitosamente: {audio_file}")
46
  return audio_file
47
  except Exception as e:
48
- logger.error(f"Error al descargar el video: {str(e)}")
49
  raise
50
 
51
  def convert_video_to_audio(video_file):
52
- """Convierte un archivo de video a audio."""
53
  try:
54
  video = VideoFileClip(video_file)
55
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
56
  video.audio.write_audiofile(temp_file.name)
57
- logger.info(f"Video convertido a audio: {temp_file.name}")
58
  return temp_file.name
59
  except Exception as e:
60
- logger.error(f"Error al convertir el video a audio: {str(e)}")
61
  raise
62
 
63
  def preprocess_audio(audio_file):
64
- """Preprocesa el archivo de audio para mejorar la calidad."""
65
  try:
66
  audio = AudioSegment.from_file(audio_file)
67
  audio = audio.apply_gain(-audio.dBFS + (-20))
68
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
69
  audio.export(temp_file.name, format="mp3")
70
- logger.info(f"Audio preprocesado: {temp_file.name}")
71
  return temp_file.name
72
  except Exception as e:
73
- logger.error(f"Error al preprocesar el archivo de audio: {str(e)}")
74
  raise
75
 
76
- def transcribir_audio(file):
77
- """Transcribe un archivo de audio o video."""
78
  try:
79
  if isinstance(file, str) and file.startswith('http'):
80
- logger.info(f"Descargando video de red social: {file}")
81
- archivo_path = download_social_media_video(file)
82
  elif isinstance(file, str) and file.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
83
- logger.info(f"Convirtiendo video local a audio: {file}")
84
- archivo_path = convert_video_to_audio(file)
85
  else:
86
- logger.info(f"Preprocesando archivo de audio: {file}")
87
- archivo_path = preprocess_audio(file)
88
-
89
- logger.info(f"Transcribiendo audio: {archivo_path}")
90
- resultado = model.transcribe(archivo_path)
91
- transcripcion = resultado.get("text", "Error en la transcripciΓ³n")
92
- logger.info(f"TranscripciΓ³n completada: {transcripcion[:50]}...")
93
- return transcripcion
94
  except Exception as e:
95
- logger.error(f"Error al procesar el archivo: {str(e)}")
96
- return f"Error al procesar el archivo: {str(e)}"
97
 
98
- def leer_documento(documento_path):
99
- """Lee el contenido de un documento PDF, DOCX, XLSX o CSV."""
100
  try:
101
- if documento_path.endswith(".pdf"):
102
- doc = fitz.open(documento_path)
103
- return "\n".join([pagina.get_text() for pagina in doc])
104
- elif documento_path.endswith(".docx"):
105
- doc = docx.Document(documento_path)
106
- return "\n".join([parrafo.text for parrafo in doc.paragraphs])
107
- elif documento_path.endswith(".xlsx"):
108
- return pd.read_excel(documento_path).to_string()
109
- elif documento_path.endswith(".csv"):
110
- return pd.read_csv(documento_path).to_string()
111
  else:
112
- return "Tipo de archivo no soportado. Por favor suba un documento PDF, DOCX, XLSX o CSV."
113
  except Exception as e:
114
- return f"Error al leer el documento: {str(e)}"
115
 
116
- def leer_url(url):
117
- """Lee el contenido de una URL."""
118
  try:
119
  response = requests.get(url)
120
  response.raise_for_status()
121
  soup = BeautifulSoup(response.content, 'html.parser')
122
  return soup.get_text()
123
  except Exception as e:
124
- return f"Error al leer la URL: {str(e)}"
125
 
126
- def procesar_contenido_social(url):
127
- """Procesa el contenido de una URL de red social, manejando tanto texto como video."""
128
  try:
129
- # Primero, intentamos leer el contenido como texto
130
- contenido_texto = leer_url(url)
131
 
132
- # Luego, intentamos procesar como video
133
  try:
134
- contenido_video = transcribir_audio(url)
135
  except Exception:
136
- contenido_video = None
137
 
138
  return {
139
- "texto": contenido_texto,
140
- "video": contenido_video
141
  }
142
  except Exception as e:
143
- logger.error(f"Error al procesar contenido social: {str(e)}")
144
  return None
145
 
146
- def generar_noticia(instrucciones, hechos, tamaΓ±o, tono, *args):
147
- """Genera una noticia a partir de instrucciones, hechos, URLs, documentos, transcripciones y contenido de redes sociales."""
148
- base_de_conocimiento = {
149
- "instrucciones": instrucciones,
150
- "hechos": hechos,
151
- "contenido_documentos": [],
152
  "audio_data": [],
153
- "contenido_urls": [],
154
- "contenido_social": []
155
  }
156
- num_audios = 5 * 3 # 5 audios/videos * 3 campos (archivo, nombre, cargo)
157
- num_social_urls = 3 * 3 # 3 URLs de redes sociales * 3 campos (URL, nombre, contexto)
158
- num_urls = 5 # 5 URLs generales
159
  audios = args[:num_audios]
160
  social_urls = args[num_audios:num_audios+num_social_urls]
161
  urls = args[num_audios+num_social_urls:num_audios+num_social_urls+num_urls]
162
- documentos = args[num_audios+num_social_urls+num_urls:]
163
 
164
  for url in urls:
165
  if url:
166
- base_de_conocimiento["contenido_urls"].append(leer_url(url))
167
 
168
- for documento in documentos:
169
- if documento is not None:
170
- base_de_conocimiento["contenido_documentos"].append(leer_documento(documento.name))
171
 
172
  for i in range(0, len(audios), 3):
173
- audio_file, nombre, cargo = audios[i:i+3]
174
  if audio_file is not None:
175
- base_de_conocimiento["audio_data"].append({"audio": audio_file, "nombre": nombre, "cargo": cargo})
176
 
177
  for i in range(0, len(social_urls), 3):
178
- social_url, social_nombre, social_contexto = social_urls[i:i+3]
179
  if social_url:
180
- contenido_social = procesar_contenido_social(social_url)
181
- if contenido_social:
182
- base_de_conocimiento["contenido_social"].append({
183
  "url": social_url,
184
- "nombre": social_nombre,
185
- "contexto": social_contexto,
186
- "texto": contenido_social["texto"],
187
- "video": contenido_social["video"]
188
  })
189
- logger.info(f"Contenido de red social procesado: {social_url}")
190
 
191
- transcripciones_texto, transcripciones_brutas = "", ""
192
 
193
- for idx, data in enumerate(base_de_conocimiento["audio_data"]):
194
  if data["audio"] is not None:
195
- transcripcion = transcribir_audio(data["audio"])
196
- transcripcion_texto = f'"{transcripcion}" - {data["nombre"]}, {data["cargo"]}'
197
- transcripcion_bruta = f'[Audio/Video {idx + 1}]: "{transcripcion}" - {data["nombre"]}, {data["cargo"]}'
198
- transcripciones_texto += transcripcion_texto + "\n"
199
- transcripciones_brutas += transcripcion_bruta + "\n\n"
200
-
201
- for data in base_de_conocimiento["contenido_social"]:
202
- if data["texto"]:
203
- transcripcion_texto = f'[Texto de red social]: "{data["texto"][:200]}..." - {data["nombre"]}, {data["contexto"]}'
204
- transcripciones_texto += transcripcion_texto + "\n"
205
- transcripciones_brutas += transcripcion_texto + "\n\n"
206
  if data["video"]:
207
- transcripcion_video = f'[Video de red social]: "{data["video"]}" - {data["nombre"]}, {data["contexto"]}'
208
- transcripciones_texto += transcripcion_video + "\n"
209
- transcripciones_brutas += transcripcion_video + "\n\n"
210
-
211
- contenido_documentos = "\n\n".join(base_de_conocimiento["contenido_documentos"])
212
- contenido_urls = "\n\n".join(base_de_conocimiento["contenido_urls"])
213
-
214
- prompt_interno = """
215
- Instrucciones para el modelo:
216
- - Debes seguir los principios de una noticia: es decir, procura siempre responder las 5 W de una noticia en el primer pΓ‘rrafo (Who?, What?, When?, Where?, Why?).
217
- - AsegΓΊrate de que al menos el 80% de las citas sean directas y estΓ©n entrecomilladas.
218
- - El 20% restante puede ser citas indirectas.
219
- - No inventes informaciΓ³n nueva.
220
- - SΓ© riguroso con los hechos proporcionados.
221
- - Al procesar los documentos cargados, extrae y resalta citas importantes y testimonios textuales de las fuentes.
222
- - Al procesar los documentos cargados, extrae y resalta cifras clave.
223
- - Evita usar la fecha al comienzo del cuerpo de la noticia. Empieza directamente con las 5W.
224
- - Incluye el contenido de las redes sociales de manera relevante, citando la fuente y proporcionando el contexto adecuado.
225
- - AsegΓΊrate de relacionar el contexto proporcionado para el contenido de red social con su transcripciΓ³n o texto correspondiente.
226
  """
227
 
228
  prompt = f"""
229
- {prompt_interno}
230
- Escribe una noticia con la siguiente informaciΓ³n, incluyendo un tΓ­tulo, un gancho de 15 palabras (el gancho es lo que se conoce en inglΓ©s como hook, informaciΓ³n adicional que complementa el tΓ­tulo), y el cuerpo del contenido cuyo tamaΓ±o es {tamaΓ±o} palabras. El tono debe ser {tono}.
231
- Instrucciones: {base_de_conocimiento["instrucciones"]}
232
- Hechos: {base_de_conocimiento["hechos"]}
233
- Contenido adicional de los documentos: {contenido_documentos}
234
- Contenido adicional de las URLs: {contenido_urls}
235
- Utiliza las siguientes transcripciones como citas directas e indirectas (sin cambiar ni inventar contenido):
236
- {transcripciones_texto}
237
  """
238
 
239
  try:
240
- respuesta = openai.ChatCompletion.create(
241
  model="gpt-4o-mini",
242
  messages=[{"role": "user", "content": prompt}],
243
  temperature=0.1
244
  )
245
- noticia = respuesta['choices'][0]['message']['content']
246
- return noticia, transcripciones_brutas
247
  except Exception as e:
248
- logger.error(f"Error al generar la noticia: {str(e)}")
249
- return f"Error al generar la noticia: {str(e)}", ""
250
 
251
  with gr.Blocks() as demo:
252
- gr.Markdown("## Generador de noticias todo en uno")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  with gr.Row():
254
  with gr.Column(scale=2):
255
- instrucciones = gr.Textbox(label="Instrucciones para la noticia", lines=2)
256
- hechos = gr.Textbox(label="Describe los hechos de la noticia", lines=4)
257
- tamaΓ±o = gr.Number(label="TamaΓ±o del cuerpo de la noticia (en palabras)", value=100)
258
- tono = gr.Dropdown(label="Tono de la noticia", choices=["serio", "neutral", "divertido"], value="neutral")
259
  with gr.Column(scale=3):
260
- inputs_list = [instrucciones, hechos, tamaΓ±o, tono]
261
  with gr.Tabs():
262
  for i in range(1, 6):
263
  with gr.TabItem(f"Audio/Video {i}"):
264
  file = gr.File(label=f"Audio/Video {i}", type="filepath", file_types=["audio", "video"])
265
- nombre = gr.Textbox(label="Nombre", scale=1)
266
- cargo = gr.Textbox(label="Cargo", scale=1)
267
- inputs_list.extend([file, nombre, cargo])
268
  for i in range(1, 4):
269
- with gr.TabItem(f"Red Social {i}"):
270
- social_url = gr.Textbox(label=f"URL de red social {i}", lines=1)
271
- social_nombre = gr.Textbox(label=f"Nombre de persona/cuenta {i}", scale=1)
272
- social_contexto = gr.Textbox(label=f"Contexto del contenido {i}", lines=2)
273
- inputs_list.extend([social_url, social_nombre, social_contexto])
274
  for i in range(1, 6):
275
  with gr.TabItem(f"URL {i}"):
276
  url = gr.Textbox(label=f"URL {i}", lines=1)
277
  inputs_list.append(url)
278
  for i in range(1, 6):
279
- with gr.TabItem(f"Documento {i}"):
280
- documento = gr.File(label=f"Documento {i}", type="filepath", file_count="single")
281
- inputs_list.append(documento)
282
 
283
- gr.Markdown("---") # Separador visual
284
 
285
  with gr.Row():
286
- transcripciones_output = gr.Textbox(label="Transcripciones", lines=10)
287
 
288
- gr.Markdown("---") # Separador visual
289
 
290
  with gr.Row():
291
- generar = gr.Button("Generar borrador")
292
  with gr.Row():
293
- noticia_output = gr.Textbox(label="Borrador generado", lines=20)
294
 
295
- generar.click(fn=generar_noticia, inputs=inputs_list, outputs=[noticia_output, transcripciones_output])
296
 
297
  demo.launch(share=True)
 
4
  import tempfile
5
  import gradio as gr
6
  from pydub import AudioSegment
7
+ import fitz # PyMuPDF for handling PDFs
8
+ import docx # For handling .docx files
9
+ import pandas as pd # For handling .xlsx and .csv files
 
10
  import requests
11
  from bs4 import BeautifulSoup
12
  from moviepy.editor import VideoFileClip
13
  import yt_dlp
14
  import logging
15
 
16
+ # Configure logging
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
19
 
20
+ # Configure your OpenAI API key
 
 
 
21
  openai.api_key = os.getenv("OPENAI_API_KEY")
22
 
23
+ # Load the highest quality Whisper model once
24
  model = whisper.load_model("large")
25
 
26
  def download_social_media_video(url):
27
+ """Downloads a video from social media."""
28
  ydl_opts = {
29
  'format': 'bestaudio/best',
30
  'postprocessors': [{
 
38
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
39
  info_dict = ydl.extract_info(url, download=True)
40
  audio_file = f"{info_dict['id']}.mp3"
41
+ logger.info(f"Video successfully downloaded: {audio_file}")
42
  return audio_file
43
  except Exception as e:
44
+ logger.error(f"Error downloading video: {str(e)}")
45
  raise
46
 
47
  def convert_video_to_audio(video_file):
48
+ """Converts a video file to audio."""
49
  try:
50
  video = VideoFileClip(video_file)
51
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
52
  video.audio.write_audiofile(temp_file.name)
53
+ logger.info(f"Video converted to audio: {temp_file.name}")
54
  return temp_file.name
55
  except Exception as e:
56
+ logger.error(f"Error converting video to audio: {str(e)}")
57
  raise
58
 
59
  def preprocess_audio(audio_file):
60
+ """Preprocesses the audio file to improve quality."""
61
  try:
62
  audio = AudioSegment.from_file(audio_file)
63
  audio = audio.apply_gain(-audio.dBFS + (-20))
64
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
65
  audio.export(temp_file.name, format="mp3")
66
+ logger.info(f"Audio preprocessed: {temp_file.name}")
67
  return temp_file.name
68
  except Exception as e:
69
+ logger.error(f"Error preprocessing audio file: {str(e)}")
70
  raise
71
 
72
def transcribe_audio(file):
    """Transcribes an audio or video file.

    Args:
        file: An ``http`` URL (downloaded first), a local video path
            (``.mp4``/``.avi``/``.mov``/``.mkv``, converted to audio first),
            or any other value, which is treated as an audio file path.

    Returns:
        The transcription text. On any failure the error is logged and a
        string starting with ``"Error processing file: "`` is returned instead
        of raising.
    """
    file_path = None
    try:
        if isinstance(file, str) and file.startswith('http'):
            logger.info(f"Downloading social media video: {file}")
            file_path = download_social_media_video(file)
        elif isinstance(file, str) and file.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
            logger.info(f"Converting local video to audio: {file}")
            file_path = convert_video_to_audio(file)
        else:
            logger.info(f"Preprocessing audio file: {file}")
            file_path = preprocess_audio(file)

        logger.info(f"Transcribing audio: {file_path}")
        result = model.transcribe(file_path)
        transcription = result.get("text", "Error in transcription")
        logger.info(f"Transcription completed: {transcription[:50]}...")
        return transcription
    except Exception as e:
        logger.error(f"Error processing file: {str(e)}")
        return f"Error processing file: {str(e)}"
    finally:
        # Every branch above produces a new intermediate MP3 distinct from the
        # input; remove it so repeated requests do not fill the disk.
        if file_path is not None:
            try:
                os.remove(file_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temporary file {file_path}: {cleanup_error}")
93
 
94
def read_document(document_path):
    """Reads content from PDF, DOCX, XLSX or CSV documents.

    The file type is chosen from the extension, compared case-insensitively.

    Args:
        document_path: Path of the document to read.

    Returns:
        The extracted text. For an unsupported extension, or on any read
        failure, a human-readable message is returned instead of raising.
    """
    try:
        # Lower-case only for the comparison; the real path is used for I/O.
        lowered_path = document_path.lower()
        if lowered_path.endswith(".pdf"):
            # The context manager closes the PDF once the text is extracted.
            with fitz.open(document_path) as doc:
                return "\n".join([page.get_text() for page in doc])
        elif lowered_path.endswith(".docx"):
            doc = docx.Document(document_path)
            return "\n".join([paragraph.text for paragraph in doc.paragraphs])
        elif lowered_path.endswith(".xlsx"):
            return pd.read_excel(document_path).to_string()
        elif lowered_path.endswith(".csv"):
            return pd.read_csv(document_path).to_string()
        else:
            return "Unsupported file type. Please upload a PDF, DOCX, XLSX or CSV document."
    except Exception as e:
        return f"Error reading document: {str(e)}"
111
 
112
def read_url(url):
    """Reads content from a URL.

    Args:
        url: Address of the page to fetch.

    Returns:
        The text of the page with HTML markup stripped. On any failure
        (network error, timeout, non-2xx status) a string starting with
        ``"Error reading URL: "`` is returned instead of raising.
    """
    try:
        # Without a timeout a stalled server would block the request handler forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        return soup.get_text()
    except Exception as e:
        return f"Error reading URL: {str(e)}"
121
 
122
def process_social_content(url):
    """Processes content from a social media URL, handling both text and video.

    Args:
        url: Address of the social media post.

    Returns:
        A dict with keys ``"text"`` (page text) and ``"video"`` (transcription).
        Either value is ``None`` when that part could not be obtained.
        ``None`` is returned if processing fails altogether.
    """
    try:
        # First, try to read content as text. read_url reports failures as an
        # error string; drop it so it is never quoted as if it were the post.
        text_content = read_url(url)
        if isinstance(text_content, str) and text_content.startswith("Error reading URL:"):
            text_content = None

        # Then, try to process as video. transcribe_audio also reports failures
        # as an error string rather than raising, so filter that out as well.
        try:
            video_content = transcribe_audio(url)
        except Exception:
            video_content = None
        if isinstance(video_content, str) and video_content.startswith("Error processing file:"):
            video_content = None

        return {
            "text": text_content,
            "video": video_content
        }
    except Exception as e:
        logger.error(f"Error processing social content: {str(e)}")
        return None
141
 
142
def generate_news(instructions, facts, size, tone, *args):
    """Generates a news article from instructions, facts, URLs, documents, transcriptions, and social media content.

    Args:
        instructions: Free-text editorial instructions inserted into the prompt.
        facts: Description of the facts inserted into the prompt.
        size: Requested word count of the article body.
        tone: Requested tone of the article.
        *args: Flat sequence of UI values, in this order: 5 groups of
            (audio/video file, name, position), 3 groups of
            (social media URL, name, context), 5 general URLs, then any
            number of documents.

    Returns:
        A ``(news, raw_transcriptions)`` tuple. If the completion request
        fails, ``(error message, "")`` is returned instead.
    """
    num_audios = 5 * 3  # 5 audios/videos * 3 fields (file, name, position)
    num_social_urls = 3 * 3  # 3 social media URLs * 3 fields (URL, name, context)
    num_urls = 5  # 5 general URLs
    social_start = num_audios
    urls_start = social_start + num_social_urls
    documents_start = urls_start + num_urls

    audios = args[:social_start]
    social_urls = args[social_start:urls_start]
    urls = args[urls_start:documents_start]
    documents = args[documents_start:]

    url_contents = [read_url(url) for url in urls if url]

    # Uploads may arrive as plain path strings or as objects exposing the
    # path through ``.name``; accept both.
    document_contents = [
        read_document(getattr(document, "name", document))
        for document in documents
        if document is not None
    ]

    audio_data = []
    for i in range(0, len(audios), 3):
        audio_file, name, position = audios[i:i+3]
        if audio_file is not None:
            audio_data.append({"audio": audio_file, "name": name, "position": position})

    social_entries = []
    for i in range(0, len(social_urls), 3):
        social_url, social_name, social_context = social_urls[i:i+3]
        if social_url:
            social_content = process_social_content(social_url)
            if social_content:
                social_entries.append({
                    "url": social_url,
                    "name": social_name,
                    "context": social_context,
                    "text": social_content["text"],
                    "video": social_content["video"]
                })
                logger.info(f"Social media content processed: {social_url}")

    # Quotes fed to the model, and the labelled version shown to the user.
    quote_lines = []
    raw_blocks = []

    for idx, data in enumerate(audio_data):
        transcription = transcribe_audio(data["audio"])
        quote_lines.append(f'"{transcription}" - {data["name"]}, {data["position"]}')
        raw_blocks.append(f'[Audio/Video {idx + 1}]: "{transcription}" - {data["name"]}, {data["position"]}')

    for data in social_entries:
        if data["text"]:
            # Only the first 200 characters of the page text are quoted.
            social_text = f'[Social media text]: "{data["text"][:200]}..." - {data["name"]}, {data["context"]}'
            quote_lines.append(social_text)
            raw_blocks.append(social_text)
        if data["video"]:
            social_video = f'[Social media video]: "{data["video"]}" - {data["name"]}, {data["context"]}'
            quote_lines.append(social_video)
            raw_blocks.append(social_video)

    transcriptions_text = "".join(line + "\n" for line in quote_lines)
    raw_transcriptions = "".join(block + "\n\n" for block in raw_blocks)

    document_content = "\n\n".join(document_contents)
    url_content = "\n\n".join(url_contents)

    internal_prompt = """
    Instructions for the model:
    - Follow news article principles: answer the 5 Ws in the first paragraph (Who?, What?, When?, Where?, Why?).
    - Ensure at least 80% of quotes are direct and in quotation marks.
    - The remaining 20% can be indirect quotes.
    - Don't invent new information.
    - Be rigorous with provided facts.
    - When processing uploaded documents, extract and highlight important quotes and testimonials from sources.
    - When processing uploaded documents, extract and highlight key figures.
    - Avoid using the date at the beginning of the news body. Start directly with the 5Ws.
    - Include social media content relevantly, citing the source and providing proper context.
    - Make sure to relate the provided context for social media content with its corresponding transcription or text.
    """

    prompt = f"""
    {internal_prompt}
    Write a news article with the following information, including a title, a 15-word hook (additional information that complements the title), and the content body with {size} words. The tone should be {tone}.
    Instructions: {instructions}
    Facts: {facts}
    Additional content from documents: {document_content}
    Additional content from URLs: {url_content}
    Use the following transcriptions as direct and indirect quotes (without changing or inventing content):
    {transcriptions_text}
    """

    try:
        # NOTE(review): openai.ChatCompletion is the pre-1.0 client interface;
        # confirm the pinned openai version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1
        )
        news = response['choices'][0]['message']['content']
        return news, raw_transcriptions
    except Exception as e:
        logger.error(f"Error generating news article: {str(e)}")
        return f"Error generating news article: {str(e)}", ""
246
 
247
  with gr.Blocks() as demo:
248
+ gr.Markdown("## All-in-One News Generator")
249
+
250
+ # Add tool description and attribution
251
+ gr.Markdown("""
252
+ ### About this tool
253
+
254
+ This AI-powered news generator helps journalists and content creators produce news articles by processing multiple types of input:
255
+ - Audio and video files with automatic transcription
256
+ - Social media content
257
+ - Documents (PDF, DOCX, XLSX, CSV)
258
+ - Web URLs
259
+
260
+ The tool uses advanced AI to generate well-structured news articles following journalistic principles and maintaining the integrity of source quotes.
261
+
262
+ Created by [Camilo Vega](https://www.linkedin.com/in/camilo-vega-169084b1/), AI Consultant
263
+ """)
264
+
265
  with gr.Row():
266
  with gr.Column(scale=2):
267
+ instructions = gr.Textbox(label="News article instructions", lines=2)
268
+ facts = gr.Textbox(label="Describe the news facts", lines=4)
269
+ size = gr.Number(label="Content body size (in words)", value=100)
270
+ tone = gr.Dropdown(label="News tone", choices=["serious", "neutral", "lighthearted"], value="neutral")
271
  with gr.Column(scale=3):
272
+ inputs_list = [instructions, facts, size, tone]
273
  with gr.Tabs():
274
  for i in range(1, 6):
275
  with gr.TabItem(f"Audio/Video {i}"):
276
  file = gr.File(label=f"Audio/Video {i}", type="filepath", file_types=["audio", "video"])
277
+ name = gr.Textbox(label="Name", scale=1)
278
+ position = gr.Textbox(label="Position", scale=1)
279
+ inputs_list.extend([file, name, position])
280
  for i in range(1, 4):
281
+ with gr.TabItem(f"Social Media {i}"):
282
+ social_url = gr.Textbox(label=f"Social media URL {i}", lines=1)
283
+ social_name = gr.Textbox(label=f"Person/account name {i}", scale=1)
284
+ social_context = gr.Textbox(label=f"Content context {i}", lines=2)
285
+ inputs_list.extend([social_url, social_name, social_context])
286
  for i in range(1, 6):
287
  with gr.TabItem(f"URL {i}"):
288
  url = gr.Textbox(label=f"URL {i}", lines=1)
289
  inputs_list.append(url)
290
  for i in range(1, 6):
291
+ with gr.TabItem(f"Document {i}"):
292
+ document = gr.File(label=f"Document {i}", type="filepath", file_count="single")
293
+ inputs_list.append(document)
294
 
295
+ gr.Markdown("---") # Visual separator
296
 
297
  with gr.Row():
298
+ transcriptions_output = gr.Textbox(label="Transcriptions", lines=10)
299
 
300
+ gr.Markdown("---") # Visual separator
301
 
302
  with gr.Row():
303
+ generate = gr.Button("Generate Draft")
304
  with gr.Row():
305
+ news_output = gr.Textbox(label="Generated Draft", lines=20)
306
 
307
+ generate.click(fn=generate_news, inputs=inputs_list, outputs=[news_output, transcriptions_output])
308
 
309
  demo.launch(share=True)