Arjon07CSE committed on
Commit
98f36fd
·
verified ·
1 Parent(s): db79a4d

Developed the media analysis UI and fixed the URL loading issue

Browse files
Files changed (1) hide show
  1. app.py +20 -18
app.py CHANGED
@@ -146,25 +146,23 @@ def initialize_local_llm(hf_token=None):
146
 
147
  # --- DATA LOADING HELPER ---
148
  def load_data(file_obj, gsheet_url):
149
- """Loads a DataFrame from either an uploaded file or a Google Sheets URL."""
150
  if file_obj is not None:
151
  logging.info(f"Loading data from uploaded file: {file_obj.name}")
152
  return pd.read_csv(file_obj.name)
153
  elif gsheet_url and gsheet_url.strip():
154
- logging.info(f"Loading data from Google Sheets URL.")
155
  try:
156
- # Manipulate the URL for direct CSV export
157
- csv_url = gsheet_url.replace('/edit?usp=sharing', '/export?format=csv&gid=0')
158
- response = requests.get(csv_url)
159
  response.raise_for_status() # Raise an exception for bad status codes
160
  return pd.read_csv(StringIO(response.text))
161
  except Exception as e:
162
- raise ValueError(f"Failed to load from Google Sheets URL. Please ensure the link is correct and publicly accessible. Error: {e}")
163
  else:
164
  raise ValueError("Please upload a CSV file or provide a public Google Sheets URL.")
165
 
166
- print("✅ App branding, LLM initialization, and data loading functions appended to app.py")
167
-
168
  # --- MAIN ANALYSIS ENGINE ---
169
 
170
  # We will define the AI agent in the next cell. For now, this is a placeholder.
@@ -498,26 +496,28 @@ def generate_temporal_plot(date_column, progress=gr.Progress()):
498
  return None
499
 
500
  def generate_media_analysis(media_column):
501
- """Generates a bar chart for media source analysis."""
502
  if not media_column:
503
  gr.Warning("Please select a media column to analyze.")
504
  return None
505
  df = APP_STATE.get("df")
506
  if df is None or media_column not in df.columns:
507
  return None
508
-
509
- counts = df[media_column].value_counts().nlargest(20) # Get top 20 sources
510
-
511
- # Using Gradio's built-in plotting for simplicity
512
  plot_df = pd.DataFrame({'Media Source': counts.index, 'Article Count': counts.values})
 
 
513
  return gr.BarPlot(
514
  plot_df,
515
- x='Media Source',
516
- y='Article Count',
517
- title=f'Top 20 Media Sources by Article Count',
518
  tooltip=['Media Source', 'Article Count'],
519
  height=500,
520
- vertical_guides=[{'value': counts.mean(), 'label': 'Average'}]
 
521
  )
522
 
523
  def finalize_and_save():
@@ -563,7 +563,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title=APP_TITLE) as app:
563
  with gr.Column(scale=1):
564
  gr.Markdown("### 1. Data Input")
565
  file_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
566
- gsheet_url = gr.Textbox(label="Or Paste Google Sheets URL", placeholder="https://docs.google.com/spreadsheets/d/...")
 
 
567
 
568
  gr.Markdown("### 2. Select Columns")
569
  text_columns_checkboxgroup = gr.CheckboxGroup(label="Select Text Columns for Analysis", interactive=True)
 
146
 
147
  # --- DATA LOADING HELPER ---
148
  def load_data(file_obj, gsheet_url):
149
+ """Loads a DataFrame from an uploaded file or a direct Google Sheets CSV URL."""
150
  if file_obj is not None:
151
  logging.info(f"Loading data from uploaded file: {file_obj.name}")
152
  return pd.read_csv(file_obj.name)
153
  elif gsheet_url and gsheet_url.strip():
154
+ logging.info(f"Loading data directly from URL: {gsheet_url}")
155
  try:
156
+ # FIX: Removed the unreliable .replace() logic.
157
+ # We now expect a direct CSV link from the user.
158
+ response = requests.get(gsheet_url)
159
  response.raise_for_status() # Raise an exception for bad status codes
160
  return pd.read_csv(StringIO(response.text))
161
  except Exception as e:
162
+ raise ValueError(f"Failed to load from URL. Please ensure it is a direct CSV link. Error: {e}")
163
  else:
164
  raise ValueError("Please upload a CSV file or provide a public Google Sheets URL.")
165
 
 
 
166
  # --- MAIN ANALYSIS ENGINE ---
167
 
168
  # We will define the AI agent in the next cell. For now, this is a placeholder.
 
496
  return None
497
 
498
  def generate_media_analysis(media_column):
499
+ """Generates a horizontal bar chart for media source analysis to prevent label overlap."""
500
  if not media_column:
501
  gr.Warning("Please select a media column to analyze.")
502
  return None
503
  df = APP_STATE.get("df")
504
  if df is None or media_column not in df.columns:
505
  return None
506
+
507
+ counts = df[media_column].value_counts().nlargest(20).sort_values() # Get top 20 and sort for a nice plot
508
+
 
509
  plot_df = pd.DataFrame({'Media Source': counts.index, 'Article Count': counts.values})
510
+
511
+ # FIX: Swapped x and y to create a horizontal plot.
512
  return gr.BarPlot(
513
  plot_df,
514
+ x='Article Count', # The numeric value is now on the x-axis
515
+ y='Media Source', # The categorical labels are now on the y-axis
516
+ title='Top 20 Media Sources by Article Count',
517
  tooltip=['Media Source', 'Article Count'],
518
  height=500,
519
+ # FIX: Changed to horizontal_guides
520
+ horizontal_guides=[{'value': counts.mean(), 'label': 'Average'}]
521
  )
522
 
523
  def finalize_and_save():
 
563
  with gr.Column(scale=1):
564
  gr.Markdown("### 1. Data Input")
565
  file_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
566
gsheet_url = gr.Textbox(
    label="Or Paste Google Sheets URL",
    placeholder="https://docs.google.com/spreadsheets/d/e/2PACX-1vTn-mRrOCk6fww892XfziUk63pJu9g8uOdy4nHjygKXcN7oO3EAhXLMD7WZAatvoLubSPpMdQ5ymouz/pub?output=csv",
    # The loader fetches this URL as-is and parses the response as CSV, so the
    # help text explains how to obtain a direct CSV link. The example URL is
    # kept inside the string literal: a bare double quote here would end the
    # literal early and make the module fail to import.
    info=(
        "How to get the link: In Google Sheets, go to File > Share > Publish to web. "
        "Select 'Comma-separated values (.csv)' and copy the generated link. "
        "For example: https://docs.google.com/spreadsheets/d/e/2PACX-1vTn-mRrOCk6fww892XfziUk63pJu9g8uOdy4nHjygKXcN7oO3EAhXLMD7WZAatvoLubSPpMdQ5ymouz/pub?output=csv"
    ),
)
569
 
570
  gr.Markdown("### 2. Select Columns")
571
  text_columns_checkboxgroup = gr.CheckboxGroup(label="Select Text Columns for Analysis", interactive=True)