Developed the media analysis UI and fixed the URL loading issue
Browse files
app.py
CHANGED
|
@@ -146,25 +146,23 @@ def initialize_local_llm(hf_token=None):
|
|
| 146 |
|
| 147 |
# --- DATA LOADING HELPER ---
|
| 148 |
def load_data(file_obj, gsheet_url):
|
| 149 |
-
"""Loads a DataFrame from
|
| 150 |
if file_obj is not None:
|
| 151 |
logging.info(f"Loading data from uploaded file: {file_obj.name}")
|
| 152 |
return pd.read_csv(file_obj.name)
|
| 153 |
elif gsheet_url and gsheet_url.strip():
|
| 154 |
-
logging.info(f"Loading data from
|
| 155 |
try:
|
| 156 |
-
#
|
| 157 |
-
|
| 158 |
-
response = requests.get(
|
| 159 |
response.raise_for_status() # Raise an exception for bad status codes
|
| 160 |
return pd.read_csv(StringIO(response.text))
|
| 161 |
except Exception as e:
|
| 162 |
-
raise ValueError(f"Failed to load from
|
| 163 |
else:
|
| 164 |
raise ValueError("Please upload a CSV file or provide a public Google Sheets URL.")
|
| 165 |
|
| 166 |
-
print("✅ App branding, LLM initialization, and data loading functions appended to app.py")
|
| 167 |
-
|
| 168 |
# --- MAIN ANALYSIS ENGINE ---
|
| 169 |
|
| 170 |
# We will define the AI agent in the next cell. For now, this is a placeholder.
|
|
@@ -498,26 +496,28 @@ def generate_temporal_plot(date_column, progress=gr.Progress()):
|
|
| 498 |
return None
|
| 499 |
|
| 500 |
def generate_media_analysis(media_column):
|
| 501 |
-
"""Generates a bar chart for media source analysis."""
|
| 502 |
if not media_column:
|
| 503 |
gr.Warning("Please select a media column to analyze.")
|
| 504 |
return None
|
| 505 |
df = APP_STATE.get("df")
|
| 506 |
if df is None or media_column not in df.columns:
|
| 507 |
return None
|
| 508 |
-
|
| 509 |
-
counts = df[media_column].value_counts().nlargest(20) # Get top 20
|
| 510 |
-
|
| 511 |
-
# Using Gradio's built-in plotting for simplicity
|
| 512 |
plot_df = pd.DataFrame({'Media Source': counts.index, 'Article Count': counts.values})
|
|
|
|
|
|
|
| 513 |
return gr.BarPlot(
|
| 514 |
plot_df,
|
| 515 |
-
x='
|
| 516 |
-
y='
|
| 517 |
-
title=
|
| 518 |
tooltip=['Media Source', 'Article Count'],
|
| 519 |
height=500,
|
| 520 |
-
|
|
|
|
| 521 |
)
|
| 522 |
|
| 523 |
def finalize_and_save():
|
|
@@ -563,7 +563,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title=APP_TITLE) as app:
|
|
| 563 |
with gr.Column(scale=1):
|
| 564 |
gr.Markdown("### 1. Data Input")
|
| 565 |
file_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
|
| 566 |
-
gsheet_url = gr.Textbox(label="Or Paste Google Sheets URL",
|
|
|
|
|
|
|
| 567 |
|
| 568 |
gr.Markdown("### 2. Select Columns")
|
| 569 |
text_columns_checkboxgroup = gr.CheckboxGroup(label="Select Text Columns for Analysis", interactive=True)
|
|
|
|
| 146 |
|
| 147 |
# --- DATA LOADING HELPER ---
|
| 148 |
def load_data(file_obj, gsheet_url):
    """Load a DataFrame from an uploaded file or a direct CSV URL.

    The uploaded file takes precedence when both inputs are supplied.

    Args:
        file_obj: Uploaded-file object whose ``.name`` attribute is the path
            of the CSV file, or ``None`` when nothing was uploaded.
        gsheet_url: Direct link to CSV content (for example a Google Sheets
            "Publish to web" CSV link), or ``None``/blank.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        ValueError: If neither input is provided, or if downloading or
            parsing the URL fails (the underlying error is chained).
    """
    if file_obj is not None:
        logging.info(f"Loading data from uploaded file: {file_obj.name}")
        return pd.read_csv(file_obj.name)

    # Normalise once so pasted leading/trailing whitespace never reaches the
    # HTTP request or the log line.
    url = gsheet_url.strip() if gsheet_url else ""
    if not url:
        raise ValueError("Please upload a CSV file or provide a public Google Sheets URL.")

    logging.info(f"Loading data directly from URL: {url}")
    try:
        # The URL is used as given (no rewriting); a direct CSV link is expected.
        # A timeout keeps an unresponsive host from blocking the app forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
        return pd.read_csv(StringIO(response.text))
    except Exception as e:
        # Callers see a single ValueError; chaining preserves the root cause.
        raise ValueError(
            f"Failed to load from URL. Please ensure it is a direct CSV link. Error: {e}"
        ) from e
|
| 165 |
|
|
|
|
|
|
|
| 166 |
# --- MAIN ANALYSIS ENGINE ---
|
| 167 |
|
| 168 |
# We will define the AI agent in the next cell. For now, this is a placeholder.
|
|
|
|
| 496 |
return None
|
| 497 |
|
| 498 |
def generate_media_analysis(media_column):
    """Build a bar plot of the 20 most frequent values in ``media_column``.

    Counts are placed on the x-axis and the source names on the y-axis; the
    swap is meant to give a horizontal chart so long labels do not overlap.

    Args:
        media_column: Name of the column in the loaded DataFrame to count.

    Returns:
        A ``gr.BarPlot``, or ``None`` when no column was selected (a warning
        is shown), no DataFrame is stored in ``APP_STATE``, or the column is
        not present in it.
    """
    if not media_column:
        gr.Warning("Please select a media column to analyze.")
        return None

    frame = APP_STATE.get("df")
    if frame is None or media_column not in frame.columns:
        return None

    # Keep the 20 largest counts, then order them ascending for plotting.
    top_counts = frame[media_column].value_counts().nlargest(20).sort_values()
    chart_data = pd.DataFrame(
        {'Media Source': top_counts.index, 'Article Count': top_counts.values}
    )
    average_guide = {'value': top_counts.mean(), 'label': 'Average'}

    return gr.BarPlot(
        chart_data,
        x='Article Count',  # numeric value on the x-axis
        y='Media Source',  # categorical labels on the y-axis
        title='Top 20 Media Sources by Article Count',
        tooltip=['Media Source', 'Article Count'],
        height=500,
        # NOTE(review): confirm the installed Gradio version's BarPlot accepts
        # `horizontal_guides`; it could not be verified from this file.
        horizontal_guides=[average_guide],
    )
|
| 522 |
|
| 523 |
def finalize_and_save():
|
|
|
|
| 563 |
with gr.Column(scale=1):
|
| 564 |
gr.Markdown("### 1. Data Input")
|
| 565 |
file_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
|
| 566 |
+
gsheet_url = gr.Textbox(
    label="Or Paste Google Sheets URL",
    placeholder="https://docs.google.com/spreadsheets/d/e/2PACX-1vTn-mRrOCk6fww892XfziUk63pJu9g8uOdy4nHjygKXcN7oO3EAhXLMD7WZAatvoLubSPpMdQ5ymouz/pub?output=csv",
    # Clear instructions for obtaining a direct CSV link. The double quotes
    # around the example URL are escaped; left bare they terminate the string
    # literal early and make this statement a syntax error.
    info="How to get the link: In Google Sheets, go to File > Share > Publish to web. Select 'Comma-separated values (.csv)' and copy the generated link. for example: use this url \"https://docs.google.com/spreadsheets/d/e/2PACX-1vTn-mRrOCk6fww892XfziUk63pJu9g8uOdy4nHjygKXcN7oO3EAhXLMD7WZAatvoLubSPpMdQ5ymouz/pub?output=csv\"",
)
|
| 569 |
|
| 570 |
gr.Markdown("### 2. Select Columns")
|
| 571 |
text_columns_checkboxgroup = gr.CheckboxGroup(label="Select Text Columns for Analysis", interactive=True)
|