nakas committed on
Commit
c218a7a
·
verified ·
1 Parent(s): 770c9e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -72
app.py CHANGED
@@ -1,21 +1,21 @@
1
  import gradio as gr
2
  from playwright.sync_api import sync_playwright
 
3
  import time
4
  import json
5
- from download_browsers import download_playwright_browsers
6
 
7
- # Download browsers on startup
8
- download_playwright_browsers()
9
-
10
- def scrape_website(url, wait_time=5):
11
  """
12
- Scrape a website using Playwright headless browser
13
  Args:
14
- url (str): The URL to scrape
15
- wait_time (int): Time to wait for dynamic content to load
16
  Returns:
17
- dict: Dictionary containing scraped data
18
  """
 
 
19
  try:
20
  with sync_playwright() as p:
21
  # Launch browser in headless mode
@@ -25,87 +25,128 @@ def scrape_website(url, wait_time=5):
25
  )
26
  page = context.new_page()
27
 
28
- # Go to URL and wait for network to be idle
29
  page.goto(url, wait_until="networkidle")
30
- time.sleep(wait_time) # Additional wait for dynamic content
31
 
32
- # Get basic page information
33
- title = page.title()
 
34
 
35
- # Extract all text content
36
- text_content = page.text_content('body')
 
 
 
 
 
 
 
 
 
 
37
 
38
- # Extract all links
39
- links = page.eval_on_selector_all('a[href]', 'elements => elements.map(el => el.href)')
40
 
41
- # Extract all images
42
- images = page.eval_on_selector_all('img[src]', 'elements => elements.map(el => el.src)')
 
43
 
44
- # Get meta description
45
- meta_description = page.eval_on_selector('meta[name="description"]',
46
- 'element => element.content') if page.query_selector('meta[name="description"]') else ''
 
 
47
 
48
- # Close browser
49
- browser.close()
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  return {
52
- "title": title,
53
- "meta_description": meta_description,
54
- "text_content": text_content[:1000] + "...", # Truncate for display
55
- "links": links[:10], # Show first 10 links
56
- "images": images[:5], # Show first 5 images
57
- "status": "Success"
58
  }
59
 
60
  except Exception as e:
61
  return {
62
- "status": "Error",
63
- "error_message": str(e)
64
  }
65
 
66
  def format_output(result):
67
- """Format the output for better display in Gradio"""
68
- if result["status"] == "Error":
69
- return f"Error: {result['error_message']}"
70
 
71
- output = f"""
72
- ### Page Title
73
- {result['title']}
74
-
75
- ### Meta Description
76
- {result['meta_description']}
77
-
78
- ### First 1000 characters of content
79
- {result['text_content']}
80
-
81
- ### First 10 Links
82
- {json.dumps(result['links'], indent=2)}
83
-
84
- ### First 5 Images
85
- {json.dumps(result['images'], indent=2)}
86
- """
87
- return output
 
 
 
 
 
 
 
 
88
 
89
  # Create Gradio interface
90
- iface = gr.Interface(
91
- fn=lambda url, wait_time: format_output(scrape_website(url, wait_time)),
92
- inputs=[
93
- gr.Textbox(label="URL to scrape", placeholder="https://example.com"),
94
- gr.Slider(minimum=1, maximum=15, value=5, step=1, label="Wait time (seconds)")
95
- ],
96
- outputs=gr.Markdown(),
97
- title="Web Scraper with Headless Browser",
98
- description="""
99
- Enter a URL to scrape its content using a headless browser.
100
- The tool will extract the title, meta description, text content, links, and images.
101
- Please use responsibly and respect websites' terms of service and robots.txt files.
102
- """,
103
- examples=[
104
- ["https://example.com", 5],
105
- ["https://news.ycombinator.com", 8]
106
- ]
107
- )
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- # Launch the interface
110
  if __name__ == "__main__":
111
- iface.launch()
 
1
  import gradio as gr
2
  from playwright.sync_api import sync_playwright
3
+ import pandas as pd
4
  import time
5
  import json
6
+ from datetime import datetime
7
 
8
def scrape_weather_data(site_id="YCTIM", hours=720):
    """
    Scrape observation data from a weather.gov timeseries page.

    Args:
        site_id (str): The weather station ID (e.g. "YCTIM").
        hours (int): Number of hours of history to request.

    Returns:
        dict: On success: {'status': 'Success', 'statistics': {...},
              'data': [row dicts]}. On failure: {'status': 'Error',
              'error_message': str}.
    """
    url = (
        f"https://www.weather.gov/wrh/timeseries?site={site_id}&hours={hours}"
        "&units=english&chart=on&headers=on&obs=tabular&hourly=false"
        "&pview=full&font=12&plot="
    )

    try:
        with sync_playwright() as p:
            # Launch browser in headless mode.
            # NOTE(review): the launch/new_context arguments were elided in the
            # diff hunk this block was recovered from — confirm against the
            # original file (a custom user_agent was likely passed here).
            browser = p.chromium.launch(headless=True)
            context = browser.new_context()
            page = context.new_page()

            # Navigate and let network activity settle, then give the
            # client-side rendering a fixed grace period to fill the table.
            page.goto(url, wait_until="networkidle")
            time.sleep(5)

            # wait_for_selector raises TimeoutError when the table never
            # appears, so no extra existence check is needed afterwards.
            page.wait_for_selector('#obsTable', timeout=10000)

            # Pull headers and cell text out of the table in one round trip.
            data = page.evaluate('''() => {
                const table = document.querySelector('#obsTable');
                const headers = Array.from(table.querySelectorAll('thead th')).map(th => th.textContent.trim());
                const rows = Array.from(table.querySelectorAll('tbody tr')).map(row => {
                    return Array.from(row.querySelectorAll('td')).map(td => td.textContent.trim());
                });
                return {headers, rows};
            }''')

            # Close browser
            browser.close()

        # Headers can contain embedded newlines; normalize to single-line.
        headers = [h.replace('\n', ' ').strip() for h in data['headers']]
        df = pd.DataFrame(data['rows'], columns=headers)

        # Convert numeric columns (blank / non-numeric cells become NaN).
        numeric_columns = ['Temp. (°F)', 'Dew Point (°F)', 'Relative Humidity (%)',
                           'Wind Chill (°F)', 'Snow Depth (in)']
        for col in numeric_columns:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')

        # Parse wind speed and optional gust. The gust suffix ("15G25") is
        # not always present; the original regex r'(\d+)G(\d+)' required it
        # and therefore discarded the speed of every gust-free observation.
        if 'Wind Speed (mph)' in df.columns:
            wind = df['Wind Speed (mph)'].str.extract(r'(?P<speed>\d+)(?:G(?P<gust>\d+))?')
            df['Wind Speed'] = pd.to_numeric(wind['speed'], errors='coerce')
            df['Wind Gust'] = pd.to_numeric(wind['gust'], errors='coerce')

        # Build summary statistics only from columns that actually exist, so
        # a station lacking e.g. snow depth does not raise KeyError and turn
        # the whole scrape into an Error result.
        stats = {}
        if 'Temp. (°F)' in df.columns:
            stats['Temperature Range'] = (
                f"{df['Temp. (°F)'].min():.1f}°F to {df['Temp. (°F)'].max():.1f}°F"
            )
            stats['Average Temperature'] = f"{df['Temp. (°F)'].mean():.1f}°F"
        if 'Wind Speed' in df.columns:
            stats['Max Wind Speed'] = f"{df['Wind Speed'].max():.1f} mph"
        if 'Wind Gust' in df.columns:
            stats['Max Wind Gust'] = f"{df['Wind Gust'].max():.1f} mph"
        if 'Relative Humidity (%)' in df.columns:
            stats['Average Humidity'] = f"{df['Relative Humidity (%)'].mean():.1f}%"
        if 'Snow Depth (in)' in df.columns:
            stats['Max Snow Depth'] = f"{df['Snow Depth (in)'].max():.1f} inches"

        return {
            'status': 'Success',
            'statistics': stats,
            'data': df.to_dict('records')
        }

    except Exception as e:
        # Surface any scrape/parse failure to the UI rather than crashing.
        return {
            'status': 'Error',
            'error_message': str(e)
        }
87
 
88
def format_output(result):
    """
    Format scrape results for the three Gradio outputs.

    Args:
        result (dict): Return value of scrape_weather_data.

    Returns:
        tuple: (stats_html, temp_fig, wind_fig). The figures are matplotlib
        Figure objects suitable for gr.Plot, or None when the corresponding
        columns are unavailable (or on error).
    """
    if result['status'] == 'Error':
        return f"Error: {result['error_message']}", None, None

    # Render the statistics dict as a simple HTML list.
    stats_parts = ["<div style='font-size: 16px; line-height: 1.5;'>"]
    for key, value in result['statistics'].items():
        stats_parts.append(f"<p><strong>{key}:</strong> {value}</p>")
    stats_parts.append("</div>")
    stats_html = "".join(stats_parts)

    # Rebuild a DataFrame for plotting.
    df = pd.DataFrame(result['data'])
    if 'Date/Time' not in df.columns:
        # Without a time axis no plot can be drawn; still show the stats.
        return stats_html, None, None
    df['Date/Time'] = pd.to_datetime(df['Date/Time'])

    # BUG FIX: the original created bare gr.Plot() components and called
    # .pyplot() on them — gr.Plot has no such method and the returned
    # components carried no figure, so nothing was ever plotted. gr.Plot
    # outputs accept a matplotlib Figure directly, so return the figures.
    def _line_figure(columns, title):
        # Plot only the columns that actually exist in the scraped table.
        cols = [c for c in columns if c in df.columns]
        if not cols:
            return None
        ax = df.plot(x='Date/Time', y=cols, title=title, figsize=(12, 6))
        return ax.get_figure()

    temp_fig = _line_figure(['Temp. (°F)', 'Wind Chill (°F)'],
                            'Temperature and Wind Chill Over Time')
    wind_fig = _line_figure(['Wind Speed', 'Wind Gust'],
                            'Wind Speed and Gusts Over Time')

    return stats_html, temp_fig, wind_fig
118
 
119
# Create Gradio interface
with gr.Blocks(title="Weather Station Data Analyzer") as demo:
    gr.Markdown("# Weather Station Data Analyzer")

    with gr.Row():
        site_id = gr.Textbox(
            label="Weather Station ID",
            value="YCTIM",
            placeholder="Enter station ID (e.g., YCTIM)"
        )
        hours = gr.Number(
            label="Hours of Data",
            value=720,
            minimum=1,
            maximum=1440,
            precision=0  # whole hours only; gr.Number yields floats otherwise
        )

    analyze_btn = gr.Button("Fetch and Analyze Weather Data")

    with gr.Row():
        stats_output = gr.HTML(label="Statistics")

    with gr.Row():
        temp_plot = gr.Plot(label="Temperature Plot")
        wind_plot = gr.Plot(label="Wind Plot")

    # BUG FIX: gr.Number passes a float, which produced "hours=720.0" in the
    # scrape URL's query string; cast to int before building the request.
    analyze_btn.click(
        fn=lambda sid, hrs: format_output(scrape_weather_data(sid, int(hrs))),
        inputs=[site_id, hours],
        outputs=[stats_output, temp_plot, wind_plot]
    )


if __name__ == "__main__":
    demo.launch()