# Previous landing page, kept for reference:
# import gradio as gr
# with gr.Blocks(theme=gr.themes.Glass()) as demo:
#     open_google = gr.Button(value="Client", link="https://huggingface.co/spaces/WD101/OneClientToRuleThemAll")
#     open_bing = gr.Button(value="Server", link="https://huggingface.co/spaces/WD101/OneServerToRuleThemAll")
# demo.launch()
import json
import math
import tempfile
from pathlib import Path

import gradio as gr
import pandas as pd
import requests

# Backend endpoints used by the two tabs.
SCRAPE_ENDPOINT = "http://localhost:8000/scrape"
NOTES_ENDPOINT = "http://localhost:8001/notes"

# Upper bound (seconds) for a single backend call so that a stalled server
# cannot block a UI handler forever.
REQUEST_TIMEOUT_SECONDS = 60

# Number of scraped results shown per page in the "URL Scraper" tab.
ROWS_PER_PAGE = 5


def scrape_websites(urls_input):
    """Scrape every URL in the input and return the first page of results.

    Each URL is POSTed to ``SCRAPE_ENDPOINT`` as ``{"url": ...}``. A failure
    for one URL (network error, timeout, invalid JSON, server-reported error)
    is recorded in that URL's ``Status`` column and does not stop the rest.

    Args:
        urls_input (str): URLs separated by newlines; blank lines are ignored.

    Returns:
        tuple: ``(page_df, status, full_df, page, page_info)`` where
            - page_df (pd.DataFrame): first ``ROWS_PER_PAGE`` rows with
              columns ``["URL", "Content", "Status"]``.
            - status (str): e.g. "Scraping completed", "No URLs provided".
            - full_df (pd.DataFrame): all results.
            - page (int): current page number, always 1.
            - page_info (str): "Page 1".
    """
    if not urls_input:
        return pd.DataFrame(), "No URLs provided", pd.DataFrame(), 1, "Page 1"

    urls = [line.strip() for line in urls_input.split("\n") if line.strip()]
    if not urls:
        return pd.DataFrame(), "No valid URLs provided", pd.DataFrame(), 1, "Page 1"

    results = []
    for url in urls:
        try:
            response = requests.post(
                SCRAPE_ENDPOINT,
                json={"url": url},
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            result = response.json()
            if "error" in result:
                results.append(
                    {"URL": url, "Content": "", "Status": f"Error: {result['error']}"}
                )
            else:
                results.append(
                    {
                        "URL": url,
                        "Content": result.get("text", "No content extracted"),
                        "Status": "Success",
                    }
                )
        except Exception as e:
            # Deliberately broad: this is the per-URL boundary, and any
            # failure must become a row in the table instead of aborting
            # the whole batch.
            results.append({"URL": url, "Content": "", "Status": f"Error: {str(e)}"})

    df = pd.DataFrame(results, columns=["URL", "Content", "Status"])
    paginated_df = df.head(ROWS_PER_PAGE)
    status = "Scraping completed" if results else "No results to display"
    page = 1
    page_info = f"Page {page}"
    return paginated_df, status, df, page, page_info


def change_page(full_df, current_page, direction):
    """Move the scraped-results view one page forward or backward.

    The page number only changes when the move stays inside
    ``1..total_pages``; otherwise the current page is shown again.

    Args:
        full_df (pd.DataFrame): all scraped results.
        current_page (int): page currently displayed (1-based).
        direction (str): "next" or "prev"; any other value keeps the page.

    Returns:
        tuple: ``(page_df, status, full_df, current_page, page_info)`` where
            - page_df (pd.DataFrame): up to ``ROWS_PER_PAGE`` rows.
            - status (str): "Page updated", "No results on this page" or
              "No results to display".
            - full_df (pd.DataFrame): the input, unchanged.
            - current_page (int): page number after the move.
            - page_info (str): e.g. "Page 2 of 3".
    """
    if full_df is None or full_df.empty:
        return (
            pd.DataFrame(),
            "No results to display",
            full_df,
            current_page,
            f"Page {current_page}",
        )

    total_pages = math.ceil(len(full_df) / ROWS_PER_PAGE)
    if direction == "next" and current_page < total_pages:
        current_page += 1
    elif direction == "prev" and current_page > 1:
        current_page -= 1

    start_idx = (current_page - 1) * ROWS_PER_PAGE
    paginated_df = full_df.iloc[start_idx:start_idx + ROWS_PER_PAGE]
    page_info = f"Page {current_page} of {total_pages}"
    status = "Page updated" if not paginated_df.empty else "No results on this page"
    return paginated_df, status, full_df, current_page, page_info


def retrieve_notes():
    """Fetch all notes from ``NOTES_ENDPOINT`` as a DataFrame.

    The response is expected to be a JSON list of objects; missing keys are
    filled with defaults ("N/A", "Summary", "", "", "General").

    Returns:
        tuple: ``(notes_df, status)`` where
            - notes_df (pd.DataFrame): columns
              ``["id", "topic", "notes", "url", "tag"]``; empty on failure.
            - status (str): "Notes loaded successfully", "No notes found",
              or "Error: ..." for a request failure or server-reported error.
    """
    try:
        response = requests.get(NOTES_ENDPOINT, timeout=REQUEST_TIMEOUT_SECONDS)
        result = response.json()
        # Report a server-side error as an error instead of "No notes found",
        # the same way scrape_websites reports backend errors.
        if isinstance(result, dict) and "error" in result:
            return pd.DataFrame(), f"Error: {result['error']}"
        if not result:
            return pd.DataFrame(), "No notes found"

        processed_results = [
            {
                "id": item.get("id", "N/A"),
                "topic": item.get("topic", "Summary"),
                "notes": item.get("notes", ""),
                "url": item.get("url", ""),
                "tag": item.get("tag", "General"),
            }
            for item in result
        ]
        df = pd.DataFrame(
            processed_results, columns=["id", "topic", "notes", "url", "tag"]
        )
        return df, "Notes loaded successfully"
    except Exception as e:
        # UI boundary: surface any failure in the status box.
        return pd.DataFrame(), f"Error: {str(e)}"


def filter_notes(notes_df, max_rows, search_query, search_field):
    """Filter notes by a case-insensitive substring and cap the row count.

    The query is always matched as plain text (never as a regular
    expression), and column values are converted to strings first so that
    non-string columns such as a numeric ``id`` can be searched.

    Args:
        notes_df (pd.DataFrame): notes to filter; not modified.
        max_rows (str): "5", "10", "25" or "All".
        search_query (str): text to look for; empty means no text filter.
        search_field (str): a column name ("id", "topic", "notes", "url",
            "tag") or "all" to search every column.

    Returns:
        tuple: ``(filtered_df, status)`` where status is
            "Filtered notes loaded", "No matching notes found",
            "No notes available" or "Error: ...".
    """
    if notes_df is None or notes_df.empty:
        return pd.DataFrame(), "No notes available"
    try:
        filtered_df = notes_df.copy()
        if search_query and search_field:
            needle = search_query.lower()
            if search_field == "all":
                mask = filtered_df.apply(
                    lambda row: any(needle in str(val).lower() for val in row),
                    axis=1,
                )
            else:
                column = filtered_df[search_field]
                # notna() keeps missing values from matching via their "nan"
                # string form; regex=False makes characters like "(" literal.
                mask = column.notna() & column.astype(str).str.lower().str.contains(
                    needle, regex=False
                )
            filtered_df = filtered_df[mask]

        if max_rows != "All":
            filtered_df = filtered_df.head(int(max_rows))

        status = (
            "Filtered notes loaded"
            if not filtered_df.empty
            else "No matching notes found"
        )
        return filtered_df, status
    except Exception as e:
        return pd.DataFrame(), f"Error: {str(e)}"


def view_note_content(selected_row: int, notes_df):
    """Return the ``notes`` text of one row of the notes table.

    Args:
        selected_row (int): positional (0-based) row index.
        notes_df (pd.DataFrame): DataFrame containing a ``notes`` column.

    Returns:
        str: the note text, "No note selected or no data available", or
            "Error: ..." when the row cannot be read.
    """
    if selected_row is None or notes_df is None or notes_df.empty:
        return "No note selected or no data available"
    try:
        return notes_df.iloc[selected_row]["notes"]
    except Exception as e:
        return f"Error: {str(e)}"


def download_notes(notes_df, format_choice):
    """Export the notes to a temporary CSV or JSON file for download.

    The file component is given a path on disk, so the export is written to
    a fresh temporary directory under its final name ("notes.csv" or
    "notes.json"). JSON output is a single array of records.

    Args:
        notes_df (pd.DataFrame): notes to export.
        format_choice (str): "CSV" or "JSON".

    Returns:
        tuple: ``(file_update, status)`` where
            - file_update: a Gradio update that points the file component at
              the exported file and makes it visible, or None when nothing
              was exported.
            - status (str): "Download ready", "Data not available to
              download", "Invalid format selected" or "Error: ...".
    """
    if notes_df is None or notes_df.empty:
        return None, "Data not available to download"
    try:
        if format_choice == "CSV":
            content = notes_df.to_csv(index=False)
            filename = "notes.csv"
        elif format_choice == "JSON":
            content = notes_df.to_json(orient="records")
            filename = "notes.json"
        else:
            return None, "Invalid format selected"

        export_dir = tempfile.mkdtemp(prefix="notes_export_")
        file_path = Path(export_dir) / filename
        # Write bytes so the serialized text reaches disk without any
        # newline translation.
        file_path.write_bytes(content.encode("utf-8"))
        return gr.update(value=str(file_path), visible=True), "Download ready"
    except Exception as e:
        return None, f"Error: {str(e)}"


def _fetch_notes():
    """Fetch notes for the table and return a second copy for the state cache."""
    notes_df, status = retrieve_notes()
    return notes_df, status, notes_df


def _show_selected_note(notes_df, evt: gr.SelectData):
    """Map a table selection event to the note text of the selected row."""
    # For a Dataframe selection the index is expected to be (row, column);
    # fall back to a bare index if a single value is delivered.
    index = evt.index
    row_index = index[0] if isinstance(index, (list, tuple)) else index
    return view_note_content(row_index, notes_df)


# Gradio Tabbed Interface
with gr.Blocks() as app:
    gr.Markdown("# Knowledge Store App")
    with gr.Tabs():
        # Tab 1: Input Client with Multi-URL Support and Pagination
        with gr.Tab(label="URL Scraper"):
            url_input = gr.Textbox(
                label="Enter Webpage URLs (one per line)",
                placeholder="https://example.com\nhttps://wikipedia.org",
                lines=5
            )
            scrape_button = gr.Button("Scrape URLs")
            scrape_output = gr.Dataframe(
                headers=["URL", "Content", "Status"],
                label="Scraped Results",
                wrap=False
            )
            scrape_status = gr.Textbox(label="Status")
            with gr.Row():
                prev_button = gr.Button("Previous Page")
                next_button = gr.Button("Next Page")
            page_info = gr.Textbox(label="Page", value="Page 1", interactive=False)
            full_results = gr.State(pd.DataFrame())
            current_page = gr.State(1)

            scrape_button.click(
                fn=scrape_websites,
                inputs=url_input,
                outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
            )
            prev_button.click(
                fn=change_page,
                inputs=[full_results, current_page, gr.State("prev")],
                outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
            )
            next_button.click(
                fn=change_page,
                inputs=[full_results, current_page, gr.State("next")],
                outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
            )

        # Tab 2: Retrieval Client with Enhanced Columns
        with gr.Tab(label="View Notes"):
            with gr.Row():
                max_rows = gr.Dropdown(
                    choices=["5", "10", "25", "All"],
                    value="All",
                    label="Max Rows to Display"
                )
                search_query = gr.Textbox(label="Search Notes", placeholder="Enter search term")
                search_field = gr.Dropdown(
                    choices=["id", "topic", "notes", "url", "tag", "all"],
                    value="all",
                    label="Search Field"
                )
            retrieve_button = gr.Button("Fetch Notes")
            notes_table = gr.Dataframe(
                headers=["id", "topic", "notes", "url", "tag"],
                label="Stored Notes",
                interactive=True,
                wrap=False
            )
            notes_status = gr.Textbox(label="Status")
            content_view = gr.Textbox(label="Selected Note Content", lines=5)
            with gr.Row():
                format_choice = gr.Dropdown(
                    choices=["CSV", "JSON"],
                    value="CSV",
                    label="Download Format"
                )
                download_button = gr.Button("Download Notes")
            download_file = gr.File(label="Download File", visible=False)
            # Unfiltered copy of the last fetch. Filters read from here so
            # that narrowing the table never loses rows for later searches.
            all_notes = gr.State(pd.DataFrame())

            retrieve_button.click(
                fn=_fetch_notes,
                outputs=[notes_table, notes_status, all_notes]
            )
            # Any change to a filter control re-filters the full note set.
            for filter_control in (max_rows, search_query, search_field):
                filter_control.change(
                    fn=filter_notes,
                    inputs=[all_notes, max_rows, search_query, search_field],
                    outputs=[notes_table, notes_status]
                )
            # The displayed table is the input, so the selected row position
            # refers to what the user actually clicked.
            notes_table.select(
                fn=_show_selected_note,
                inputs=[notes_table],
                outputs=content_view
            )
            download_button.click(
                fn=download_notes,
                inputs=[notes_table, format_choice],
                outputs=[download_file, notes_status]
            )

app.launch()