# Spaces: Sleeping (Hugging Face Spaces status text captured with the page)
#import gradio as gr
#with gr.Blocks(theme=gr.themes.Glass()) as demo:
#    open_google = gr.Button(value="Client", link="https://huggingface.co/spaces/WD101/OneClientToRuleThemAll")
#    open_bing = gr.Button(value="Server", link="https://huggingface.co/spaces/WD101/OneServerToRuleThemAll")
#demo.launch()
import json
import math
import os
import tempfile

import gradio as gr
import pandas as pd
import requests
def scrape_websites(urls_input):
    """Scrape multiple URLs via the local scraping service and paginate results.

    Args:
        urls_input (str): URLs, one per line.

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: First page (up to 5 rows) with columns
              ["URL", "Content", "Status"].
            - str: Status message (e.g. "Scraping completed", "No URLs provided").
            - pd.DataFrame: Full DataFrame with all results.
            - int: Current page number (always 1 after a fresh scrape).
            - str: Page information (e.g. "Page 1").
    """
    if not urls_input:
        return pd.DataFrame(), "No URLs provided", pd.DataFrame(), 1, "Page 1"
    urls = [url.strip() for url in urls_input.split("\n") if url.strip()]
    if not urls:
        return pd.DataFrame(), "No valid URLs provided", pd.DataFrame(), 1, "Page 1"
    results = []
    for url in urls:
        try:
            # BUG FIX: requests has no default timeout, so an unreachable
            # scraper service would hang the UI forever.
            response = requests.post(
                "http://localhost:8000/scrape", json={"url": url}, timeout=30
            )
            result = response.json()
            if "error" in result:
                results.append({"URL": url, "Content": "", "Status": f"Error: {result['error']}"})
            else:
                results.append({
                    "URL": url,
                    "Content": result.get("text", "No content extracted"),
                    "Status": "Success",
                })
        except Exception as e:
            # A per-URL failure (network error, bad JSON) becomes an error row
            # instead of aborting the whole batch.
            results.append({"URL": url, "Content": "", "Status": f"Error: {str(e)}"})
    df = pd.DataFrame(results, columns=["URL", "Content", "Status"])
    paginated_df = df.head(5)
    status = "Scraping completed" if results else "No results to display"
    page = 1
    page_info = f"Page {page}"
    return paginated_df, status, df, page, page_info
def change_page(full_df, current_page, direction):
    """Move the results view one page forward or backward.

    Args:
        full_df (pd.DataFrame): All scraped results.
        current_page (int): Page currently shown (1-based).
        direction (str): Navigation direction, "next" or "prev".

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: The 5-row slice for the new page.
            - str: Status message.
            - pd.DataFrame: The unchanged full DataFrame.
            - int: Updated page number.
            - str: Page label (e.g. "Page 2 of 3").
    """
    if full_df.empty:
        return pd.DataFrame(), "No results to display", full_df, current_page, f"Page {current_page}"
    per_page = 5
    page_count = math.ceil(len(full_df) / per_page)
    # Navigation is clamped at the first and last page.
    if direction == "next":
        if current_page < page_count:
            current_page += 1
    elif direction == "prev":
        if current_page > 1:
            current_page -= 1
    first = (current_page - 1) * per_page
    window = full_df.iloc[first:first + per_page]
    label = f"Page {current_page} of {page_count}"
    message = "No results on this page" if window.empty else "Page updated"
    return window, message, full_df, current_page, label
def retrieve_notes():
    """Fetch all stored notes from the notes service (Server 2).

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: Columns ["id", "topic", "notes", "url", "tag"];
              empty on failure or when no notes exist.
            - str: Status message (e.g. "Notes loaded successfully",
              "No notes found").
    """
    try:
        # BUG FIX: requests has no default timeout, so a down service would
        # hang the UI forever.
        response = requests.get("http://localhost:8001/notes", timeout=30)
        result = response.json()
        if "error" in result or not result:
            return pd.DataFrame(), "No notes found"
        # Normalize each note record, filling defaults for missing fields.
        processed_results = [
            {
                "id": item.get("id", "N/A"),
                "topic": item.get("topic", "Summary"),
                "notes": item.get("notes", ""),
                "url": item.get("url", ""),
                "tag": item.get("tag", "General"),
            }
            for item in result
        ]
        df = pd.DataFrame(processed_results, columns=["id", "topic", "notes", "url", "tag"])
        return df, "Notes loaded successfully"
    except Exception as e:
        return pd.DataFrame(), f"Error: {str(e)}"
def filter_notes(notes_df, max_rows, search_query, search_field):
    """Filter and search notes based on user input.

    Args:
        notes_df (pd.DataFrame): Notes to filter.
        max_rows (str): Maximum rows to display ("5", "10", "25", or "All").
        search_query (str): Term to search for; matched literally and
            case-insensitively.
        search_field (str): Column to search ("id", "topic", "notes", "url",
            "tag"), or "all" for every column.

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: Filtered (and row-limited) notes.
            - str: Status message (e.g. "Filtered notes loaded",
              "No matching notes found").
    """
    if notes_df.empty:
        return pd.DataFrame(), "No notes available"
    try:
        filtered_df = notes_df.copy()
        if search_query and search_field:
            query = search_query.lower()
            if search_field == "all":
                filtered_df = filtered_df[
                    filtered_df.apply(
                        lambda row: any(query in str(val).lower() for val in row),
                        axis=1,
                    )
                ]
            else:
                # BUG FIX: astype(str) keeps non-string columns (e.g. a
                # numeric "id") searchable, and regex=False matches queries
                # like "(" or "c++" literally instead of raising a regex
                # compile error.
                filtered_df = filtered_df[
                    filtered_df[search_field]
                    .astype(str)
                    .str.lower()
                    .str.contains(query, regex=False, na=False)
                ]
        if max_rows != "All":
            filtered_df = filtered_df.head(int(max_rows))
        status = "Filtered notes loaded" if not filtered_df.empty else "No matching notes found"
        return filtered_df, status
    except Exception as e:
        return pd.DataFrame(), f"Error: {str(e)}"
def view_note_content(selected_row, notes_df):
    """Return the text of the selected note.

    Args:
        selected_row: Row selection — either an int row index or a
            (row, column) pair as reported by Gradio's select event.
        notes_df (pd.DataFrame): Notes table with a "notes" column.

    Returns:
        str: The note's content, or an error/status message.
    """
    if selected_row is None or notes_df.empty:
        return "No note selected or no data available"
    try:
        # BUG FIX: Gradio select events report the clicked cell as
        # [row, col]; accept that shape as well as a plain integer index.
        if isinstance(selected_row, (list, tuple)):
            selected_row = selected_row[0]
        return notes_df.iloc[int(selected_row)]["notes"]
    except Exception as e:
        return f"Error: {str(e)}"
def download_notes(notes_df, format_choice):
    """Serialize notes to a downloadable CSV or JSON file.

    Args:
        notes_df (pd.DataFrame): Notes to export.
        format_choice (str): Download format, "CSV" or "JSON".

    Returns:
        tuple: A tuple containing:
            - gr.File or None: Visible file component pointing at the written
              file, or None when there is nothing to download.
            - str: Status message (e.g. "Download ready",
              "Data not available to download").
    """
    if notes_df.empty:
        return None, "Data not available to download"
    try:
        if format_choice == "CSV":
            content = notes_df.to_csv(index=False)
            filename = "notes.csv"
        elif format_choice == "JSON":
            content = notes_df.to_json(orient="records", lines=True)
            filename = "notes.json"
        else:
            return None, "Invalid format selected"
        # BUG FIX: gr.File expects a path on disk — the original passed raw
        # bytes plus unsupported "filename"/"mime_type" kwargs, so the
        # download always failed. Write the content to a temp file instead.
        path = os.path.join(tempfile.gettempdir(), filename)
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
        return gr.File(value=path, visible=True), "Download ready"
    except Exception as e:
        return None, f"Error: {str(e)}"
# Gradio tabbed interface: URL-scraping client (Tab 1) and notes client (Tab 2).
with gr.Blocks() as app:
    gr.Markdown("# Knowledge Store App")
    with gr.Tabs():
        # Tab 1: input client with multi-URL support and pagination.
        with gr.Tab(label="URL Scraper"):
            url_input = gr.Textbox(
                label="Enter Webpage URLs (one per line)",
                placeholder="https://example.com\nhttps://wikipedia.org",
                lines=5
            )
            scrape_button = gr.Button("Scrape URLs")
            scrape_output = gr.Dataframe(
                headers=["URL", "Content", "Status"],
                label="Scraped Results",
                wrap=False
            )
            scrape_status = gr.Textbox(label="Status")
            with gr.Row():
                prev_button = gr.Button("Previous Page")
                next_button = gr.Button("Next Page")
            page_info = gr.Textbox(label="Page", value="Page 1", interactive=False)
            # Hidden state: the full result set and the page currently shown.
            full_results = gr.State(pd.DataFrame())
            current_page = gr.State(1)
            scrape_button.click(
                fn=scrape_websites,
                inputs=url_input,
                outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
            )
            prev_button.click(
                fn=change_page,
                inputs=[full_results, current_page, gr.State("prev")],
                outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
            )
            next_button.click(
                fn=change_page,
                inputs=[full_results, current_page, gr.State("next")],
                outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
            )
        # Tab 2: retrieval client with filtering, preview, and download.
        with gr.Tab(label="View Notes"):
            with gr.Row():
                max_rows = gr.Dropdown(
                    choices=["5", "10", "25", "All"],
                    value="All",
                    label="Max Rows to Display"
                )
                search_query = gr.Textbox(label="Search Notes", placeholder="Enter search term")
                search_field = gr.Dropdown(
                    choices=["id", "topic", "notes", "url", "tag", "all"],
                    value="all",
                    label="Search Field"
                )
            retrieve_button = gr.Button("Fetch Notes")
            notes_table = gr.Dataframe(
                headers=["id", "topic", "notes", "url", "tag"],
                label="Stored Notes",
                interactive=True,
                wrap=False
            )
            notes_status = gr.Textbox(label="Status")
            content_view = gr.Textbox(label="Selected Note Content", lines=5)
            with gr.Row():
                format_choice = gr.Dropdown(
                    choices=["CSV", "JSON"],
                    value="CSV",
                    label="Download Format"
                )
                download_button = gr.Button("Download Notes")
            download_file = gr.File(label="Download File", visible=False)
            retrieve_button.click(
                fn=retrieve_notes,
                outputs=[notes_table, notes_status]
            )
            max_rows.change(
                fn=filter_notes,
                inputs=[notes_table, max_rows, search_query, search_field],
                outputs=[notes_table, notes_status]
            )
            search_query.change(
                fn=filter_notes,
                inputs=[notes_table, max_rows, search_query, search_field],
                outputs=[notes_table, notes_status]
            )
            search_field.change(
                fn=filter_notes,
                inputs=[notes_table, max_rows, search_query, search_field],
                outputs=[notes_table, notes_status]
            )

            def _show_selected_note(notes_df, evt: gr.SelectData):
                # BUG FIX: the original wired view_note_content directly with
                # inputs=[notes_table], so the two-parameter function received
                # only the DataFrame and every row click raised a TypeError.
                # .select() injects the event payload; forward its row index.
                return view_note_content(evt.index[0], notes_df)

            notes_table.select(
                fn=_show_selected_note,
                inputs=[notes_table],
                outputs=content_view
            )
            download_button.click(
                fn=download_notes,
                inputs=[notes_table, format_choice],
                outputs=[download_file, notes_status]
            )
app.launch()