# Hugging Face Space: WD101 / One_App_To_Rule_Them_All (status: Sleeping)
# File size: 10,889 Bytes

#import gradio as gr

#with gr.Blocks(theme=gr.themes.Glass()) as demo:
#  open_google = gr.Button(value="Client", link="https://huggingface.co/spaces/WD101/OneClientToRuleThemAll")
#  open_bing = gr.Button(value="Server", link="https://huggingface.co/spaces/WD101/OneServerToRuleThemAll")

#demo.launch()

import gradio as gr
import pandas as pd
import requests
import math
import json


def scrape_websites(urls_input):
  """Scrapes multiple URLs and returns the first page of results.

  Every non-blank line of ``urls_input`` is POSTed as ``{"url": <line>}`` to
  the scraping service at ``http://localhost:8000/scrape``. A failure for one
  URL (network error, timeout, invalid JSON, error payload) is recorded in
  that URL's "Status" cell and does not abort the remaining URLs.

  Args:
      urls_input (str): A string containing URLs, one per line.

  Returns:
      tuple: A tuple containing:
          - pd.DataFrame: First page (up to 5 rows) with columns
            ["URL", "Content", "Status"].
          - str: Status message ("Scraping completed", "No URLs provided" or
            "No valid URLs provided").
          - pd.DataFrame: Full DataFrame with all results.
          - int: Current page number (always 1).
          - str: Page information ("Page 1").
  """
  # Without a timeout, requests waits indefinitely on a stalled service and
  # the Gradio callback never returns.
  request_timeout = 30  # seconds
  rows_per_page = 5

  if not urls_input:
    return pd.DataFrame(), "No URLs provided", pd.DataFrame(), 1, "Page 1"

  urls = [url.strip() for url in urls_input.split("\n") if url.strip()]
  if not urls:
    return pd.DataFrame(), "No valid URLs provided", pd.DataFrame(), 1, "Page 1"

  results = []
  for url in urls:
    try:
      response = requests.post(
        "http://localhost:8000/scrape",
        json={"url": url},
        timeout=request_timeout,
      )
      result = response.json()
      if not isinstance(result, dict):
        # A list/str/number payload has no "text" field to read; report it
        # explicitly instead of failing on ``.get`` below.
        results.append({"URL": url, "Content": "", "Status": "Error: unexpected response format"})
      elif "error" in result:
        results.append({"URL": url, "Content": "", "Status": f"Error: {result['error']}"})
      else:
        results.append({"URL": url, "Content": result.get("text", "No content extracted"), "Status": "Success"})
    except Exception as e:
      # UI boundary: keep going with the next URL and show the reason inline.
      results.append({"URL": url, "Content": "", "Status": f"Error: {str(e)}"})

  df = pd.DataFrame(results, columns=["URL", "Content", "Status"])
  paginated_df = df.head(rows_per_page)
  # ``urls`` is non-empty here, so there is always at least one result row.
  status = "Scraping completed"
  page = 1
  page_info = f"Page {page}"
  return paginated_df, status, df, page, page_info


def change_page(full_df, current_page, direction):
  """Moves the scraped-results view one page forward or backward.

  Pages hold 5 rows. A "next" request on the last page, a "prev" request on
  the first page, or any other ``direction`` value leaves the page number as
  it is and simply re-renders that page.

  Args:
      full_df (pd.DataFrame): Full DataFrame containing all scraped results.
      current_page (int): Current (1-based) page number.
      direction (str): Navigation direction ("next" or "prev").

  Returns:
      tuple: A tuple containing:
          - pd.DataFrame: Slice of ``full_df`` for the resulting page.
          - str: Status message ("Page updated", "No results on this page" or
            "No results to display").
          - pd.DataFrame: The same ``full_df`` object that was passed in.
          - int: Resulting page number.
          - str: Page information ("Page 2 of 3"; just "Page N" when
            ``full_df`` is empty).
  """
  # Guard: nothing to paginate.
  if full_df.empty:
    return pd.DataFrame(), "No results to display", full_df, current_page, f"Page {current_page}"

  page_size = 5
  page_count = math.ceil(len(full_df) / page_size)

  wants_next = direction == "next" and current_page < page_count
  wants_prev = direction == "prev" and current_page > 1
  if wants_next:
    current_page = current_page + 1
  elif wants_prev:
    current_page = current_page - 1

  first_row = page_size * (current_page - 1)
  page_rows = full_df.iloc[first_row:first_row + page_size]

  if page_rows.empty:
    status = "No results on this page"
  else:
    status = "Page updated"
  return page_rows, status, full_df, current_page, f"Page {current_page} of {page_count}"


def retrieve_notes():
  """Fetches notes from the notes service and returns them as a DataFrame.

  Sends a GET request to ``http://localhost:8001/notes`` and expects a JSON
  list of note objects. Missing fields are filled with defaults ("N/A" for
  id, "Summary" for topic, "General" for tag, empty string otherwise).

  Args:
      None

  Returns:
      tuple: A tuple containing:
          - pd.DataFrame: DataFrame with columns
            ["id", "topic", "notes", "url", "tag"]; empty on any failure.
          - str: Status message ("Notes loaded successfully",
            "No notes found", or "Error: ..." with the failure reason).
  """
  # Without a timeout, requests waits indefinitely on a stalled service and
  # the Gradio callback never returns.
  request_timeout = 30  # seconds
  columns = ["id", "topic", "notes", "url", "tag"]
  try:
    response = requests.get("http://localhost:8001/notes", timeout=request_timeout)
    result = response.json()
    if isinstance(result, dict) and "error" in result:
      # Show the server-side failure instead of masking it as "no notes".
      return pd.DataFrame(), f"Error: {result['error']}"
    if not result:
      return pd.DataFrame(), "No notes found"
    if not isinstance(result, list):
      return pd.DataFrame(), "Error: unexpected response format"
    processed_results = [
      {
        "id": item.get("id", "N/A"),
        "topic": item.get("topic", "Summary"),
        "notes": item.get("notes", ""),
        "url": item.get("url", ""),
        "tag": item.get("tag", "General")
      } for item in result
    ]
    df = pd.DataFrame(processed_results, columns=columns)
    return df, "Notes loaded successfully"
  except Exception as e:
    # UI boundary: network errors, timeouts, invalid JSON and malformed
    # entries are all reported in the status box rather than raised.
    return pd.DataFrame(), f"Error: {str(e)}"


def filter_notes(notes_df, max_rows, search_query, search_field):
  """Filters and searches notes based on user input.

  The search is a case-insensitive plain substring match (the query is never
  interpreted as a regular expression). When a single field is searched its
  values are compared as text, so numeric columns such as "id" can be
  searched too; missing values never match.

  Args:
      notes_df (pd.DataFrame): DataFrame containing notes.
      max_rows (str): Maximum rows to display ("5", "10", "25", or "All").
          ``None`` or an empty string is treated like "All".
      search_query (str): Search term to filter notes; empty means no search.
      search_field (str): Field to search ("id", "topic", "notes", "url",
          "tag", or "all" to search every column).

  Returns:
      tuple: A tuple containing:
          - pd.DataFrame: Filtered DataFrame (empty on error).
          - str: Status message ("Filtered notes loaded",
            "No matching notes found", "No notes available" or "Error: ...").
  """
  if notes_df.empty:
    return pd.DataFrame(), "No notes available"
  try:
    filtered_df = notes_df.copy()
    if search_query and search_field:
      needle = search_query.lower()
      if search_field == "all":
        mask = filtered_df.apply(
          lambda row: any(needle in str(val).lower() for val in row), axis=1
        )
      else:
        if search_field not in filtered_df.columns:
          return pd.DataFrame(), f"Error: unknown search field '{search_field}'"
        column = filtered_df[search_field]
        # astype(str) lets the .str accessor work on numeric columns;
        # regex=False keeps characters like "(" or "." literal, matching the
        # substring semantics of the "all" branch. notna() keeps missing
        # values from matching via their "None"/"nan" text form.
        mask = column.notna() & column.astype(str).str.lower().str.contains(
          needle, regex=False, na=False
        )
      filtered_df = filtered_df[mask]
    # A cleared dropdown yields None; treat it as "no limit".
    if max_rows not in (None, "", "All"):
      filtered_df = filtered_df.head(int(max_rows))
    status = "Filtered notes loaded" if not filtered_df.empty else "No matching notes found"
    return filtered_df, status
  except Exception as e:
    # UI boundary: report the problem in the status box rather than raising.
    return pd.DataFrame(), f"Error: {str(e)}"


def view_note_content(selected_row: int, notes_df):
  """Returns the "notes" cell of one row of the notes table.

  Args:
      selected_row (int): Positional index of the row to show.
      notes_df (pd.DataFrame): DataFrame containing notes.

  Returns:
      The value stored in the "notes" column of that row, a fixed message
      when nothing is selected or the table is empty, or "Error: ..." when
      the lookup fails (e.g. index out of range, missing "notes" column).
  """
  nothing_selected = selected_row is None
  if nothing_selected or notes_df.empty:
    return "No note selected or no data available"

  try:
    # Positional lookup first, then pick the column from that row.
    record = notes_df.iloc[selected_row]
    note_text = record["notes"]
  except Exception as err:
    return f"Error: {err}"
  return note_text


def download_notes(notes_df, format_choice):
  """Exports the notes to a CSV or JSON file and exposes it for download.

  The notes are written to a file named ``notes.csv`` / ``notes.json`` inside
  a freshly created temporary directory, and that path is handed to the
  ``gr.File`` output. ``gr.File`` takes a file path as its value; it does not
  accept raw bytes or ``filename`` / ``mime_type`` keywords.

  The JSON export is a single JSON array of row objects, so the resulting
  ``.json`` file is a valid JSON document.

  Args:
      notes_df (pd.DataFrame): DataFrame containing notes.
      format_choice (str): Download format ("CSV" or "JSON").

  Returns:
      tuple: A tuple containing:
          - gr.File or None: Visible file component pointing at the export,
            or None if there is no data, the format is unknown, or the
            export failed.
          - str: Status message ("Download ready",
            "Data not available to download", "Invalid format selected" or
            "Error: ...").
  """
  # Function-scope imports keep this block self-contained.
  import os
  import tempfile

  if notes_df.empty:
    return None, "Data not available to download"
  try:
    if format_choice == "CSV":
      filename = "notes.csv"
    elif format_choice == "JSON":
      filename = "notes.json"
    else:
      return None, "Invalid format selected"

    # A dedicated directory per export keeps the user-visible file name
    # stable ("notes.csv") without concurrent exports overwriting each other.
    # NOTE: the directory is not removed here; it is left to the OS temp
    # cleanup because the file must outlive this call to be served.
    export_dir = tempfile.mkdtemp(prefix="notes_export_")
    file_path = os.path.join(export_dir, filename)

    if format_choice == "CSV":
      notes_df.to_csv(file_path, index=False, encoding="utf-8")
    else:
      notes_df.to_json(file_path, orient="records", force_ascii=False)

    return gr.File(value=file_path, visible=True), "Download ready"
  except Exception as e:
    # UI boundary: report the problem in the status box rather than raising.
    return None, f"Error: {str(e)}"


# Gradio Tabbed Interface
#
# Builds the two-tab UI ("URL Scraper" and "View Notes"), wires the widgets
# to the handler functions defined above, and launches the app.
with gr.Blocks() as app:
  gr.Markdown("# Knowledge Store App")
  with gr.Tabs():
    # Tab 1: Input Client with Multi-URL Support and Pagination
    with gr.Tab(label="URL Scraper"):
      url_input = gr.Textbox(
        label="Enter Webpage URLs (one per line)",
        placeholder="https://example.com\nhttps://wikipedia.org",
        lines=5
      )
      scrape_button = gr.Button("Scrape URLs")
      scrape_output = gr.Dataframe(
        headers=["URL", "Content", "Status"],
        label="Scraped Results",
        wrap=False
      )
      scrape_status = gr.Textbox(label="Status")
      with gr.Row():
        prev_button = gr.Button("Previous Page")
        next_button = gr.Button("Next Page")
        page_info = gr.Textbox(label="Page", value="Page 1", interactive=False)
      # Per-session state: the complete result set and the page being shown.
      full_results = gr.State(pd.DataFrame())
      current_page = gr.State(1)

      # scrape_websites and change_page return the same 5-tuple, so all three
      # buttons share one output list.
      paging_outputs = [scrape_output, scrape_status, full_results, current_page, page_info]

      scrape_button.click(
        fn=scrape_websites,
        inputs=url_input,
        outputs=paging_outputs
      )
      prev_button.click(
        fn=change_page,
        inputs=[full_results, current_page, gr.State("prev")],
        outputs=paging_outputs
      )
      next_button.click(
        fn=change_page,
        inputs=[full_results, current_page, gr.State("next")],
        outputs=paging_outputs
      )

    # Tab 2: Retrieval Client with Enhanced Columns
    with gr.Tab(label="View Notes"):
      with gr.Row():
        max_rows = gr.Dropdown(
          choices=["5", "10", "25", "All"],
          value="All",
          label="Max Rows to Display"
        )
        search_query = gr.Textbox(label="Search Notes", placeholder="Enter search term")
        search_field = gr.Dropdown(
          choices=["id", "topic", "notes", "url", "tag", "all"],
          value="all",
          label="Search Field"
        )
      retrieve_button = gr.Button("Fetch Notes")
      notes_table = gr.Dataframe(
        headers=["id", "topic", "notes", "url", "tag"],
        label="Stored Notes",
        interactive=True,
        wrap=False
      )
      notes_status = gr.Textbox(label="Status")
      content_view = gr.Textbox(label="Selected Note Content", lines=5)
      with gr.Row():
        format_choice = gr.Dropdown(
          choices=["CSV", "JSON"],
          value="CSV",
          label="Download Format"
        )
        download_button = gr.Button("Download Notes")
      download_file = gr.File(label="Download File", visible=False)

      # Unfiltered copy of the last fetch. Filters always start from this
      # copy; if they read the visible table instead, every filter would
      # permanently discard rows and clearing the search box could not bring
      # them back. Trade-off: manual edits made in the table are not carried
      # into later filter results.
      all_notes = gr.State(pd.DataFrame())

      def _fetch_notes():
        """Fetch notes once; feed both the unfiltered state and the table."""
        notes, status = retrieve_notes()
        return notes, notes, status

      def _show_selected_note(notes_df, evt: gr.SelectData):
        """Translate a table selection event into a row index for the viewer."""
        # For a Dataframe cell selection, evt.index is (row, column).
        index = evt.index
        row = index[0] if isinstance(index, (list, tuple)) else index
        return view_note_content(row, notes_df)

      filter_inputs = [all_notes, max_rows, search_query, search_field]
      filter_outputs = [notes_table, notes_status]

      retrieve_button.click(
        fn=_fetch_notes,
        outputs=[all_notes, notes_table, notes_status]
      )
      max_rows.change(
        fn=filter_notes,
        inputs=filter_inputs,
        outputs=filter_outputs
      )
      search_query.change(
        fn=filter_notes,
        inputs=filter_inputs,
        outputs=filter_outputs
      )
      search_field.change(
        fn=filter_notes,
        inputs=filter_inputs,
        outputs=filter_outputs
      )
      # The selected row is delivered through gr.SelectData, not through
      # `inputs`; the table is passed so the index is resolved against the
      # rows currently displayed.
      notes_table.select(
        fn=_show_selected_note,
        inputs=[notes_table],
        outputs=content_view
      )
      # Downloads export what is currently shown in the table.
      download_button.click(
        fn=download_notes,
        inputs=[notes_table, format_choice],
        outputs=[download_file, notes_status]
      )

app.launch()