Spaces:

Anupam202224
/

DataAnalysis-A

Build error

App Files Files Community

Anupam202224 committed on Oct 11, 2024

Commit

c4c8dcf

verified ·

1 Parent(s): 8fa43d0

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -68

app.py CHANGED Viewed

@@ -1,26 +1,29 @@
 import os
 import shutil
 import gradio as gr
-from transformers import ReactCodeAgent, HfEngine, Tool
 import pandas as pd
-import spaces
 import torch
-from gradio import Chatbot
-from streaming import stream_to_gradio
-from huggingface_hub import login
-from gradio.data_classes import FileData
-llm_engine = HfEngine("meta-llama/Meta-Llama-3.1-70B-Instruct")
-agent = ReactCodeAgent(
-    tools=[],
-    llm_engine=llm_engine,
-    additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "scipy.stats"],
-    max_iterations=10,
-)
 base_prompt = """You are an expert data analyst.
 According to the features you have and the data structure given below, determine which feature should be the target.
 Then list 3 interesting questions that could be asked on this data, for instance about specific correlations with target variable.
@@ -38,25 +41,24 @@ The data file is passed to you as the variable data_file, it is a pandas datafra
 DO NOT try to load data_file, it is already a dataframe pre-loaded in your python interpreter!
 """
-example_notes="""This data is about the Titanic wreck in 1912.
-The target figure is the survival of passengers, notes by 'Survived'
 pclass: A proxy for socio-economic status (SES)
 1st = Upper
 2nd = Middle
 3rd = Lower
-age: Age is fractional if less than 1. If the age is estimated, is it in the form of xx.5
 sibsp: The dataset defines family relations in this way...
 Sibling = brother, sister, stepbrother, stepsister
 Spouse = husband, wife (mistresses and fiancés were ignored)
 parch: The dataset defines family relations in this way...
 Parent = mother, father
 Child = daughter, son, stepdaughter, stepson
-Some children travelled only with a nanny, therefore parch=0 for them."""
-@spaces.GPU
 def get_images_in_directory(directory):
     image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
     image_files = []
     for root, dirs, files in os.walk(directory):
         for file in files:
@@ -64,73 +66,105 @@ def get_images_in_directory(directory):
                 image_files.append(os.path.join(root, file))
     return image_files
-@spaces.GPU
-def interact_with_agent(file_input, additional_notes):
-    shutil.rmtree("./figures")
-    os.makedirs("./figures")
-    data_file = pd.read_csv(file_input)
     data_structure_notes = f"""- Description (output of .describe()):
-    {data_file.describe()}
-    - Columns with dtypes:
-    {data_file.dtypes}"""
     prompt = base_prompt.format(structure_notes=data_structure_notes)
-    if additional_notes and len(additional_notes) > 0:
         prompt += "\nAdditional notes on the data:\n" + additional_notes
-    messages = [gr.ChatMessage(role="user", content=prompt)]
-    yield messages + [
-        gr.ChatMessage(role="assistant", content="⏳ _Starting task..._")
-    ]
-    plot_image_paths = {}
-    for msg in stream_to_gradio(agent, prompt, data_file=data_file):
-        messages.append(msg)
-        for image_path in get_images_in_directory("./figures"):
-            if image_path not in plot_image_paths:
-                image_message = gr.ChatMessage(
-                    role="assistant",
-                    content=FileData(path=image_path, mime_type="image/png"),
-                )
-                plot_image_paths[image_path] = True
-                messages.append(image_message)
-        yield messages + [
-            gr.ChatMessage(role="assistant", content="⏳ _Still processing..._")
-        ]
-    yield messages
 with gr.Blocks(
     theme=gr.themes.Soft(
         primary_hue=gr.themes.colors.yellow,
         secondary_hue=gr.themes.colors.blue,
     )
 ) as demo:
-    gr.Markdown("""# Llama-3.1 Data analyst 📊🤔
-Drop a `.csv` file below, add notes to describe this data if needed, and **Llama-3.1-70B will analyze the file content and draw figures for you!**""")
-    file_input = gr.File(label="Your file to analyze")
-    text_input = gr.Textbox(
-        label="Additional notes to support the analysis"
-    )
     submit = gr.Button("Run analysis!", variant="primary")
     chatbot = gr.Chatbot(
         label="Data Analyst Agent",
-        type="messages",
-        avatar_images=(
-            None,
-            "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png",
-        ),
     )
     gr.Examples(
         examples=[["./example/titanic.csv", example_notes]],
         inputs=[file_input, text_input],
         cache_examples=False
     )
-    submit.click(interact_with_agent, [file_input, text_input], [chatbot])
 if __name__ == "__main__":
-    demo.launch()

 import os
 import shutil
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import pandas as pd
 import torch
+import matplotlib.pyplot as plt
+import seaborn as sns
+# Define constants
+# NOTE(review): a 7B checkpoint on CPU is heavy — confirm the Space has the
+# memory (and any required model access) or swap in a smaller model.
+MODEL_NAME = "meta-llama/Llama-2-7b-hf"  # Replace with a smaller model suitable for CPU
+FIGURES_DIR = "./figures"
+# Ensure the figures directory exists
+os.makedirs(FIGURES_DIR, exist_ok=True)
+# Initialize tokenizer and model
+# Note: Loading large models on CPU can be very slow and may not be feasible
+try:
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="cpu")
+except Exception as e:
+    print(f"Error loading model: {e}")
+    # `exit` is a convenience helper installed by the `site` module for
+    # interactive sessions and is not guaranteed to exist in every runtime;
+    # raising SystemExit stops the script with the same non-zero status.
+    raise SystemExit(1) from e
+# Define the base prompt
 base_prompt = """You are an expert data analyst.
 According to the features you have and the data structure given below, determine which feature should be the target.
 Then list 3 interesting questions that could be asked on this data, for instance about specific correlations with target variable.
 DO NOT try to load data_file, it is already a dataframe pre-loaded in your python interpreter!
 """
+# Sample notes for the Titanic example; passed to gr.Examples together with
+# ./example/titanic.csv further down in this file.
+example_notes = """This data is about the Titanic wreck in 1912.
+The target figure is the survival of passengers, noted by 'Survived'.
 pclass: A proxy for socio-economic status (SES)
 1st = Upper
 2nd = Middle
 3rd = Lower
+age: Age is fractional if less than 1. If the age is estimated, it is in the form of xx.5
 sibsp: The dataset defines family relations in this way...
 Sibling = brother, sister, stepbrother, stepsister
 Spouse = husband, wife (mistresses and fiancés were ignored)
 parch: The dataset defines family relations in this way...
 Parent = mother, father
 Child = daughter, son, stepdaughter, stepson
+Some children traveled only with a nanny, therefore parch=0 for them."""
 def get_images_in_directory(directory):
+    """Retrieve all image file paths from the specified directory."""
     image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
     image_files = []
     for root, dirs, files in os.walk(directory):
         for file in files:
                 image_files.append(os.path.join(root, file))
     return image_files
+def generate_response(prompt, max_new_tokens=512):
+    """Generate a completion for ``prompt`` with the module-level model.
+
+    Args:
+        prompt: Text sent to the language model.
+        max_new_tokens: Upper bound on the number of tokens generated after
+            the prompt. Defaults to 512.
+
+    Returns:
+        The decoded completion only; the prompt itself is not echoed back.
+    """
+    inputs = tokenizer(prompt, return_tensors="pt")
+    inputs = inputs.to('cpu')  # Ensure the model runs on CPU
+    prompt_length = inputs["input_ids"].shape[-1]
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            # Budget the generated tokens only: `max_length` also counts the
+            # prompt, so a long prompt left little or no room for an answer.
+            max_new_tokens=max_new_tokens,
+            do_sample=True,
+            top_p=0.95,
+            temperature=0.7,
+            eos_token_id=tokenizer.eos_token_id,
+        )
+    # For a causal LM the returned sequence starts with the prompt tokens;
+    # drop them so the caller receives just the model's answer.
+    completion_ids = outputs[0][prompt_length:]
+    return tokenizer.decode(completion_ids, skip_special_tokens=True)
+def interact_with_agent(file_input, additional_notes):
+    """Analyze an uploaded CSV with the language model, yielding chat updates.
+
+    Args:
+        file_input: The uploaded file, either a path string or an object
+            exposing the path as ``.name``; ``None`` when nothing was uploaded.
+        additional_notes: Optional free-text notes appended to the prompt.
+
+    Yields:
+        Chat histories as lists of ``(user_message, assistant_message)``
+        pairs, with ``None`` for an empty side. This assumes the Chatbot is
+        in its pair/tuple mode (no ``type="messages"``), as it is built in
+        this file.
+    """
+    # Clear and recreate the figures directory
+    if os.path.exists(FIGURES_DIR):
+        shutil.rmtree(FIGURES_DIR)
+    os.makedirs(FIGURES_DIR, exist_ok=True)
+    if file_input is None:
+        yield [(None, "Please upload a CSV file first.")]
+        return
+    # Depending on the Gradio version and the File component's `type`, the
+    # handler receives a plain path string or a temp-file wrapper with .name.
+    csv_path = getattr(file_input, "name", file_input)
+    # Load the data file into a pandas dataframe
+    try:
+        data_file = pd.read_csv(csv_path)
+    except Exception as e:
+        # Surface the cause instead of discarding it, as a valid chat pair.
+        yield [(None, f"Error loading CSV file: {e}")]
+        return
+    # Create structure notes
     data_structure_notes = f"""- Description (output of .describe()):
+{data_file.describe()}
+- Columns with dtypes:
+{data_file.dtypes}"""
+    # Construct the prompt
     prompt = base_prompt.format(structure_notes=data_structure_notes)
+    if additional_notes and additional_notes.strip():
         prompt += "\nAdditional notes on the data:\n" + additional_notes
+    # Show the prompt as the user turn with a placeholder reply while the
+    # (slow) generation runs.
+    yield [(prompt, "⏳ _Starting analysis..._")]
+    # Generate response from the model
+    response = generate_response(prompt)
+    messages = [(prompt, response)]
+    # Attach any figures found on disk; in pair mode a file is passed as a
+    # one-element tuple holding its path.
+    # NOTE(review): nothing in this function writes into FIGURES_DIR after it
+    # is cleared — confirm where figures are expected to come from.
+    for image_path in get_images_in_directory(FIGURES_DIR):
+        messages.append((None, (image_path,)))
+    yield messages
+# Define the Gradio interface: a file upload plus notes box feeding a chatbot.
 with gr.Blocks(
     theme=gr.themes.Soft(
         primary_hue=gr.themes.colors.yellow,
         secondary_hue=gr.themes.colors.blue,
     )
 ) as demo:
+    gr.Markdown("""# Llama-2 Data Analyst 📊🤔
+Drop a `.csv` file below, add notes to describe this data if needed, and **the model will analyze the file content and draw figures for you!**""")
+    with gr.Row():
+        # Leave `type` unset so the installed Gradio uses its own default:
+        # "file" is not an accepted value in Gradio 4+ ("filepath"/"binary"
+        # only) and makes component construction fail there.
+        # NOTE(review): confirm against the Gradio version pinned for the Space.
+        file_input = gr.File(label="Your file to analyze")
+        text_input = gr.Textbox(
+            label="Additional notes to support the analysis",
+            placeholder="Enter any additional notes here..."
+        )
     submit = gr.Button("Run analysis!", variant="primary")
     chatbot = gr.Chatbot(
         label="Data Analyst Agent",
+        height=400,
     )
     gr.Examples(
         examples=[["./example/titanic.csv", example_notes]],
         inputs=[file_input, text_input],
         cache_examples=False
     )
+    # Connect the submit button to the interact_with_agent function
+    submit.click(
+        interact_with_agent,
+        inputs=[file_input, text_input],
+        outputs=[chatbot],
+        show_progress=True
+    )
+# Launch the Gradio app
 if __name__ == "__main__":
+    demo.launch()