Update app.py
Browse files
app.py
CHANGED
|
@@ -12,7 +12,7 @@ from gradio.data_classes import FileData
|
|
| 12 |
# Log in to Hugging Face
|
| 13 |
login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
|
| 14 |
|
| 15 |
-
# Initialize the LLM engine
|
| 16 |
llm_engine = HfEngine("Rohan-Kurdekar/Arabic_Bert_Model")
|
| 17 |
|
| 18 |
# Initialize the agent
|
|
@@ -26,12 +26,12 @@ agent = ReactCodeAgent(
|
|
| 26 |
# Define the base prompt
|
| 27 |
base_prompt = """You are an expert data analyst.
|
| 28 |
According to the features you have and the data structure given below, determine which feature should be the target.
|
| 29 |
-
Then list 3 interesting questions that could be asked on this data, for instance about specific correlations with target variable.
|
| 30 |
Then answer these questions one by one, by finding the relevant numbers.
|
| 31 |
Meanwhile, plot some figures using matplotlib/seaborn and save them to the (already existing) folder './figures/': take care to clear each figure with plt.clf() before doing another plot.
|
| 32 |
|
| 33 |
-
In your final answer: summarize these correlations and trends
|
| 34 |
-
After each number derive real
|
| 35 |
Your final answer should be a long string with at least 3 numbered and detailed parts.
|
| 36 |
|
| 37 |
Structure of the data:
|
|
@@ -41,22 +41,6 @@ The data file is passed to you as the variable data_file, it is a pandas datafra
|
|
| 41 |
DO NOT try to load data_file, it is already a dataframe pre-loaded in your python interpreter!
|
| 42 |
"""
|
| 43 |
|
| 44 |
-
# Example notes
|
| 45 |
-
example_notes = """This data is about the Titanic wreck in 1912.
|
| 46 |
-
The target figure is the survival of passengers, noted by 'Survived'
|
| 47 |
-
pclass: A proxy for socio-economic status (SES)
|
| 48 |
-
1st = Upper
|
| 49 |
-
2nd = Middle
|
| 50 |
-
3rd = Lower
|
| 51 |
-
age: Age is fractional if less than 1. If the age is estimated, is it in the form of xx.5
|
| 52 |
-
sibsp: The dataset defines family relations in this way...
|
| 53 |
-
Sibling = brother, sister, stepbrother, stepsister
|
| 54 |
-
Spouse = husband, wife (mistresses and fiancés were ignored)
|
| 55 |
-
parch: The dataset defines family relations in this way...
|
| 56 |
-
Parent = mother, father
|
| 57 |
-
Child = daughter, son, stepdaughter, stepson
|
| 58 |
-
Some children travelled only with a nanny, therefore parch=0 for them."""
|
| 59 |
-
|
| 60 |
def get_images_in_directory(directory):
|
| 61 |
image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
|
| 62 |
image_files = []
|
|
@@ -74,7 +58,8 @@ def interact_with_agent(file_input, additional_notes):
|
|
| 74 |
shutil.rmtree(figures_dir)
|
| 75 |
os.makedirs(figures_dir)
|
| 76 |
|
| 77 |
-
|
|
|
|
| 78 |
data_structure_notes = f"""- Description (output of .describe()):
|
| 79 |
{data_file.describe()}
|
| 80 |
- Columns with dtypes:
|
|
|
|
| 12 |
# Log in to Hugging Face
|
| 13 |
login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
|
| 14 |
|
| 15 |
+
# Initialize the LLM engine with an Arabic model
|
| 16 |
llm_engine = HfEngine("Rohan-Kurdekar/Arabic_Bert_Model")
|
| 17 |
|
| 18 |
# Initialize the agent
|
|
|
|
| 26 |
# Define the base prompt
|
| 27 |
base_prompt = """You are an expert data analyst.
|
| 28 |
According to the features you have and the data structure given below, determine which feature should be the target.
|
| 29 |
+
Then list 3 interesting questions that could be asked on this data, for instance about specific correlations with the target variable.
|
| 30 |
Then answer these questions one by one, by finding the relevant numbers.
|
| 31 |
Meanwhile, plot some figures using matplotlib/seaborn and save them to the (already existing) folder './figures/': take care to clear each figure with plt.clf() before doing another plot.
|
| 32 |
|
| 33 |
+
In your final answer: summarize these correlations and trends.
|
| 34 |
+
After each number derive real world insights, for instance: "Correlation between is_december and boredness is 1.3453, which suggest people are more bored in winter".
|
| 35 |
Your final answer should be a long string with at least 3 numbered and detailed parts.
|
| 36 |
|
| 37 |
Structure of the data:
|
|
|
|
| 41 |
DO NOT try to load data_file, it is already a dataframe pre-loaded in your python interpreter!
|
| 42 |
"""
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
def get_images_in_directory(directory):
|
| 45 |
image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
|
| 46 |
image_files = []
|
|
|
|
| 58 |
shutil.rmtree(figures_dir)
|
| 59 |
os.makedirs(figures_dir)
|
| 60 |
|
| 61 |
+
# Read the CSV file with the appropriate encoding for Arabic text
|
| 62 |
+
data_file = pd.read_csv(file_input.name, encoding='utf-8')
|
| 63 |
data_structure_notes = f"""- Description (output of .describe()):
|
| 64 |
{data_file.describe()}
|
| 65 |
- Columns with dtypes:
|