wesam0099 committed on
Commit
ffeab2e
·
verified ·
1 Parent(s): 886de7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -21
app.py CHANGED
@@ -12,7 +12,7 @@ from gradio.data_classes import FileData
12
  # Log in to Hugging Face
13
  login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
14
 
15
- # Initialize the LLM engine
16
  llm_engine = HfEngine("Rohan-Kurdekar/Arabic_Bert_Model")
17
 
18
  # Initialize the agent
@@ -26,12 +26,12 @@ agent = ReactCodeAgent(
26
  # Define the base prompt
27
  base_prompt = """You are an expert data analyst.
28
  According to the features you have and the data structure given below, determine which feature should be the target.
29
- Then list 3 interesting questions that could be asked on this data, for instance about specific correlations with target variable.
30
  Then answer these questions one by one, by finding the relevant numbers.
31
  Meanwhile, plot some figures using matplotlib/seaborn and save them to the (already existing) folder './figures/': take care to clear each figure with plt.clf() before doing another plot.
32
 
33
- In your final answer: summarize these correlations and trends
34
- After each number derive real worlds insights, for instance: "Correlation between is_december and boredness is 1.3453, which suggest people are more bored in winter".
35
  Your final answer should be a long string with at least 3 numbered and detailed parts.
36
 
37
  Structure of the data:
@@ -41,22 +41,6 @@ The data file is passed to you as the variable data_file, it is a pandas datafra
41
  DO NOT try to load data_file, it is already a dataframe pre-loaded in your python interpreter!
42
  """
43
 
44
- # Example notes
45
- example_notes = """This data is about the Titanic wreck in 1912.
46
- The target figure is the survival of passengers, noted by 'Survived'
47
- pclass: A proxy for socio-economic status (SES)
48
- 1st = Upper
49
- 2nd = Middle
50
- 3rd = Lower
51
- age: Age is fractional if less than 1. If the age is estimated, is it in the form of xx.5
52
- sibsp: The dataset defines family relations in this way...
53
- Sibling = brother, sister, stepbrother, stepsister
54
- Spouse = husband, wife (mistresses and fiancés were ignored)
55
- parch: The dataset defines family relations in this way...
56
- Parent = mother, father
57
- Child = daughter, son, stepdaughter, stepson
58
- Some children travelled only with a nanny, therefore parch=0 for them."""
59
-
60
  def get_images_in_directory(directory):
61
  image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
62
  image_files = []
@@ -74,7 +58,8 @@ def interact_with_agent(file_input, additional_notes):
74
  shutil.rmtree(figures_dir)
75
  os.makedirs(figures_dir)
76
 
77
- data_file = pd.read_csv(file_input)
 
78
  data_structure_notes = f"""- Description (output of .describe()):
79
  {data_file.describe()}
80
  - Columns with dtypes:
 
12
  # Log in to Hugging Face
13
  login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
14
 
15
+ # Initialize the LLM engine with an Arabic model
16
  llm_engine = HfEngine("Rohan-Kurdekar/Arabic_Bert_Model")
17
 
18
  # Initialize the agent
 
26
  # Define the base prompt
27
  base_prompt = """You are an expert data analyst.
28
  According to the features you have and the data structure given below, determine which feature should be the target.
29
+ Then list 3 interesting questions that could be asked on this data, for instance about specific correlations with the target variable.
30
  Then answer these questions one by one, by finding the relevant numbers.
31
  Meanwhile, plot some figures using matplotlib/seaborn and save them to the (already existing) folder './figures/': take care to clear each figure with plt.clf() before doing another plot.
32
 
33
+ In your final answer: summarize these correlations and trends.
34
+ After each number derive real world insights, for instance: "Correlation between is_december and boredness is 1.3453, which suggest people are more bored in winter".
35
  Your final answer should be a long string with at least 3 numbered and detailed parts.
36
 
37
  Structure of the data:
 
41
  DO NOT try to load data_file, it is already a dataframe pre-loaded in your python interpreter!
42
  """
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def get_images_in_directory(directory):
45
  image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
46
  image_files = []
 
58
  shutil.rmtree(figures_dir)
59
  os.makedirs(figures_dir)
60
 
61
+ # Read the CSV file with the appropriate encoding for Arabic text
62
+ data_file = pd.read_csv(file_input.name, encoding='utf-8')
63
  data_structure_notes = f"""- Description (output of .describe()):
64
  {data_file.describe()}
65
  - Columns with dtypes: