Spaces:

tanlocc
/

geminsights

Sleeping

App Files Files Community

geminsights / app.py

izammohammed

Update app.py

a0a961a about 2 years ago

raw

history blame contribute delete

3.22 kB

	import streamlit as st
	import pandas as pd
	import os
	from utils import save_json, load_json
	from markdown import markdown
	from utils import load_json
	from autoviz import AutoViz_Class
	import base64
	from google.cloud import aiplatform
	import base64
	import vertexai
	from vertexai.preview.generative_models import GenerativeModel, Part

	#setup cloud
	aiplatform.init(
	project = "ultra-heading-407815",
	location="us-central1"
	)

	os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"


	dataframe = None
	st.title("GemInsights")
	file = st.file_uploader(
	"Pick a dataframe", type=["csv", "xlsx"], accept_multiple_files=False
	)

	if file is not None:
	_, extension = os.path.splitext(file.name)
	if extension == ".csv":
	dataframe = pd.read_csv(file)
	else:
	dataframe = pd.read_excel(file)
	st.write(dataframe.head())
	st.write(f"updated a dataframe with shape {dataframe.shape}")

	if file is not None:
	text_input = st.text_input(
	"Enter something about the data 👇",
	label_visibility="visible",
	disabled=False,
	placeholder="eg:- This is a sales dataframe",
	)

	option = st.selectbox(
	"Which is the target column?",
	tuple(list(dataframe.columns)),
	index=None,
	placeholder="Select one column in here",
	)

	def plot(dataframe, target):

	AV = AutoViz_Class()

	dft = AV.AutoViz(
	"",
	sep=",",
	depVar=target,
	dfte=dataframe,
	header=0,
	verbose=2,
	lowess=False,
	chart_format="jpg",
	max_rows_analyzed=500,
	max_cols_analyzed=20,
	save_plot_dir="plots",
	)

	def prompt_make(dataframe, target, info):
	images = []
	image_dir = f"plots/{target}"
	image_files = os.listdir(image_dir)
	for image_file in image_files:
	image_path = os.path.join(image_dir, image_file)
	img = open(image_path, "rb").read()
	img_bytes = Part.from_data(
	base64.b64decode(base64.encodebytes(img)), mime_type="image/jpeg"
	)
	images.append(img_bytes)
	with open("prompt.txt", "rb") as file:
	data = file.read()
	prompt = f"{data}\n Here are some of the informations related to the dataset - '{info}'"

	# print(f"{prompt}")
	# print(images)
	return prompt, images

	def generate_res(prompt, images):
	print("prompting ...")
	model = GenerativeModel("gemini-pro-vision")
	responses = model.generate_content(
	[prompt]+images,
	generation_config={
	"max_output_tokens": 2048,
	"temperature": 0.4,
	"top_p": 1,
	"top_k": 32
	},
	)
	return responses.text



	def generate(dataframe, text_input, option):
	plot(dataframe, option)
	prompt, images = prompt_make(dataframe, option, text_input)
	res = generate_res(prompt, images)
	return res

	if st.button("Get Insights", type="primary"):
	st.write("Visualising the data into various plots 📊...")
	st.write("Generating insights from the visualization of the data 💡...")
	# running the pipeline

	response = generate(dataframe, text_input, option)
	res = markdown(response)
	st.markdown(res, unsafe_allow_html=True)

	else:
	st.write("")