Spaces:

mayankjen
/

Talk-With-Your-Database

Runtime error

App Files Files Community

Talk-With-Your-Database / app.py

mayankjen

first upload

fe07c43 verified almost 2 years ago

raw

history blame contribute delete

6.51 kB

	import streamlit as st
	from dotenv import load_dotenv
	import os
	import sqlite3
	import pandas as pd
	import google.generativeai as genai
	import re
	from langchain import hub
	from langchain_chroma import Chroma
	from langchain_community.document_loaders import PyPDFLoader
	from langchain_openai import OpenAIEmbeddings
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain_groq import ChatGroq
	from langchain.prompts import PromptTemplate
	from langchain.schema.runnable import RunnablePassthrough
	import tempfile

	# Load environment variables
	load_dotenv()

	# Configure API keys
	genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
	groq_api_key = os.getenv("GROQ_API_KEY")

	# Function to load Google Gemini Model and get response
	def get_gemini_response(question, prompt, schema_info):
	model = genai.GenerativeModel('gemini-pro')
	response = model.generate_content([prompt, schema_info, question])
	return response.text

	# Function to retrieve query from the database
	def read_sql_query(sql, db):
	try:
	conn = sqlite3.connect(db)
	df = pd.read_sql_query(sql, conn)
	conn.close()
	return df
	except sqlite3.Error as e:
	st.error(f"An error occurred: {e.args[0]}")
	return None

	# Function to convert DataFrame to SQLite database
	def dataframe_to_sqlite(df, db_name, table_name):
	conn = sqlite3.connect(db_name)
	df.to_sql(table_name, conn, if_exists='replace', index=False)
	conn.close()
	st.success("Data successfully loaded into the database!")

	# Function to get schema information
	def get_schema_info(df):
	columns = df.columns.tolist()
	dtypes = df.dtypes.astype(str).tolist()
	schema_info = "Table name: DATA\nColumns:\n"
	for col, dtype in zip(columns, dtypes):
	schema_info += f"- {col} ({dtype})\n"
	return schema_info

	# Function to clean SQL query
	def clean_sql_query(query):
	query = re.sub(r'```sql\|```', '', query)
	query = query.strip()
	return query

	# Function to process PDF file
	def process_document(file):
	with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
	tmp_file.write(file.getvalue())
	tmp_file_path = tmp_file.name

	loader = PyPDFLoader(tmp_file_path)
	documents = loader.load()

	os.unlink(tmp_file_path) # Delete the temporary file

	text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
	splits = text_splitter.split_documents(documents)

	vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
	return vectorstore.as_retriever()

	# Streamlit App
	st.set_page_config(page_title="AI-Powered Data Assistant")
	st.header("AI-Powered Data Assistant")

	# File upload section
	st.subheader("Upload Your Data")
	excel_file = st.file_uploader("Choose an Excel or CSV file", type=["xlsx", "csv"])
	pdf_file = st.file_uploader("Choose a PDF file", type=["pdf"])

	if excel_file:
	try:
	if excel_file.name.endswith('.csv'):
	df = pd.read_csv(excel_file)
	else:
	df = pd.read_excel(excel_file)

	dataframe_to_sqlite(df, "data.db", "DATA")
	st.write(df.head())
	st.success("Excel/CSV file successfully uploaded and data loaded into the database!")

	st.session_state['schema_info'] = get_schema_info(df)
	except Exception as e:
	st.error(f"Error processing the Excel/CSV file: {str(e)}")

	if pdf_file:
	try:
	retriever = process_document(pdf_file)
	st.success("PDF file successfully processed!")
	st.session_state['retriever'] = retriever
	except Exception as e:
	st.error(f"Error processing the PDF file: {str(e)}")

	# Add radio button for data source selection
	data_source = st.radio(
	"Choose your data source for the query:",
	("PDF", "Excel/CSV"),
	index=None,
	key="data_source"
	)

	# User query input
	question = st.text_input("Ask a question about your data:", key="input")
	submit = st.button("Get Answer")

	if submit and question:
	if not data_source:
	st.error("Please select a data source (PDF or Excel/CSV) before submitting your question.")
	elif data_source == "PDF" and 'retriever' in st.session_state:
	with st.spinner("Processing your question using the PDF content..."):
	llm = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768")
	pdf_prompt = PromptTemplate.from_template("""
	You are an AI assistant specialized in analyzing and answering questions about PDF documents.
	Use the provided context to answer the user's question accurately and concisely.
	If the answer is not directly stated in the context, use your knowledge to provide a reasonable response,
	but make it clear when you're inferring or speculating.
	If you cannot answer the question based on the given context, say so clearly.

	Context: {context}

	Question: {input}

	Provide a clear, concise, and informative answer:
	""")

	rag_chain = (
	{"context": st.session_state['retriever'], "input": RunnablePassthrough()}
	\| pdf_prompt
	\| llm
	)

	response = rag_chain.invoke(question)

	st.subheader("Answer:")
	st.write(response.content)

	elif data_source == "Excel/CSV" and 'schema_info' in st.session_state:
	with st.spinner("Generating SQL query..."):
	sql_query = get_gemini_response(question, "Generate SQL for this query:", st.session_state['schema_info'])
	clean_query = clean_sql_query(sql_query)

	st.subheader("Generated SQL Query:")
	st.code(clean_query, language="sql")

	with st.spinner("Executing query and fetching results..."):
	result_df = read_sql_query(clean_query, "data.db")

	if result_df is not None and not result_df.empty:
	st.subheader("Query Result:")
	st.dataframe(result_df)
	else:
	st.warning("No data found or an error occurred while executing the query.")

	else:
	st.error(f"Unable to process the query. Please make sure you've uploaded the appropriate file ({data_source}).")