Spaces:
Runtime error
Runtime error
File size: 6,508 Bytes
fe07c43 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 | import streamlit as st
from dotenv import load_dotenv
import os
import sqlite3
import pandas as pd
import google.generativeai as genai
import re
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
import tempfile
# Load environment variables
load_dotenv()
# Configure API keys
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
groq_api_key = os.getenv("GROQ_API_KEY")
# Function to load Google Gemini Model and get response
def get_gemini_response(question, prompt, schema_info):
model = genai.GenerativeModel('gemini-pro')
response = model.generate_content([prompt, schema_info, question])
return response.text
# Function to retrieve query from the database
def read_sql_query(sql, db):
try:
conn = sqlite3.connect(db)
df = pd.read_sql_query(sql, conn)
conn.close()
return df
except sqlite3.Error as e:
st.error(f"An error occurred: {e.args[0]}")
return None
# Function to convert DataFrame to SQLite database
def dataframe_to_sqlite(df, db_name, table_name):
conn = sqlite3.connect(db_name)
df.to_sql(table_name, conn, if_exists='replace', index=False)
conn.close()
st.success("Data successfully loaded into the database!")
# Function to get schema information
def get_schema_info(df):
columns = df.columns.tolist()
dtypes = df.dtypes.astype(str).tolist()
schema_info = "Table name: DATA\nColumns:\n"
for col, dtype in zip(columns, dtypes):
schema_info += f"- {col} ({dtype})\n"
return schema_info
# Function to clean SQL query
def clean_sql_query(query):
query = re.sub(r'```sql|```', '', query)
query = query.strip()
return query
# Function to process PDF file
def process_document(file):
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
tmp_file.write(file.getvalue())
tmp_file_path = tmp_file.name
loader = PyPDFLoader(tmp_file_path)
documents = loader.load()
os.unlink(tmp_file_path) # Delete the temporary file
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
splits = text_splitter.split_documents(documents)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
return vectorstore.as_retriever()
# Streamlit App
st.set_page_config(page_title="AI-Powered Data Assistant")
st.header("AI-Powered Data Assistant")
# File upload section
st.subheader("Upload Your Data")
excel_file = st.file_uploader("Choose an Excel or CSV file", type=["xlsx", "csv"])
pdf_file = st.file_uploader("Choose a PDF file", type=["pdf"])
if excel_file:
try:
if excel_file.name.endswith('.csv'):
df = pd.read_csv(excel_file)
else:
df = pd.read_excel(excel_file)
dataframe_to_sqlite(df, "data.db", "DATA")
st.write(df.head())
st.success("Excel/CSV file successfully uploaded and data loaded into the database!")
st.session_state['schema_info'] = get_schema_info(df)
except Exception as e:
st.error(f"Error processing the Excel/CSV file: {str(e)}")
if pdf_file:
try:
retriever = process_document(pdf_file)
st.success("PDF file successfully processed!")
st.session_state['retriever'] = retriever
except Exception as e:
st.error(f"Error processing the PDF file: {str(e)}")
# Add radio button for data source selection
data_source = st.radio(
"Choose your data source for the query:",
("PDF", "Excel/CSV"),
index=None,
key="data_source"
)
# User query input
question = st.text_input("Ask a question about your data:", key="input")
submit = st.button("Get Answer")
if submit and question:
if not data_source:
st.error("Please select a data source (PDF or Excel/CSV) before submitting your question.")
elif data_source == "PDF" and 'retriever' in st.session_state:
with st.spinner("Processing your question using the PDF content..."):
llm = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768")
pdf_prompt = PromptTemplate.from_template("""
You are an AI assistant specialized in analyzing and answering questions about PDF documents.
Use the provided context to answer the user's question accurately and concisely.
If the answer is not directly stated in the context, use your knowledge to provide a reasonable response,
but make it clear when you're inferring or speculating.
If you cannot answer the question based on the given context, say so clearly.
Context: {context}
Question: {input}
Provide a clear, concise, and informative answer:
""")
rag_chain = (
{"context": st.session_state['retriever'], "input": RunnablePassthrough()}
| pdf_prompt
| llm
)
response = rag_chain.invoke(question)
st.subheader("Answer:")
st.write(response.content)
elif data_source == "Excel/CSV" and 'schema_info' in st.session_state:
with st.spinner("Generating SQL query..."):
sql_query = get_gemini_response(question, "Generate SQL for this query:", st.session_state['schema_info'])
clean_query = clean_sql_query(sql_query)
st.subheader("Generated SQL Query:")
st.code(clean_query, language="sql")
with st.spinner("Executing query and fetching results..."):
result_df = read_sql_query(clean_query, "data.db")
if result_df is not None and not result_df.empty:
st.subheader("Query Result:")
st.dataframe(result_df)
else:
st.warning("No data found or an error occurred while executing the query.")
else:
st.error(f"Unable to process the query. Please make sure you've uploaded the appropriate file ({data_source}).") |