Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| from dotenv import load_dotenv | |
| import os | |
| import sqlite3 | |
| import pandas as pd | |
| import google.generativeai as genai | |
| import re | |
| from langchain import hub | |
| from langchain_chroma import Chroma | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_groq import ChatGroq | |
| from langchain.prompts import PromptTemplate | |
| from langchain.schema.runnable import RunnablePassthrough | |
| import tempfile | |
| # Load environment variables | |
| load_dotenv() | |
| # Configure API keys | |
| genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) | |
| groq_api_key = os.getenv("GROQ_API_KEY") | |
| # Function to load Google Gemini Model and get response | |
| def get_gemini_response(question, prompt, schema_info): | |
| model = genai.GenerativeModel('gemini-pro') | |
| response = model.generate_content([prompt, schema_info, question]) | |
| return response.text | |
| # Function to retrieve query from the database | |
| def read_sql_query(sql, db): | |
| try: | |
| conn = sqlite3.connect(db) | |
| df = pd.read_sql_query(sql, conn) | |
| conn.close() | |
| return df | |
| except sqlite3.Error as e: | |
| st.error(f"An error occurred: {e.args[0]}") | |
| return None | |
| # Function to convert DataFrame to SQLite database | |
| def dataframe_to_sqlite(df, db_name, table_name): | |
| conn = sqlite3.connect(db_name) | |
| df.to_sql(table_name, conn, if_exists='replace', index=False) | |
| conn.close() | |
| st.success("Data successfully loaded into the database!") | |
| # Function to get schema information | |
| def get_schema_info(df): | |
| columns = df.columns.tolist() | |
| dtypes = df.dtypes.astype(str).tolist() | |
| schema_info = "Table name: DATA\nColumns:\n" | |
| for col, dtype in zip(columns, dtypes): | |
| schema_info += f"- {col} ({dtype})\n" | |
| return schema_info | |
| # Function to clean SQL query | |
| def clean_sql_query(query): | |
| query = re.sub(r'```sql|```', '', query) | |
| query = query.strip() | |
| return query | |
| # Function to process PDF file | |
| def process_document(file): | |
| with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file: | |
| tmp_file.write(file.getvalue()) | |
| tmp_file_path = tmp_file.name | |
| loader = PyPDFLoader(tmp_file_path) | |
| documents = loader.load() | |
| os.unlink(tmp_file_path) # Delete the temporary file | |
| text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) | |
| splits = text_splitter.split_documents(documents) | |
| vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings()) | |
| return vectorstore.as_retriever() | |
| # Streamlit App | |
| st.set_page_config(page_title="AI-Powered Data Assistant") | |
| st.header("AI-Powered Data Assistant") | |
| # File upload section | |
| st.subheader("Upload Your Data") | |
| excel_file = st.file_uploader("Choose an Excel or CSV file", type=["xlsx", "csv"]) | |
| pdf_file = st.file_uploader("Choose a PDF file", type=["pdf"]) | |
| if excel_file: | |
| try: | |
| if excel_file.name.endswith('.csv'): | |
| df = pd.read_csv(excel_file) | |
| else: | |
| df = pd.read_excel(excel_file) | |
| dataframe_to_sqlite(df, "data.db", "DATA") | |
| st.write(df.head()) | |
| st.success("Excel/CSV file successfully uploaded and data loaded into the database!") | |
| st.session_state['schema_info'] = get_schema_info(df) | |
| except Exception as e: | |
| st.error(f"Error processing the Excel/CSV file: {str(e)}") | |
| if pdf_file: | |
| try: | |
| retriever = process_document(pdf_file) | |
| st.success("PDF file successfully processed!") | |
| st.session_state['retriever'] = retriever | |
| except Exception as e: | |
| st.error(f"Error processing the PDF file: {str(e)}") | |
| # Add radio button for data source selection | |
| data_source = st.radio( | |
| "Choose your data source for the query:", | |
| ("PDF", "Excel/CSV"), | |
| index=None, | |
| key="data_source" | |
| ) | |
| # User query input | |
| question = st.text_input("Ask a question about your data:", key="input") | |
| submit = st.button("Get Answer") | |
| if submit and question: | |
| if not data_source: | |
| st.error("Please select a data source (PDF or Excel/CSV) before submitting your question.") | |
| elif data_source == "PDF" and 'retriever' in st.session_state: | |
| with st.spinner("Processing your question using the PDF content..."): | |
| llm = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768") | |
| pdf_prompt = PromptTemplate.from_template(""" | |
| You are an AI assistant specialized in analyzing and answering questions about PDF documents. | |
| Use the provided context to answer the user's question accurately and concisely. | |
| If the answer is not directly stated in the context, use your knowledge to provide a reasonable response, | |
| but make it clear when you're inferring or speculating. | |
| If you cannot answer the question based on the given context, say so clearly. | |
| Context: {context} | |
| Question: {input} | |
| Provide a clear, concise, and informative answer: | |
| """) | |
| rag_chain = ( | |
| {"context": st.session_state['retriever'], "input": RunnablePassthrough()} | |
| | pdf_prompt | |
| | llm | |
| ) | |
| response = rag_chain.invoke(question) | |
| st.subheader("Answer:") | |
| st.write(response.content) | |
| elif data_source == "Excel/CSV" and 'schema_info' in st.session_state: | |
| with st.spinner("Generating SQL query..."): | |
| sql_query = get_gemini_response(question, "Generate SQL for this query:", st.session_state['schema_info']) | |
| clean_query = clean_sql_query(sql_query) | |
| st.subheader("Generated SQL Query:") | |
| st.code(clean_query, language="sql") | |
| with st.spinner("Executing query and fetching results..."): | |
| result_df = read_sql_query(clean_query, "data.db") | |
| if result_df is not None and not result_df.empty: | |
| st.subheader("Query Result:") | |
| st.dataframe(result_df) | |
| else: | |
| st.warning("No data found or an error occurred while executing the query.") | |
| else: | |
| st.error(f"Unable to process the query. Please make sure you've uploaded the appropriate file ({data_source}).") |