# RAG chatbot Streamlit app.
# (Source recovered from a Hugging Face Spaces "Runtime error" page paste;
# the original header read "Spaces: / Runtime error / Runtime error".)
import os
import tempfile

import faiss
import openai
import pandas as pd
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
| # OpenAI API key configuration | |
| st.set_page_config(page_title="RAG Chatbot with Files", layout="centered") | |
| openai.api_key = st.sidebar.text_input("Enter OpenAI API Key:", type="password") | |
| # Initialize FAISS and embedding model | |
| embedding_model = SentenceTransformer('all-MiniLM-L6-v2') | |
| faiss_index = None | |
| data_chunks = [] | |
| chunk_mapping = {} | |
| # File Upload and Processing | |
| def load_files(uploaded_files): | |
| global data_chunks, chunk_mapping, faiss_index | |
| data_chunks = [] | |
| chunk_mapping = {} | |
| for uploaded_file in uploaded_files: | |
| file_type = uploaded_file.name.split('.')[-1] | |
| with tempfile.NamedTemporaryFile(delete=False) as tmp_file: | |
| tmp_file.write(uploaded_file.read()) | |
| tmp_file_path = tmp_file.name | |
| if file_type == "csv": | |
| df = pd.read_csv(tmp_file_path) | |
| content = "\n".join(df.astype(str).values.flatten()) | |
| elif file_type == "xlsx": | |
| df = pd.read_excel(tmp_file_path) | |
| content = "\n".join(df.astype(str).values.flatten()) | |
| elif file_type == "pdf": | |
| reader = PdfReader(tmp_file_path) | |
| content = "".join([page.extract_text() for page in reader.pages]) | |
| else: | |
| st.error(f"Unsupported file type: {file_type}") | |
| continue | |
| # Split into chunks | |
| splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50) | |
| chunks = splitter.split_text(content) | |
| data_chunks.extend(chunks) | |
| chunk_mapping.update({i: (uploaded_file.name, chunk) for i, chunk in enumerate(chunks)}) | |
| # Create FAISS index | |
| embeddings = embedding_model.encode(data_chunks) | |
| faiss_index = faiss.IndexFlatL2(embeddings.shape[1]) | |
| faiss_index.add(embeddings) | |
| # Query Processing | |
| def handle_query(query): | |
| if not faiss_index: | |
| return "No data available. Please upload files first." | |
| # Generate embedding for the query | |
| query_embedding = embedding_model.encode([query]) | |
| distances, indices = faiss_index.search(query_embedding, k=5) | |
| relevant_chunks = [chunk_mapping[idx][1] for idx in indices[0]] | |
| # Use OpenAI for summarization | |
| prompt = "Summarize the following information:\n" + "\n".join(relevant_chunks) | |
| response = openai.Completion.create( | |
| engine="text-davinci-003", | |
| prompt=prompt, | |
| max_tokens=150 | |
| ) | |
| return response['choices'][0]['text'] | |
| # Streamlit UI | |
| def main(): | |
| st.title("RAG Chatbot with Files") | |
| st.sidebar.title("Options") | |
| uploaded_files = st.sidebar.file_uploader("Upload files (CSV, Excel, PDF):", type=["csv", "xlsx", "pdf"], accept_multiple_files=True) | |
| if uploaded_files: | |
| load_files(uploaded_files) | |
| st.sidebar.success("Files loaded successfully!") | |
| query = st.text_input("Ask a question about the data:") | |
| if st.button("Get Answer"): | |
| if openai.api_key and query: | |
| answer = handle_query(query) | |
| st.subheader("Answer:") | |
| st.write(answer) | |
| else: | |
| st.error("Please provide a valid API key and query.") | |
| if __name__ == "__main__": | |
| main() | |