import os

import openai
import PyPDF2
import streamlit as st
from sentence_transformers import SentenceTransformer, util

# Reads the API key from the "openapikey" environment variable.
openai.api_key = os.getenv("openapikey")

# Embedding model shared by document chunks and queries.
model = SentenceTransformer('all-MiniLM-L6-v2')


def load_pdf(uploaded_file):
    """Save the uploaded PDF to disk and extract its text page by page."""
    with open("temp.pdf", "wb") as f:
        f.write(uploaded_file.getvalue())
    with open("temp.pdf", 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        text = ""
        for page in pdf_reader.pages:
            # extract_text() can return None for pages with no extractable text.
            text += page.extract_text() or ""
    return text


def chunk_text(text, chunk_size=500, overlap=100):
    """Split text into fixed-size character chunks that overlap their neighbors."""
    chunks = []
    for i in range(0, len(text), chunk_size - overlap):
        chunks.append(text[i:i + chunk_size])
    return chunks


def create_embeddings(chunks):
    embeddings = model.encode(chunks, convert_to_tensor=True)
    return embeddings


def find_relevant_chunks(query_embedding, chunk_embeddings, chunks, top_k=3):
    """Return the top_k chunks most similar to the query by cosine similarity."""
    cosine_scores = util.pytorch_cos_sim(query_embedding, chunk_embeddings)[0]
    top_results = sorted(range(len(cosine_scores)),
                         key=lambda i: cosine_scores[i], reverse=True)[:top_k]
    relevant_chunks = [chunks[i] for i in top_results]
    return relevant_chunks


def generate_response(query, context):
    """Ask the chat model to answer the query using only the retrieved context."""
    messages = [
        {"role": "system",
         "content": "You are a helpful assistant that answers questions based on provided context."},
        {"role": "user", "content": f"Context: {context}\nQuestion: {query}"},
    ]
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",  # or another suitable chat model
        messages=messages,
        max_tokens=200,
    )
    return response.choices[0].message.content.strip()


st.title("Simple RAG Application (No LangChain)")

uploaded_file = st.file_uploader("Upload PDF", type="pdf")

if uploaded_file:
    with st.spinner("Processing PDF..."):
        pdf_text = load_pdf(uploaded_file)
        chunks = chunk_text(pdf_text)
        chunk_embeddings = create_embeddings(chunks)

    query = st.text_input("Ask a question:")
    if query:
        query_embedding = model.encode([query], convert_to_tensor=True)
        relevant_chunks = find_relevant_chunks(query_embedding, chunk_embeddings, chunks)
        context = "\n".join(relevant_chunks)
        answer = generate_response(query, context)
        st.write("Answer:", answer)
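
# A minimal sketch of how to run this script, assuming it is saved as app.py
# (the filename is an assumption, not given in the original):
#
#   pip install streamlit openai sentence-transformers PyPDF2
#   export openapikey="sk-..."    # the script reads the key from this variable
#   streamlit run app.py
#
# Note that Streamlit re-executes the whole script on every interaction, so the
# PDF is re-parsed and re-embedded each time the question changes; caching the
# embeddings (e.g. with st.session_state) is a natural next step.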