Spaces:
Sleeping
Sleeping
| import os | |
| from groq import Groq | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from PyPDF2 import PdfReader | |
| import docx | |
| import streamlit as st | |
# ===================== Groq API Key =====================
# Read the key from the environment; the literal fallback is a placeholder
# and will fail authentication against the Groq API if no real key is set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "your_key_here")
# Module-level client reused by the query section below.
client = Groq(api_key=GROQ_API_KEY)
| # ===================== Helper Functions ===================== | |
def read_pdf(file):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        file: A file-like object (e.g. a Streamlit ``UploadedFile``)
            readable by PyPDF2's ``PdfReader``.

    Returns:
        str: The text of all pages joined together. Pages with no
        extractable text layer (scanned images, blank pages) contribute
        an empty string instead of crashing.
    """
    pdf = PdfReader(file)
    # extract_text() returns None for pages without a text layer; the
    # original `text += page.extract_text()` raised TypeError on those.
    # join() also avoids the quadratic `+=` string build-up.
    return "".join(page.extract_text() or "" for page in pdf.pages)
def read_docx(file):
    """Return the full text of a .docx document.

    Args:
        file: A file-like object accepted by ``docx.Document``.

    Returns:
        str: All paragraph texts, each followed by a newline.
    """
    document = docx.Document(file)
    # One trailing newline per paragraph, matching the original output.
    return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)
# ===================== Streamlit UI =====================
st.set_page_config(page_title="📄 RAG App with Groq", layout="wide")
st.title("📄 RAG App with Groq (Open-Source Embeddings)")

uploaded_file = st.file_uploader("Upload a document (PDF, DOCX, or TXT)", type=["pdf", "docx", "txt"])

if uploaded_file:
    # Extract text according to the browser-reported MIME type; anything
    # that is not PDF/DOCX is treated as UTF-8 plain text.
    if uploaded_file.type == "application/pdf":
        raw_text = read_pdf(uploaded_file)
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        raw_text = read_docx(uploaded_file)
    else:
        raw_text = uploaded_file.read().decode("utf-8")

    # Split the document into overlapping chunks sized for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_text(raw_text)
    st.success(f"Document loaded and split into {len(chunks)} chunks.")

    # ===================== Open-Source Embeddings & FAISS =====================
    # Streamlit reruns this whole script on every widget interaction
    # (including each query submission). Cache the FAISS index in
    # session_state keyed by the uploaded file's name so the embedding
    # model is loaded and the chunks embedded only once per document,
    # not on every rerun.
    cache_key = uploaded_file.name
    if st.session_state.get("_faiss_doc") != cache_key:
        st.info("Embedding chunks for retrieval using open-source embeddings...")
        hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        st.session_state["_faiss_index"] = FAISS.from_texts(chunks, hf_embeddings)
        st.session_state["_faiss_doc"] = cache_key
    faiss_index = st.session_state["_faiss_index"]

    # ===================== Query Section =====================
    query = st.text_input("Ask something about the document:")
    if query:
        # Retrieve the 3 chunks most similar to the question as context.
        docs = faiss_index.similarity_search(query, k=3)
        context = "\n".join(doc.page_content for doc in docs)

        # Groq LLM for answer generation, grounded on the retrieved context.
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"Answer the following question using the context below:\nContext:\n{context}\n\nQuestion:\n{query}"},
            ],
        )
        answer = response.choices[0].message.content
        st.markdown(f"**Answer:** {answer}")