"""Civil Engineering RAG assistant.

Indexes local ASTM PDF standards into a FAISS vector store at startup,
then answers questions through a Gradio UI by retrieving relevant chunks
and prompting a Groq-hosted LLM with them.
"""

import os

import gradio as gr
from groq import Groq
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

# PDF sources indexed at startup; extend this list to cover more standards.
PDF_FILES = ["documents/ASTM1.pdf", "documents/ASTM2.pdf"]

# Groq client — expects GROQ_API_KEY in the environment (None otherwise;
# the API call in ask_question will then fail with an auth error).
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))


def load_documents_and_create_vectorstore(pdf_files=PDF_FILES):
    """Load the ASTM PDFs, chunk them, and build a FAISS vector store.

    Args:
        pdf_files: Paths of the PDF documents to index. Defaults to
            ``PDF_FILES`` (backward-compatible with the original
            zero-argument call).

    Returns:
        A FAISS vector store over the embedded document chunks.
    """
    docs = []
    for path in pdf_files:
        docs.extend(PyPDFLoader(path).load())
    # 500-char chunks with 50-char overlap keep retrieval granular while
    # preserving context across chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return FAISS.from_documents(chunks, embeddings)


# Build the index once at startup so each question only pays for retrieval.
vectorstore = load_documents_and_create_vectorstore()


def ask_question(question):
    """Answer *question* using RAG over the indexed ASTM standards.

    Retrieves the 3 most similar chunks from the vector store and asks
    the Groq LLM to answer from that context.

    Args:
        question: The user's question, as entered in the Gradio textbox.

    Returns:
        The model's answer as a string.
    """
    # Guard against empty input — no point running retrieval + an LLM call.
    if not question or not question.strip():
        return "Please enter a question."
    # similarity_search is the stable API; retriever.get_relevant_documents
    # is deprecated in recent LangChain releases and rebuilt a retriever
    # object on every call.
    docs = vectorstore.similarity_search(question, k=3)
    context = "\n".join(doc.page_content for doc in docs)
    prompt = f"""You are a helpful Civil Engineering assistant. Use the following ASTM standard context to answer:

Context:
{context}

Question: {question}

Answer:"""
    completion = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.3-70b-versatile",
    )
    return completion.choices[0].message.content


# Gradio UI — defined at module level, but only launched when run as a
# script so importing this module no longer starts a web server.
demo = gr.Interface(
    fn=ask_question,
    inputs=gr.Textbox(label="Ask a Civil Engineering Question (based on ASTM)"),
    outputs=gr.Textbox(label="Answer"),
    title="Civil Engineering RAG Assistant",
    description="Ask questions from uploaded ASTM PDFs",
)

if __name__ == "__main__":
    demo.launch()