"""ASME RAG assistant.

Downloads a fixed set of ASME PDFs, splits their text into word chunks,
embeds the chunks with a SentenceTransformer model, stores the vectors in a
FAISS L2 index, and answers questions through a Groq chat model using the
retrieved chunks as context. A small Gradio UI drives both steps.
"""
import os
from io import BytesIO

import faiss
import gradio as gr
import numpy as np
import requests
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# -------- SETTINGS --------
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
INDEX_FILE = "faiss_index.bin"
CHUNKS_FILE = "chunks.npy"
CHUNK_SIZE = 500        # words per chunk
MIN_CHUNK_WORDS = 20    # chunks with this many words or fewer are dropped
TOP_K = 3               # chunks retrieved per query
REQUEST_TIMEOUT = 60    # seconds; keeps a stalled download from hanging the UI

# ASME PDF URLs (fixed for this app)
PDF_URLS = [
    "https://www.asme.org/wwwasmeorg/media/resourcefiles/aboutasme/who%20we%20are/standards_and_certification/asme_codes_and_standards-examples_of_use_for_mechanical_engineering_students.pdf",
    "https://www.asme.org/wwwasmeorg/media/resourcefiles/campaigns/marketing/2012/the-state-of-mechanical-engineering-survey.pdf",
]

# Load embedding model
embedder = SentenceTransformer(EMBED_MODEL)
embed_dim = embedder.get_sentence_embedding_dimension()

# Initialize or load FAISS + chunks
index = None
chunks = []
if os.path.exists(INDEX_FILE) and os.path.exists(CHUNKS_FILE):
    loaded_index = faiss.read_index(INDEX_FILE)
    # NOTE(security): allow_pickle=True unpickles the file's contents, which
    # can execute arbitrary code. Only acceptable because CHUNKS_FILE is
    # written by this app itself; never point it at an untrusted file.
    loaded_chunks = np.load(CHUNKS_FILE, allow_pickle=True).tolist()
    # Only trust the persisted state when the index and the chunk list agree;
    # otherwise search hits would map to the wrong (or missing) text.
    if loaded_index.ntotal == len(loaded_chunks) and loaded_index.d == embed_dim:
        index = loaded_index
        chunks = loaded_chunks
if index is None:
    index = faiss.IndexFlatL2(embed_dim)
    chunks = []

# Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))


# -------- FUNCTIONS --------
def pdf_url_to_chunks(pdf_url, chunk_size=CHUNK_SIZE):
    """Download a PDF and split its extracted text into word chunks.

    Args:
        pdf_url: URL of the PDF to fetch.
        chunk_size: Maximum number of whitespace-separated words per chunk.

    Returns:
        A list of strings, each holding up to ``chunk_size`` words joined by
        single spaces. Chunks of 20 words or fewer are discarded.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    resp = requests.get(pdf_url, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    reader = PdfReader(BytesIO(resp.content))

    # extract_text() may yield nothing for a page; skip those pages.
    page_texts = (page.extract_text() for page in reader.pages)
    words = " ".join(text for text in page_texts if text).split()

    pieces = (
        words[start:start + chunk_size]
        for start in range(0, len(words), chunk_size)
    )
    return [" ".join(piece) for piece in pieces if len(piece) > MIN_CHUNK_WORDS]


def build_vector_db():
    """Build the FAISS index from ``PDF_URLS`` and persist it to disk.

    Does nothing if chunks are already loaded. Otherwise downloads every PDF,
    embeds the chunks, adds them to the global ``index``, extends the global
    ``chunks`` list, and writes both to ``INDEX_FILE`` / ``CHUNKS_FILE``.

    Returns:
        A status message for display in the UI.
    """
    global index, chunks
    if len(chunks) > 0:
        return f"✅ Knowledge base already built with {len(chunks)} chunks."

    new_chunks = []
    for url in PDF_URLS:
        new_chunks.extend(pdf_url_to_chunks(url))

    # Encoding an empty list does not produce a 2-D array, which index.add
    # rejects; report the problem instead of crashing.
    if not new_chunks:
        return "⚠️ No text could be extracted from the ASME PDFs."

    embeddings = embedder.encode(new_chunks, convert_to_numpy=True)
    # FAISS expects contiguous float32 vectors.
    embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

    index.add(embeddings)
    chunks.extend(new_chunks)
    faiss.write_index(index, INDEX_FILE)
    np.save(CHUNKS_FILE, np.array(chunks, dtype=object))
    return f"✅ Knowledge base built with {len(chunks)} chunks."


def retrieve_chunks(query, top_k=TOP_K):
    """Return the stored chunks nearest to ``query`` in embedding space.

    Args:
        query: The user's question.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of at most ``top_k`` chunk strings, nearest first; empty when
        the knowledge base has not been built.
    """
    if len(chunks) == 0:
        return []

    # Never ask for more neighbours than the index holds.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    query_vec = embedder.encode([query], convert_to_numpy=True)
    query_vec = np.ascontiguousarray(query_vec, dtype=np.float32)
    _distances, indices = index.search(query_vec, k)

    # FAISS uses -1 as the label for "no result"; without the lower bound a
    # -1 would silently select the last chunk.
    return [chunks[int(i)] for i in indices[0] if 0 <= i < len(chunks)]


def ask_with_rag(query):
    """Answer ``query`` with the Groq model, grounded in retrieved chunks.

    Args:
        query: The user's question.

    Returns:
        The model's answer text, or a short notice when the knowledge base is
        empty or the question is blank.
    """
    if len(chunks) == 0:
        return "⚠️ Please build the knowledge base first."
    if not query or not query.strip():
        return "⚠️ Please enter a question."

    retrieved = retrieve_chunks(query)
    context = "\n\n".join(retrieved)
    prompt = (
        "You are an assistant knowledgeable in ASME Standards. "
        f"Context: {context} "
        f"User Query: {query} "
        "Answer using the context. If not found, reply: "
        "“I could not find it in the provided ASME documents.”"
    )
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.3-70b-versatile",
    )
    return chat_completion.choices[0].message.content


# -------- GRADIO UI --------
with gr.Blocks() as demo:
    gr.Markdown("# 📘 ASME RAG Assistant")
    gr.Markdown("This app is powered by FAISS + Groq LLM. Knowledge base is built from 2 official ASME PDFs.")

    build_btn = gr.Button("Build Knowledge Base")
    build_status = gr.Textbox(label="Status")
    build_btn.click(build_vector_db, inputs=None, outputs=build_status)

    query_input = gr.Textbox(label="Ask a Question", lines=1)
    answer_output = gr.Textbox(label="Answer", lines=5)
    query_btn = gr.Button("Ask")
    query_btn.click(ask_with_rag, inputs=query_input, outputs=answer_output)

demo.launch()