# PDF Question Answering app — Gradio UI over FAISS retrieval and Groq generation.
| import os | |
| import gradio as gr | |
| import faiss | |
| import numpy as np | |
| from groq import Groq | |
| from pypdf import PdfReader | |
| from sentence_transformers import SentenceTransformer | |
# -----------------------
# Model / client setup
# -----------------------

# Sentence embedder used for both document chunks and user queries.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Groq chat client; the API key is read from the environment.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# -----------------------
# Global storage (populated when a PDF is processed)
# -----------------------
faiss_index = None    # FAISS index over the current document's chunk embeddings
document_chunks = []  # chunk texts, row-aligned with the index
def read_pdf(file):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        file: A path or file-like object accepted by ``pypdf.PdfReader``
            (here, the value handed over by the Gradio File component).

    Returns:
        str: All page text joined together. Pages that yield no text
        (e.g. scanned/image-only pages) contribute an empty string
        instead of raising.
    """
    reader = PdfReader(file)
    # extract_text() may return None/empty for image-only pages — guard with `or ""`.
    # str.join avoids the quadratic cost of repeated string concatenation.
    return "".join((page.extract_text() or "") for page in reader.pages)
def chunk_text(text, chunk_size=500, overlap=50):
    """Split *text* into fixed-size chunks with overlapping edges.

    Consecutive chunks share ``overlap`` characters so that sentences cut
    at a boundary still appear whole in at least one chunk.

    Args:
        text: The document text to split. Empty text yields no chunks.
        chunk_size: Maximum characters per chunk (must be positive).
        overlap: Characters shared between consecutive chunks
            (must be smaller than ``chunk_size``).

    Returns:
        list[str]: The chunks, in document order; the last chunk may be
        shorter than ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size <= 0`` or ``overlap >= chunk_size``
            (the original cursor loop would never terminate in that case).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    # The manual while-loop advanced the cursor by (chunk_size - overlap)
    # each iteration; a strided range produces the identical chunk starts.
    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]
def create_faiss_index(chunks):
    """Embed *chunks* and (re)build the module-level FAISS search index.

    Side effects: replaces the global ``document_chunks`` with *chunks*
    and rebinds ``faiss_index`` to a fresh L2 flat index over their
    embeddings.
    """
    global faiss_index, document_chunks
    document_chunks = chunks
    # Encode all chunks at once; FAISS requires contiguous float32 vectors.
    vectors = np.array(embedding_model.encode(chunks)).astype("float32")
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    faiss_index = index
def search_chunks(query, top_k=3):
    """Return the ``top_k`` stored chunks most similar to *query*.

    Embeds the query with the same model used for the documents and
    looks up nearest neighbours in the global ``faiss_index``.
    """
    query_vec = np.array(embedding_model.encode([query])).astype("float32")
    _, neighbor_ids = faiss_index.search(query_vec, top_k)
    # indices come back as a (1, top_k) array; map the ids to chunk texts.
    return [document_chunks[i] for i in neighbor_ids[0]]
def ask_groq(context, question):
    """Answer *question* with the Groq LLM, grounded in *context*.

    Returns the model's reply text.
    """
    prompt = f"""
Use the text below to answer the question.
Text:
{context}
Question:
{question}
"""
    chat = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
    )
    return chat.choices[0].message.content
# -----------------------
# Gradio callbacks
# -----------------------
def upload_pdf(file):
    """Gradio handler: ingest an uploaded PDF and build the search index.

    Returns a status string for the Status textbox.
    """
    if file is None:
        return "Please upload a PDF first."
    create_faiss_index(chunk_text(read_pdf(file)))
    return "PDF processed. You can now ask questions."
def answer_question(question):
    """Gradio handler: retrieve relevant chunks and answer via the LLM.

    Returns either the model's answer or a prompt to upload a PDF first.
    """
    if faiss_index is None:
        return "Please upload a PDF first."
    context = "\n".join(search_chunks(question))
    return ask_groq(context, question)
# -----------------------
# Gradio UI
# -----------------------
with gr.Blocks() as demo:
    gr.Markdown("# PDF Question Answer App")

    # Document ingestion controls.
    pdf_file = gr.File(label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status", lines=1)

    # Question/answer controls.
    question = gr.Textbox(
        label="Your Question",
        lines=2,
        placeholder="Type your question here",
    )
    ask_btn = gr.Button("Ask Question")
    answer = gr.Textbox(label="Answer", lines=10)

    # Wire the buttons to their handlers.
    upload_btn.click(upload_pdf, inputs=pdf_file, outputs=status)
    ask_btn.click(answer_question, inputs=question, outputs=answer)

demo.launch()