Spaces:

isana25
/

RAG_Application

Sleeping

App Files Community

isana25 committed on May 14, 2025

Commit

97f8372

verified ·

1 Parent(s): 298ec1c

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -56

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import faiss
 from sentence_transformers import SentenceTransformer
 from groq import Groq
-# ✅ Load Groq API key securely
 groq_api_key = os.getenv("GROQ_API_KEY")
 client = Groq(api_key=groq_api_key)
@@ -15,7 +15,6 @@ client = Groq(api_key=groq_api_key)
 model = SentenceTransformer('all-MiniLM-L6-v2')
 stored_chunks = []
-stored_embeddings = None
 stored_index = None
 def extract_text_from_pdf(pdf_path):
@@ -25,23 +24,8 @@ def extract_text_from_pdf(pdf_path):
         text += page.get_text()
     return text
-def chunk_text(text, max_chunk_size=500):
-    words = text.split()
-    chunks = [' '.join(words[i:i+max_chunk_size]) for i in range(0, len(words), max_chunk_size)]
-    return chunks
-def embed_chunks(chunks):
-    embeddings = model.encode(chunks)
-    return np.array(embeddings)
-def build_faiss_index(embeddings):
-    dimension = embeddings.shape[1]
-    index = faiss.IndexFlatL2(dimension)
-    index.add(embeddings)
-    return index
 def handle_pdf(file):
-    global stored_chunks, stored_embeddings, stored_index
     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
         tmp.write(file.read())
@@ -49,32 +33,24 @@ def handle_pdf(file):
     text = extract_text_from_pdf(tmp_path)
-    # Chunking
-    chunks = chunk_text(text)
-    chunk_comment = f"✅ Chunking Done: {len(chunks)} chunks created."
-    # Tokenization
-    embeddings = embed_chunks(chunks)
-    token_comment = f"✅ Tokenization Done: Embeddings shape {embeddings.shape}."
-    # Vector DB
-    index = build_faiss_index(embeddings)
-    vector_comment = f"✅ Vector DB Created: FAISS index with {index.ntotal} vectors."
     stored_chunks = chunks
-    stored_embeddings = embeddings
     stored_index = index
-    return chunk_comment, token_comment, vector_comment
 def answer_query(query):
-    if stored_index is None or not stored_chunks:
         return "❌ Please upload and process a PDF first."
     query_vec = model.encode([query])
     D, I = stored_index.search(np.array([query_vec]), k=3)
     top_chunks = [stored_chunks[i] for i in I[0]]
     context = "\n\n".join(top_chunks)
     prompt = f"""Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"""
@@ -91,32 +67,18 @@ def answer_query(query):
 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("# 📄 RAG PDF Chat with Groq + LLaMA")
-    with gr.Row():
-        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
-        process_button = gr.Button("📥 Process PDF")
-    chunk_output = gr.Textbox(label="Chunking Status")
-    token_output = gr.Textbox(label="Tokenization Status")
-    vector_output = gr.Textbox(label="Vector DB Status")
-    process_button.click(
-        fn=handle_pdf,
-        inputs=[file_input],
-        outputs=[chunk_output, token_output, vector_output]
-    )
-    gr.Markdown("## 💬 Ask a Question About the Document")
-    question_input = gr.Textbox(label="Your Question")
-    ask_button = gr.Button("🤖 Ask")
-    answer_output = gr.Textbox(label="Answer", lines=5)
-    ask_button.click(
-        fn=answer_query,
-        inputs=[question_input],
-        outputs=[answer_output]
-    )
 demo.launch()

 from sentence_transformers import SentenceTransformer
 from groq import Groq
+# ✅ Load Groq API key securely from Hugging Face secret
 groq_api_key = os.getenv("GROQ_API_KEY")
 client = Groq(api_key=groq_api_key)
 model = SentenceTransformer('all-MiniLM-L6-v2')
 stored_chunks = []
 stored_index = None
 def extract_text_from_pdf(pdf_path):
         text += page.get_text()
     return text
 def handle_pdf(file):
+    global stored_chunks, stored_index
     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
         tmp.write(file.read())
     text = extract_text_from_pdf(tmp_path)
+    # Chunk and embed
+    chunks = [' '.join(text.split()[i:i+500]) for i in range(0, len(text.split()), 500)]
+    embeddings = model.encode(chunks)
+    index = faiss.IndexFlatL2(embeddings.shape[1])
+    index.add(np.array(embeddings))
     stored_chunks = chunks
     stored_index = index
+    return "✅ PDF processed successfully. You can now ask a question."
 def answer_query(query):
+    if not stored_chunks or stored_index is None:
         return "❌ Please upload and process a PDF first."
     query_vec = model.encode([query])
     D, I = stored_index.search(np.array([query_vec]), k=3)
     top_chunks = [stored_chunks[i] for i in I[0]]
     context = "\n\n".join(top_chunks)
     prompt = f"""Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"""
 # Gradio UI
 with gr.Blocks() as demo:
+    gr.Markdown("# 📄 Ask Your PDF - Powered by Groq + LLaMA")
+    file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
+    status_output = gr.Textbox(label="Status")
+    file_input.change(fn=handle_pdf, inputs=file_input, outputs=status_output)
+    gr.Markdown("## 💬 Ask a Question About Your PDF")
+    question = gr.Textbox(label="Your Question")
+    ask_button = gr.Button("Ask")
+    answer = gr.Textbox(label="Answer", lines=5)
+    ask_button.click(fn=answer_query, inputs=question, outputs=answer)
 demo.launch()