Spaces:

waqasbm
/

Data_Extractor_Tool

Sleeping

App Files Files Community

waqasbm committed on May 19, 2025

Commit

34cbf68

verified ·

1 Parent(s): 99afdd7

Create app.py

Browse files

Files changed (1) hide show

app.py +70 -0

app.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import streamlit as st
+import fitz  # PyMuPDF
+import requests
+import os
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+GROQ_API_KEY = os.getenv("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")  # Put this in your .env file or Hugging Face secrets
+GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
+GROQ_MODEL = "llama3-8b-8192"  # or use llama3-70b-8192 for more power
+st.set_page_config(page_title="📄 PDF Data Extractor AI", layout="centered")
+st.title("📄 Intelligent PDF Data Extractor & Summarizer")
+st.markdown("""
+Upload a PDF and extract key insights automatically using AI.
+This tool helps improve decision-making, reduce errors, and boost productivity.
+""")
+uploaded_file = st.file_uploader("Upload PDF file", type=["pdf"])
+def extract_text_from_pdf(file):
+    doc = fitz.open(stream=file.read(), filetype="pdf")
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
+def query_groq(text, system_prompt):
+    headers = {
+        "Authorization": f"Bearer {GROQ_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    payload = {
+        "model": GROQ_MODEL,
+        "messages": [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": text}
+        ],
+        "temperature": 0.2,
+        "max_tokens": 1024
+    }
+    response = requests.post(GROQ_API_URL, headers=headers, json=payload)
+    response.raise_for_status()
+    return response.json()["choices"][0]["message"]["content"]
+if uploaded_file:
+    with st.spinner("🔍 Extracting and summarizing..."):
+        raw_text = extract_text_from_pdf(uploaded_file)
+        # Summarize using GROQ
+        prompt = (
+            "You are an intelligent PDF data assistant. Read the document and extract a clear summary. "
+            "Highlight key insights, decisions, data points, and actionable information. "
+            "Return a structured summary that enhances decision-making and productivity."
+        )
+        try:
+            summary = query_groq(raw_text, prompt)
+            st.subheader("🧠 Extracted Summary")
+            st.success(summary)
+            st.markdown("---")
+            st.caption("✅ Powered by GROQ LLaMA and PyMuPDF. Safe and secure local processing.")
+        except Exception as e:
+            st.error(f"❌ Failed to extract summary: {e}")
+else:
+    st.info("📥 Please upload a PDF file to begin.")