waqasbm committed on
Commit
34cbf68
·
verified ·
1 Parent(s): 99afdd7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import fitz # PyMuPDF
3
+ import requests
4
+ import os
5
+ from dotenv import load_dotenv
6
+
7
# Load environment variables (from a local .env file, if one is present).
load_dotenv()
# Look the key up by the NAME of the environment variable. The secret itself
# must never appear in source: define GROQ_API_KEY in .env or in the Hugging
# Face Space secrets. Any key that was ever committed to the repository is
# compromised and should be revoked and rotated.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
GROQ_MODEL = "llama3-8b-8192"  # or use llama3-70b-8192 for more power
12
+
13
# Page chrome: browser-tab title, centered layout, heading and a short blurb.
# The emoji are written as real Unicode characters (they had been garbled
# into mojibake by a wrong byte decoding).
st.set_page_config(page_title="📄 PDF Data Extractor AI", layout="centered")
st.title("📄 Intelligent PDF Data Extractor & Summarizer")

st.markdown("""
Upload a PDF and extract key insights automatically using AI.
This tool helps improve decision-making, reduce errors, and boost productivity.
""")

# Only PDFs are accepted; the result is checked for truthiness further down
# before any processing starts.
uploaded_file = st.file_uploader("Upload PDF file", type=["pdf"])
22
+
23
def extract_text_from_pdf(file):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        file: Binary file-like object whose ``read()`` returns the raw PDF
            bytes (here, the object handed back by ``st.file_uploader``).

    Returns:
        The text of all pages joined in page order; an empty string when no
        page yields any text.
    """
    # Open as a context manager so the document is closed even if a page
    # fails to parse; the original never closed it.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
29
+
30
def query_groq(text, system_prompt, timeout=60):
    """Send ``text`` to the Groq chat-completions endpoint and return the reply.

    Args:
        text: Content of the user message (here, the extracted PDF text).
        system_prompt: Content of the system message that frames the task.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        The ``content`` string of the first choice in the API response.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is empty or unset.
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # Fail early with a clear message instead of an opaque 401 from the API.
    if not GROQ_API_KEY:
        raise RuntimeError(
            "GROQ_API_KEY is not set; add it to .env or the Space secrets."
        )

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text},
        ],
        "temperature": 0.2,
        "max_tokens": 1024,
    }
    response = requests.post(
        GROQ_API_URL, headers=headers, json=payload, timeout=timeout
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
47
+
48
if uploaded_file is not None:
    # Sent as the system message; the extracted PDF text is the user message.
    prompt = (
        "You are an intelligent PDF data assistant. Read the document and extract a clear summary. "
        "Highlight key insights, decisions, data points, and actionable information. "
        "Return a structured summary that enhances decision-making and productivity."
    )

    summary = None
    try:
        with st.spinner("🔍 Extracting and summarizing..."):
            # Extraction sits inside the try so a corrupt or unreadable PDF
            # is reported to the user instead of crashing the app.
            raw_text = extract_text_from_pdf(uploaded_file)
            # Skip the API call when there is nothing to summarize
            # (e.g. a scanned PDF with no text layer).
            if raw_text.strip():
                summary = query_groq(raw_text, prompt)
    except Exception as e:
        # Top-level UI boundary: show any failure rather than a stack trace.
        st.error(f"❌ Failed to extract summary: {e}")
    else:
        if summary is None:
            st.warning("No extractable text was found in this PDF.")
        else:
            st.subheader("🧠 Extracted Summary")
            st.success(summary)

            st.markdown("---")
            # NOTE(review): the document text is sent to the Groq API, so
            # "local processing" in this caption looks inaccurate - confirm
            # the wording with the product owner.
            st.caption("✅ Powered by GROQ LLaMA and PyMuPDF. Safe and secure local processing.")
else:
    st.info("📥 Please upload a PDF file to begin.")