Spaces:

ChatBotsTA
/

pdf-summarize

Build error

App Files Files Community

ChatBotsTA committed on Sep 4, 2025

Commit

cf0600b

verified ·

1 Parent(s): 17e0e64

Update app.py

Browse files

Files changed (1) hide show

app.py +148 -151

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 # app.py
 import os
 import io
-import tempfile
 import streamlit as st
 from huggingface_hub import InferenceClient
 import pdfplumber
@@ -9,185 +8,178 @@ from PIL import Image
 import base64
 from typing import Optional
-st.set_page_config(page_title="PDF → Summary + TTS + Chat + Diagram", layout="wide")
-# ---------- Config (models - change if you prefer others) ----------
-LLAMA_MODEL = "Groq/Llama-3-Groq-8B-Tool-Use"        # Groq Llama model on HF (example)
-TTS_MODEL = "espnet/kan-bayashi_ljspeech_vits"       # example TTS model on HF
-SDXL_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"  # SDXL model on HF
-# ---------- Secrets: HF_TOKEN and GROQ_TOKEN ----------
 HF_TOKEN = os.environ.get("HF_TOKEN")
 GROQ_TOKEN = os.environ.get("GROQ_TOKEN")
-# ---------- Init InferenceClient ----------
 client: Optional[InferenceClient] = None
-client_info = ""
 try:
     if GROQ_TOKEN:
-        # Prefer Groq provider if GROQ_TOKEN present
         client = InferenceClient(provider="groq", api_key=GROQ_TOKEN)
-        client_info = "Using Groq provider (GROQ_TOKEN)"
     elif HF_TOKEN:
         client = InferenceClient(api_key=HF_TOKEN)
-        client_info = "Using Hugging Face Inference (HF_TOKEN)"
-    else:
-        client_info = "NO TOKEN FOUND"
-except Exception as e:
-    client_info = f"Failed to initialize InferenceClient: {e}"
     client = None
-# ---------- Helpers ----------
-def pdf_to_text_bytes(file_bytes: bytes) -> str:
     text_chunks = []
-    with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
-        for page in pdf.pages:
-            ptext = page.extract_text()
-            if ptext:
                 text_chunks.append(ptext)
-    return "\n\n".join(text_chunks)
 def llama_summarize(text: str) -> str:
     if client is None:
-        raise RuntimeError("InferenceClient not initialized (missing HF_TOKEN/GROQ_TOKEN).")
-    # Create simple system+user prompt
     messages = [
-        {"role": "system", "content": "You are a concise summarizer. Provide a short summary in bullet points."},
-        {"role": "user", "content": f"Summarize the following document in 6-8 concise bullet points:\n\n{text}"}
     ]
-    # Try chat completions API path, fallback to text generation if necessary
-    try:
-        resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
-        return resp.choices[0].message["content"]
-    except Exception:
-        try:
-            # fallback: text generation (single string)
-            resp2 = client.text_generation(model=LLAMA_MODEL, inputs="Summarize:\n\n" + text, max_new_tokens=512)
-            # resp2 may be dict-like or object; try a few access patterns
-            if isinstance(resp2, dict) and "generated_text" in resp2:
-                return resp2["generated_text"]
-            # try attribute access
-            return str(resp2)
-        except Exception as e:
-            raise RuntimeError(f"Summarization failed: {e}")
 def llama_chat(chat_history: list, user_question: str) -> str:
     if client is None:
-        raise RuntimeError("InferenceClient not initialized (missing HF_TOKEN/GROQ_TOKEN).")
     messages = chat_history + [{"role": "user", "content": user_question}]
-    try:
-        resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
-        return resp.choices[0].message["content"]
-    except Exception as e:
-        raise RuntimeError(f"Chat completion failed: {e}")
 def tts_synthesize(text: str) -> bytes:
     if client is None:
-        raise RuntimeError("InferenceClient not initialized (missing HF_TOKEN/GROQ_TOKEN).")
-    try:
-        audio_bytes = client.text_to_speech(model=TTS_MODEL, inputs=text)
-        return audio_bytes
-    except Exception as e:
-        raise RuntimeError(f"TTS failed: {e}")
 def generate_image(prompt_text: str) -> Image.Image:
     if client is None:
-        raise RuntimeError("InferenceClient not initialized (missing HF_TOKEN/GROQ_TOKEN).")
-    try:
-        img_bytes = client.text_to_image(prompt_text, model=SDXL_MODEL)
-        return Image.open(io.BytesIO(img_bytes))
-    except Exception as e:
-        raise RuntimeError(f"Image generation failed: {e}")
 def make_download_link_bytes(data: bytes, filename: str, mime: str):
     b64 = base64.b64encode(data).decode()
-    href = f'<a href="data:{mime};base64,{b64}" download="{filename}">Download {filename}</a>'
     return href
-# ---------- UI ----------
-st.title("PDF → Summary + TTS + Chat + Diagram (Groq/HF)")
-st.sidebar.markdown("### Runtime info")
-st.sidebar.write(client_info)
-st.sidebar.markdown("**Required env vars**: `HF_TOKEN` and/or `GROQ_TOKEN`. Prefer `GROQ_TOKEN` for Groq provider.")
-if client is None:
-    st.error("Inference client not initialized. Set HF_TOKEN or GROQ_TOKEN as environment variables in your Space.")
-    st.stop()
-uploaded = st.file_uploader("Upload a PDF to analyze", type=["pdf"])
-if uploaded:
-    file_bytes = uploaded.read()
-    with st.spinner("Extracting text from PDF..."):
-        try:
-            text = pdf_to_text_bytes(file_bytes)
-        except Exception as e:
-            st.error(f"Failed to extract text from PDF: {e}")
-            text = ""
-    st.subheader("Document preview (first 2000 chars)")
-    st.text_area("", value=(text[:2000] + ("..." if len(text) > 2000 else "")), height=220)
-    col1, col2 = st.columns(2)
-    with col1:
-        if st.button("Create summary"):
-            if not text.strip():
-                st.error("Document text empty or extraction failed.")
-            else:
-                with st.spinner("Summarizing with Llama..."):
-                    try:
-                        summary = llama_summarize(text)
-                        st.session_state["summary"] = summary
-                        st.subheader("Summary")
-                        st.markdown(summary)
-                    except Exception as e:
-                        st.error(str(e))
-        if "summary" in st.session_state:
-            summary = st.session_state["summary"]
-            if st.button("Synthesize summary to audio"):
-                with st.spinner("Generating speech..."):
-                    try:
-                        wav = tts_synthesize(summary)
-                        st.audio(wav)
-                        st.markdown(make_download_link_bytes(wav, "summary.wav", "audio/wav"), unsafe_allow_html=True)
-                    except Exception as e:
-                        st.error(str(e))
-    with col2:
-        st.subheader("Chat with the document")
-        if "chat_history" not in st.session_state:
-            doc_context = text[:4000] if text else ""
-            st.session_state["chat_history"] = [
-                {"role":"system","content":"You are an assistant that answers questions based only on the provided document context."},
-                {"role":"user","content": f"Document context:\n{doc_context}"}
-            ]
-            st.session_state["convo_display"] = []
-        user_q = st.text_input("Ask a question about the PDF")
-        if st.button("Ask question") and user_q.strip():
-            with st.spinner("Getting answer from Llama..."):
-                try:
-                    answer = llama_chat(st.session_state["chat_history"], user_q)
-                    # show and store
-                    st.session_state["convo_display"].append(("You", user_q))
-                    st.session_state["convo_display"].append(("Assistant", answer))
-                    st.session_state["chat_history"].append({"role":"user","content":user_q})
-                    st.session_state["chat_history"].append({"role":"assistant","content":answer})
-                except Exception as e:
-                    st.error(str(e))
-        # show conversation
-        for speaker, textline in st.session_state.get("convo_display", []):
-            if speaker == "You":
-                st.markdown(f"**You:** {textline}")
-            else:
-                st.markdown(f"**Assistant:** {textline}")
     st.markdown("---")
-    st.subheader("Generate diagram/image from prompt (SDXL)")
-    diagram_prompt = st.text_input("Describe the diagram or scene to generate")
-    if st.button("Generate diagram") and diagram_prompt.strip():
         with st.spinner("Generating image..."):
             try:
                 img = generate_image(diagram_prompt)
@@ -196,10 +188,15 @@ if uploaded:
                 img.save(buf, format="PNG")
                 st.download_button("Download diagram (PNG)", data=buf.getvalue(), file_name="diagram.png", mime="image/png")
             except Exception as e:
-                st.error(str(e))
-st.sidebar.markdown("---")
-st.sidebar.markdown("### Model IDs (change in app.py if you want)")
-st.sidebar.write(f"LLM: {LLAMA_MODEL}")
-st.sidebar.write(f"TTS: {TTS_MODEL}")
-st.sidebar.write(f"Image: {SDXL_MODEL}")

 # app.py
 import os
 import io
 import streamlit as st
 from huggingface_hub import InferenceClient
 import pdfplumber
 import base64
 from typing import Optional
+# ----------------- CONFIG -----------------
+LLAMA_MODEL = "Groq/Llama-3-Groq-8B-Tool-Use"
+TTS_MODEL = "espnet/kan-bayashi_ljspeech_vits"
+SDXL_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
 HF_TOKEN = os.environ.get("HF_TOKEN")
 GROQ_TOKEN = os.environ.get("GROQ_TOKEN")
+# Prefer Groq if token present, otherwise HF token
 client: Optional[InferenceClient] = None
 try:
     if GROQ_TOKEN:
         client = InferenceClient(provider="groq", api_key=GROQ_TOKEN)
     elif HF_TOKEN:
         client = InferenceClient(api_key=HF_TOKEN)
+except Exception:
     client = None
+# ----------------- PAGE STYLE -----------------
+st.set_page_config(page_title="PDF Buddy — Summarize • Speak • Chat • Draw", layout="wide")
+st.markdown(
+    """
+    <style>
+    .main > .block-container { padding: 1.5rem 2rem; max-width: 1100px; }
+    .title { font-size:28px; font-weight:700; color:#0f172a; }
+    .subtitle { color:#6b7280; margin-bottom:12px; }
+    .big-btn { font-weight:600; padding:10px 18px; border-radius:10px; }
+    .small-muted { color:#9ca3af; font-size:12px; }
+    </style>
+    """,
+    unsafe_allow_html=True,
+)
+st.markdown('<div class="title">📄 PDF Buddy — Summarize • Speak • Chat • Draw</div>', unsafe_allow_html=True)
+st.markdown('<div class="subtitle">Upload a PDF, get a concise summary, speak it, ask questions, or generate diagrams from prompts.</div>', unsafe_allow_html=True)
+# ----------------- FUNCTIONS -----------------
+def pdf_to_text_bytes(file_bytes: bytes):
+    """Extract text using pdfplumber, return full text and page count."""
     text_chunks = []
+    try:
+        with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
+            total = len(pdf.pages)
+            for i, page in enumerate(pdf.pages):
+                ptext = page.extract_text() or ""
                 text_chunks.append(ptext)
+                # simple progress output handled by caller
+    except Exception as e:
+        raise RuntimeError(f"PDF parsing failed: {e}")
+    return "\n\n".join(text_chunks), total
 def llama_summarize(text: str) -> str:
     if client is None:
+        raise RuntimeError("LLM client not initialized (missing HF_TOKEN/GROQ_TOKEN).")
     messages = [
+        {"role": "system", "content": "You are a concise summarizer. Give 6 short bullet points."},
+        {"role": "user", "content": f"Summarize this document in 6 concise bullet points:\n\n{text}"}
     ]
+    resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
+    return resp.choices[0].message["content"]
 def llama_chat(chat_history: list, user_question: str) -> str:
     if client is None:
+        raise RuntimeError("LLM client not initialized (missing HF_TOKEN/GROQ_TOKEN).")
     messages = chat_history + [{"role": "user", "content": user_question}]
+    resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
+    return resp.choices[0].message["content"]
 def tts_synthesize(text: str) -> bytes:
     """Convert text to speech with the configured TTS model.

     Args:
         text: The text to be spoken.

     Returns:
         The audio payload returned by ``InferenceClient.text_to_speech``.

     Raises:
         RuntimeError: If no inference client was created at startup.
     """
     if client is None:
         raise RuntimeError("TTS client not initialized (missing HF_TOKEN/GROQ_TOKEN).")
     # text_to_speech takes the text as its first positional parameter; it has
     # no ``inputs`` keyword, so passing the text by that name raises TypeError.
     return client.text_to_speech(text, model=TTS_MODEL)
 def generate_image(prompt_text: str) -> Image.Image:
     """Generate an image from a text prompt with the configured image model.

     Args:
         prompt_text: Description of the diagram or scene to draw.

     Returns:
         The generated picture as a PIL image.

     Raises:
         RuntimeError: If no inference client was created at startup.
     """
     if client is None:
         raise RuntimeError("Image generation client not initialized (missing HF_TOKEN/GROQ_TOKEN).")
     result = client.text_to_image(prompt_text, model=SDXL_MODEL)
     # text_to_image already returns a PIL image; wrapping it in BytesIO raised
     # TypeError. Decode only if raw bytes are handed back instead.
     if isinstance(result, (bytes, bytearray)):
         return Image.open(io.BytesIO(result))
     return result
 def make_download_link_bytes(data: bytes, filename: str, mime: str):
     """Build an HTML anchor that downloads ``data`` through a base64 data URI.

     Args:
         data: Raw bytes to embed in the link.
         filename: Name suggested to the browser for the saved file.
         mime: MIME type placed in the data URI.

     Returns:
         The ``<a>`` element as a string, intended for rendering as raw HTML.
     """
     import html

     b64 = base64.b64encode(data).decode()
     # The result is rendered as raw HTML, so escape everything that lands in
     # an attribute or in the link text; ordinary names pass through unchanged.
     safe_name = html.escape(filename, quote=True)
     safe_mime = html.escape(mime, quote=True)
     href = f'<a href="data:{safe_mime};base64,{b64}" download="{safe_name}">⬇️ Download {safe_name}</a>'
     return href
+# ----------------- STATE -----------------
+if "uploaded_name" not in st.session_state:
+    st.session_state.uploaded_name = None
+if "extracted_text" not in st.session_state:
+    st.session_state.extracted_text = ""
+if "summary" not in st.session_state:
+    st.session_state.summary = ""
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+# ----------------- Uploader Column -----------------
+col_left, col_right = st.columns([1, 1])
+with col_left:
+    uploaded = st.file_uploader("Upload PDF (single file)", type=["pdf"], help="Drag & drop or click to choose a PDF.")
+    if uploaded is not None:
+        # immediate feedback to user
+        st.success(f"Uploaded file: **{uploaded.name}** — {round(len(uploaded.getvalue())/1024,1)} KB")
+        st.session_state.uploaded_name = uploaded.name
+        # extract text with progress
+        with st.spinner("Extracting text from PDF..."):
+            try:
+                bytes_in = uploaded.getvalue()
+                text, pages = pdf_to_text_bytes(bytes_in)
+                st.session_state.extracted_text = text
+                st.success(f"Extraction complete — {pages} pages processed. Preview shown below.")
+            except Exception as e:
+                st.session_state.extracted_text = ""
+                st.error(f"Failed to extract PDF text: {e}")
+    # show a preview (or hint)
+    if st.session_state.extracted_text:
+        st.subheader("Document preview (first 3000 chars)")
+        st.text_area("", value=(st.session_state.extracted_text[:3000] + ("..." if len(st.session_state.extracted_text) > 3000 else "")), height=240)
+    else:
+        st.info("No document loaded. Upload a PDF to get started. If your file is large, extraction may take a few seconds.")
+with col_right:
+    # Controls: disabled until extraction is available
+    disabled = not bool(st.session_state.extracted_text)
+    st.subheader("Actions")
+    if st.button("📝 Create summary", key="summarize", disabled=disabled):
+        with st.spinner("Creating summary..."):
+            try:
+                summary = llama_summarize(st.session_state.extracted_text[:30000])  # limit prompt length
+                st.session_state.summary = summary
+                st.success("Summary created.")
+            except Exception as e:
+                st.error(f"Summarization failed: {e}")
+    if st.session_state.summary:
+        st.markdown("**Summary:**")
+        st.markdown(st.session_state.summary)
+    if st.button("🔊 Synthesize summary to audio", key="tts", disabled=disabled or not st.session_state.summary):
+        with st.spinner("Synthesizing audio..."):
+            try:
+                wav = tts_synthesize(st.session_state.summary)
+                st.audio(wav)
+                st.markdown(make_download_link_bytes(wav, "summary.wav", "audio/wav"), unsafe_allow_html=True)
+            except Exception as e:
+                st.error(f"TTS failed: {e}")
+    st.markdown("---")
+    st.subheader("Chat with document")
+    if "chat_history" not in st.session_state or not st.session_state.chat_history:
+        # initialize with document context (short)
+        context = st.session_state.extracted_text[:4000] if st.session_state.extracted_text else ""
+        st.session_state.chat_history = [
+            {"role": "system", "content": "You are a helpful assistant. Answer strictly using the document context."},
+            {"role": "user", "content": f"Document context:\n{context}"}
+        ]
+    user_q = st.text_input("Ask a question about the PDF", key="user_q", disabled=disabled)
+    if st.button("❓ Ask", key="ask_btn", disabled=disabled or not user_q):
+        with st.spinner("Getting answer..."):
+            try:
+                ans = llama_chat(st.session_state.chat_history, user_q)
+                st.session_state.chat_history.append({"role": "user", "content": user_q})
+                st.session_state.chat_history.append({"role": "assistant", "content": ans})
+                st.markdown(f"**You:** {user_q}")
+                st.markdown(f"**Assistant:** {ans}")
+            except Exception as e:
+                st.error(f"Chat failed: {e}")
     st.markdown("---")
+    st.subheader("Generate diagram from prompt (SDXL)")
+    diagram_prompt = st.text_input("Describe diagram or scene", key="diagram_prompt", disabled=disabled)
+    if st.button("🖼️ Generate diagram", key="gen_img", disabled=disabled or not diagram_prompt):
         with st.spinner("Generating image..."):
             try:
                 img = generate_image(diagram_prompt)
                 img.save(buf, format="PNG")
                 st.download_button("Download diagram (PNG)", data=buf.getvalue(), file_name="diagram.png", mime="image/png")
             except Exception as e:
+                st.error(f"Image generation failed: {e}")
+# ----------------- FOOTER / NOTES -----------------
+st.markdown("---")
+st.markdown(
+    """
+    **Notes**
+    - API keys are read from environment variables (HF_TOKEN and/or GROQ_TOKEN). They are NOT displayed here.
+    - If nothing happens after upload, try a small PDF (1–2 pages) to test extraction first.
+    - If you get errors about the LLM/TTS/Image calls, confirm the tokens are set in your Space settings or `.env` (don’t commit `.env` publicly).
+    """
+)