Update app.py
app.py CHANGED
@@ -7,135 +7,199 @@ from huggingface_hub import InferenceClient
 import pdfplumber
 from PIL import Image
 import base64
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+from typing import Optional
+
+st.set_page_config(page_title="PDF → Summary + TTS + Chat + Diagram", layout="wide")
+
+# ---------- Config (models - change if you prefer others) ----------
+LLAMA_MODEL = "Groq/Llama-3-Groq-8B-Tool-Use"  # Groq Llama model on HF (example)
+TTS_MODEL = "espnet/kan-bayashi_ljspeech_vits"  # example TTS model on HF
+SDXL_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"  # SDXL model on HF
+
+# ---------- Secrets: HF_TOKEN and GROQ_TOKEN ----------
+HF_TOKEN = os.environ.get("HF_TOKEN")
+GROQ_TOKEN = os.environ.get("GROQ_TOKEN")
+
+# ---------- Init InferenceClient ----------
+client: Optional[InferenceClient] = None
+client_info = ""
+try:
+    if GROQ_TOKEN:
+        # Prefer the Groq provider if GROQ_TOKEN is present
+        client = InferenceClient(provider="groq", api_key=GROQ_TOKEN)
+        client_info = "Using Groq provider (GROQ_TOKEN)"
+    elif HF_TOKEN:
+        client = InferenceClient(api_key=HF_TOKEN)
+        client_info = "Using Hugging Face Inference (HF_TOKEN)"
+    else:
+        client_info = "NO TOKEN FOUND"
+except Exception as e:
+    client_info = f"Failed to initialize InferenceClient: {e}"
+    client = None
 
 # ---------- Helpers ----------
-def
+def pdf_to_text_bytes(file_bytes: bytes) -> str:
     text_chunks = []
-    with pdfplumber.open(
+    with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
         for page in pdf.pages:
             ptext = page.extract_text()
             if ptext:
                 text_chunks.append(ptext)
     return "\n\n".join(text_chunks)
 
-def llama_summarize(text
-
-
-
+def llama_summarize(text: str) -> str:
+    if client is None:
+        raise RuntimeError("InferenceClient not initialized (missing HF_TOKEN/GROQ_TOKEN).")
+    # Create a simple system+user prompt
+    messages = [
+        {"role": "system", "content": "You are a concise summarizer. Provide a short summary in bullet points."},
+        {"role": "user", "content": f"Summarize the following document in 6-8 concise bullet points:\n\n{text}"}
     ]
-    #
-    resp = client.chat.completions.create(model=LLAMA_MODEL, messages=prompt)
+    # Try the chat completions API path, fall back to text generation if necessary
     try:
-
+        resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
+        return resp.choices[0].message["content"]
     except Exception:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            # fallback: plain text generation (the prompt is the first positional argument)
+            resp2 = client.text_generation("Summarize:\n\n" + text, model=LLAMA_MODEL, max_new_tokens=512)
+            # resp2 is normally a plain string; handle dict-like payloads defensively
+            if isinstance(resp2, dict) and "generated_text" in resp2:
+                return resp2["generated_text"]
+            # otherwise fall back to the string form
+            return str(resp2)
+        except Exception as e:
+            raise RuntimeError(f"Summarization failed: {e}")
+
+def llama_chat(chat_history: list, user_question: str) -> str:
+    if client is None:
+        raise RuntimeError("InferenceClient not initialized (missing HF_TOKEN/GROQ_TOKEN).")
+    messages = chat_history + [{"role": "user", "content": user_question}]
+    try:
+        resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
+        return resp.choices[0].message["content"]
+    except Exception as e:
+        raise RuntimeError(f"Chat completion failed: {e}")
+
+def tts_synthesize(text: str) -> bytes:
+    if client is None:
+        raise RuntimeError("InferenceClient not initialized (missing HF_TOKEN/GROQ_TOKEN).")
+    try:
+        audio_bytes = client.text_to_speech(text, model=TTS_MODEL)  # text is the first positional arg; returns raw audio bytes
+        return audio_bytes
+    except Exception as e:
+        raise RuntimeError(f"TTS failed: {e}")
+
+def generate_image(prompt_text: str) -> Image.Image:
+    if client is None:
+        raise RuntimeError("InferenceClient not initialized (missing HF_TOKEN/GROQ_TOKEN).")
+    try:
+        img = client.text_to_image(prompt_text, model=SDXL_MODEL)  # returns a PIL Image, not raw bytes
+        return img
+    except Exception as e:
+        raise RuntimeError(f"Image generation failed: {e}")
+
+def make_download_link_bytes(data: bytes, filename: str, mime: str):
+    b64 = base64.b64encode(data).decode()
+    href = f'<a href="data:{mime};base64,{b64}" download="{filename}">Download {filename}</a>'
+    return href
+
+# ---------- UI ----------
+st.title("PDF → Summary + TTS + Chat + Diagram (Groq/HF)")
+
+st.sidebar.markdown("### Runtime info")
+st.sidebar.write(client_info)
+st.sidebar.markdown("**Required env vars**: `HF_TOKEN` and/or `GROQ_TOKEN`. Prefer `GROQ_TOKEN` for the Groq provider.")
+
+if client is None:
+    st.error("Inference client not initialized. Set HF_TOKEN or GROQ_TOKEN as environment variables in your Space.")
+    st.stop()
+
+uploaded = st.file_uploader("Upload a PDF to analyze", type=["pdf"])
 if uploaded:
+    file_bytes = uploaded.read()
     with st.spinner("Extracting text from PDF..."):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            text = pdf_to_text_bytes(file_bytes)
+        except Exception as e:
+            st.error(f"Failed to extract text from PDF: {e}")
+            text = ""
+    st.subheader("Document preview (first 2000 chars)")
+    st.text_area("", value=(text[:2000] + ("..." if len(text) > 2000 else "")), height=220)
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        if st.button("Create summary"):
+            if not text.strip():
+                st.error("Document text empty or extraction failed.")
+            else:
+                with st.spinner("Summarizing with Llama..."):
+                    try:
+                        summary = llama_summarize(text)
+                        st.session_state["summary"] = summary
+                        st.subheader("Summary")
+                        st.markdown(summary)
+                    except Exception as e:
+                        st.error(str(e))
+
+        if "summary" in st.session_state:
+            summary = st.session_state["summary"]
+            if st.button("Synthesize summary to audio"):
+                with st.spinner("Generating speech..."):
+                    try:
+                        wav = tts_synthesize(summary)
+                        st.audio(wav)
+                        st.markdown(make_download_link_bytes(wav, "summary.wav", "audio/wav"), unsafe_allow_html=True)
+                    except Exception as e:
+                        st.error(str(e))
+
+    with col2:
+        st.subheader("Chat with the document")
+        if "chat_history" not in st.session_state:
+            doc_context = text[:4000] if text else ""
+            st.session_state["chat_history"] = [
+                {"role":"system","content":"You are an assistant that answers questions based only on the provided document context."},
+                {"role":"user","content": f"Document context:\n{doc_context}"}
+            ]
+            st.session_state["convo_display"] = []
+
+        user_q = st.text_input("Ask a question about the PDF")
+        if st.button("Ask question") and user_q.strip():
+            with st.spinner("Getting answer from Llama..."):
                 try:
-
-
-
+                    answer = llama_chat(st.session_state["chat_history"], user_q)
+                    # show and store
+                    st.session_state["convo_display"].append(("You", user_q))
+                    st.session_state["convo_display"].append(("Assistant", answer))
+                    st.session_state["chat_history"].append({"role":"user","content":user_q})
+                    st.session_state["chat_history"].append({"role":"assistant","content":answer})
                 except Exception as e:
-                    st.error(
+                    st.error(str(e))
 
-
-
-
-
-
-
-        {"role":"system","content":"You are a helpful assistant that answers questions based on the provided document."},
-        {"role":"user","content": f"Document context:\n{doc_context}"}
-    ]
-
-    user_q = st.text_input("Ask a question about the PDF")
-    if st.button("Ask") and user_q:
-        with st.spinner("Getting answer from Groq Llama..."):
-            answer = llama_chat(st.session_state["chat_history"], user_q)
-            st.session_state.setdefault("convo", []).append(("You", user_q))
-            st.session_state.setdefault("convo", []).append(("Assistant", answer))
-            # append to history for next calls
-            st.session_state["chat_history"].append({"role":"user","content":user_q})
-            st.session_state["chat_history"].append({"role":"assistant","content":answer})
-            st.write(answer)
+        # show conversation
+        for speaker, textline in st.session_state.get("convo_display", []):
+            if speaker == "You":
+                st.markdown(f"**You:** {textline}")
+            else:
+                st.markdown(f"**Assistant:** {textline}")
 
 st.markdown("---")
-st.subheader("Generate
+st.subheader("Generate diagram/image from prompt (SDXL)")
 diagram_prompt = st.text_input("Describe the diagram or scene to generate")
-if st.button("Generate diagram") and diagram_prompt:
-    with st.spinner("Generating image
+if st.button("Generate diagram") and diagram_prompt.strip():
+    with st.spinner("Generating image..."):
         try:
             img = generate_image(diagram_prompt)
             st.image(img, use_column_width=True)
-            # allow download
             buf = io.BytesIO()
             img.save(buf, format="PNG")
             st.download_button("Download diagram (PNG)", data=buf.getvalue(), file_name="diagram.png", mime="image/png")
         except Exception as e:
-            st.error(
+            st.error(str(e))
 
-st.sidebar.
-st.sidebar.
+st.sidebar.markdown("---")
+st.sidebar.markdown("### Model IDs (change in app.py if you want)")
 st.sidebar.write(f"LLM: {LLAMA_MODEL}")
 st.sidebar.write(f"TTS: {TTS_MODEL}")
 st.sidebar.write(f"Image: {SDXL_MODEL}")
-
-st.sidebar.markdown("**Notes**\n- Set HF_TOKEN in Space secrets or environment before starting.\n- To route directly to Groq with your Groq API key, set `GROQ_API_KEY` and change the client init accordingly.")
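
For reference, a minimal standalone sketch of the huggingface_hub InferenceClient calls this app relies on, with the return types the code above assumes. This presumes a recent huggingface_hub release with inference-provider support; the model IDs are reused from the config section and the token values are placeholders:

from huggingface_hub import InferenceClient

# Either a plain HF token, or route through the Groq provider
client = InferenceClient(api_key="hf_...")  # or InferenceClient(provider="groq", api_key="gsk_...")

# Chat completion: OpenAI-style response object
resp = client.chat.completions.create(
    model="Groq/Llama-3-Groq-8B-Tool-Use",
    messages=[{"role": "user", "content": "Say hello"}],
)
print(resp.choices[0].message["content"])

# Text-to-speech: the text is the first positional argument; returns raw audio bytes
audio = client.text_to_speech("Hello there", model="espnet/kan-bayashi_ljspeech_vits")

# Text-to-image: returns a PIL.Image.Image, not bytes
img = client.text_to_image("a simple flowchart", model="stabilityai/stable-diffusion-xl-base-1.0")
img.save("out.png")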