Spaces:

ChatBotsTA
/

pdf-summary

Sleeping

App Files Files Community

ChatBotsTA committed on Sep 5, 2025

Commit

6db8576

verified ·

1 Parent(s): cca24fc

Update app.py

Browse files

Files changed (1) hide show

app.py +224 -133

app.py CHANGED Viewed

@@ -1,193 +1,284 @@
-# streamlit_pdf_qa.py
 import os
 import streamlit as st
-import openai
 from PyPDF2 import PdfReader
-import requests
-import re
 from typing import List, Optional
 # ============ CONFIG =============
-openai.api_key = os.getenv("OPENAI_API_KEY")
 ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
-# optional: allow switching model by env or fallback
-OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")  # fallback to what's set
 # ============ HELPERS ============
 def clean_text(text: str) -> str:
-    text = re.sub(r"\s+", " ", text)
-    return text.strip()
-@st.cache_data(show_spinner=False)
 def extract_text_from_pdf(uploaded_file) -> str:
-    """
-    Extract all text from a PDF UploadFile (or file-like)
-    """
     reader = PdfReader(uploaded_file)
-    text_parts = []
     for page in reader.pages:
-        page_text = page.extract_text()
-        if page_text:
-            text_parts.append(page_text)
-    return clean_text(" ".join(text_parts))
 def chunk_text_by_chars(text: str, chunk_size: int = 3000, overlap: int = 200) -> List[str]:
-    """
-    Chunk text by character length. Overlap helps keep context across chunks.
-    """
     chunks = []
     start = 0
-    text_len = len(text)
-    while start < text_len:
         end = start + chunk_size
         chunks.append(text[start:end])
         start = max(end - overlap, end)
     return chunks
-def call_openai_chat(messages: list, max_tokens: int = 1000, temperature: float = 0.2) -> str:
-    if not openai.api_key:
-        raise RuntimeError("OPENAI_API_KEY not set")
     try:
-        response = openai.ChatCompletion.create(
-            model=OPENAI_MODEL,
-            messages=messages,
-            max_tokens=max_tokens,
-            temperature=temperature,
-        )
-        # robust extraction of content
-        content = None
-        if response and "choices" in response and len(response["choices"]) > 0:
-            choice = response["choices"][0]
-            # choice may contain 'message' dict
-            if "message" in choice and "content" in choice["message"]:
-                content = choice["message"]["content"]
-            # fallback
-            elif "text" in choice:
-                content = choice["text"]
-        return content or ""
     except Exception as e:
-        # raise the exception upward so UI can show it
-        raise
-def ask_gpt(question: str, context: str) -> str:
-    prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
     messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
         {"role": "user", "content": prompt},
     ]
-    return call_openai_chat(messages, max_tokens=600)
-def summarize_text(text: str) -> str:
-    prompt = f"Summarize the following text in a clear, concise way:\n\n{text}"
     messages = [
-        {"role": "system", "content": "You are a concise summarizer."},
         {"role": "user", "content": prompt},
     ]
-    return call_openai_chat(messages, max_tokens=400)
 def text_to_speech_eleven(text: str, voice_id: str = "pnCWbS8Aqipqqr5wzjuy") -> Optional[bytes]:
-    """
-    Send text to ElevenLabs text-to-speech API.
-    Returns raw audio bytes or None on failure.
-    """
     if not ELEVEN_API_KEY:
         return None
     url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
-    headers = {
-        "Accept": "audio/mpeg",
-        "xi-api-key": ELEVEN_API_KEY,
-        "Content-Type": "application/json"
-    }
-    data = {
-        "text": text,
-        "model_id": "eleven_multilingual_v2",
-        "voice_settings": {"stability": 0.5, "similarity_boost": 0.5}
-    }
-    try:
-        resp = requests.post(url, json=data, headers=headers, timeout=30)
-        if resp.ok:
-            return resp.content
-        else:
-            st.warning(f"ElevenLabs TTS failed: {resp.status_code} {resp.text[:300]}")
-            return None
-    except Exception as e:
-        st.warning(f"ElevenLabs TTS error: {e}")
         return None
-# ============ STREAMLIT APP ============
-st.set_page_config(page_title="PDF Q&A", layout="wide")
-st.title("📄 PDF Q&A with Summarization + Audio")
-# show API key status
-col1, col2 = st.columns(2)
-with col1:
-    if openai.api_key:
-        st.success("OpenAI key detected ✅")
     else:
-        st.error("OPENAI_API_KEY is not set. Chat features will not work.")
-with col2:
     if ELEVEN_API_KEY:
         st.success("ElevenLabs key detected ✅")
     else:
-        st.info("ELEVEN_API_KEY not set. Audio playback will be disabled.")
-uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
-if uploaded_file is not None:
     try:
-        with st.spinner("Extracting text from PDF..."):
             raw_text = extract_text_from_pdf(uploaded_file)
     except Exception as e:
-        st.error(f"Failed to extract PDF text: {e}")
         raw_text = ""
     if not raw_text:
-        st.warning("No text was extracted from this PDF. It may be scanned images (OCR needed).")
     else:
-        st.success("PDF loaded successfully ✅")
-        st.markdown(f"**Extracted text length:** {len(raw_text)} characters")
-        # Summarize button
-        if st.button("Summarize Document"):
-            with st.spinner("Summarizing..."):
-                try:
-                    # limit input size for summarization
-                    to_summarize = raw_text[:15000]
-                    summary = summarize_text(to_summarize)
-                    st.subheader("📌 Summary")
-                    st.write(summary)
-                    audio_bytes = text_to_speech_eleven(summary)
-                    if audio_bytes:
-                        st.audio(audio_bytes, format="audio/mp3")
-                    elif ELEVEN_API_KEY is None:
-                        st.info("TTS skipped because ELEVEN_API_KEY is not set.")
-                except Exception as e:
-                    st.error(f"Summarization failed: {e}")
-        # Q&A textbox
-        query = st.text_input("Ask a question about the PDF:")
         if query:
-            with st.spinner("Thinking..."):
                 try:
-                    chunks = chunk_text_by_chars(raw_text, chunk_size=3000, overlap=200)
-                    # keep a small number of chunks to control cost/time
-                    answers = []
-                    max_chunks = 3
-                    for i, c in enumerate(chunks[:max_chunks]):
-                        ans = ask_gpt(query, c)
-                        answers.append(ans)
-                    final_answer = "\n\n".join([a for a in answers if a])
-                    if not final_answer.strip():
-                        st.warning("No answer returned from the model.")
-                    else:
-                        st.subheader("💡 Answer")
-                        st.write(final_answer)
-                        audio = text_to_speech_eleven(final_answer)
-                        if audio:
-                            st.audio(audio, format="audio/mp3")
-                        elif ELEVEN_API_KEY is None:
-                            st.info("TTS skipped because ELEVEN_API_KEY is not set.")
                 except Exception as e:
                     st.error(f"Q&A failed: {e}")
 else:

+# app.py
 import os
+import re
+import json
+import requests
 import streamlit as st
 from PyPDF2 import PdfReader
 from typing import List, Optional
 # ============ CONFIG =============
+OPENROUTER_KEY = os.getenv("OPENROUTER_API_KEY")
+OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "gpt-4o-mini")  # change if you prefer
 ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
+HUGGINGFACE_KEY = os.getenv("HUGGINGFACE_API_KEY")  # optional: if set, we'll call a HF mermaid model
+HF_MERMAID_MODEL = os.getenv("HF_MERMAID_MODEL", "TroyDoesAI/MermaidStable3B")  # example community model
 # ============ HELPERS ============
 def clean_text(text: str) -> str:
+    return re.sub(r"\s+", " ", text or "").strip()
 def extract_text_from_pdf(uploaded_file) -> str:
     reader = PdfReader(uploaded_file)
+    parts = []
     for page in reader.pages:
+        t = page.extract_text()
+        if t:
+            parts.append(t)
+    return clean_text(" ".join(parts))
 def chunk_text_by_chars(text: str, chunk_size: int = 3000, overlap: int = 200) -> List[str]:
+    if not text:
+        return []
     chunks = []
     start = 0
+    while start < len(text):
         end = start + chunk_size
         chunks.append(text[start:end])
         start = max(end - overlap, end)
     return chunks
+# ---------- OpenRouter chat (replacement for openai.ChatCompletion) ----------
def openrouter_chat(messages: List[dict], model: str = OPENROUTER_MODEL, max_tokens: int = 800, temperature: float = 0.2) -> str:
    """Send OpenAI-style chat messages to OpenRouter and return the reply text.

    Args:
        messages: Chat turns as ``{"role": ..., "content": ...}`` dicts.
        model: Model identifier placed in the request payload.
        max_tokens: Upper bound on generated tokens, forwarded in the payload.
        temperature: Sampling temperature, forwarded in the payload.

    Returns:
        The first choice's ``message.content`` (or its ``text`` field when no
        message content is present); an empty string when the response has no
        usable choice.

    Raises:
        RuntimeError: If ``OPENROUTER_API_KEY`` is not set, the request cannot
            be sent, the API answers with an HTTP error status, or the body is
            not JSON.
    """
    if not OPENROUTER_KEY:
        raise RuntimeError("OPENROUTER_API_KEY not set")

    # OpenRouter serves its OpenAI-compatible API under openrouter.ai/api/v1.
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {"Authorization": f"Bearer {OPENROUTER_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

    try:
        resp = requests.post(url, json=payload, headers=headers, timeout=60)
    except requests.RequestException as e:
        # Connection errors and timeouts get the same exception type as HTTP errors.
        raise RuntimeError(f"OpenRouter request failed: {e}") from e

    try:
        resp.raise_for_status()
    except requests.HTTPError as e:
        raise RuntimeError(f"OpenRouter API error: {resp.status_code} {resp.text}") from e

    try:
        data = resp.json()
    except ValueError as e:
        raise RuntimeError("OpenRouter returned a non-JSON response") from e

    choices = data.get("choices") if isinstance(data, dict) else None
    if not choices or not isinstance(choices[0], dict):
        return ""

    first = choices[0]
    message = first.get("message")
    if isinstance(message, dict) and "content" in message:
        return message["content"] or ""
    # Completion-style responses carry the text directly on the choice.
    return first.get("text") or ""
def ask_model_for_summary(text: str) -> str:
    """Ask the chat model for a short bulleted summary of *text*.

    Builds a system turn and a user turn and sends them through
    ``openrouter_chat`` with a 400-token cap; returns whatever text it yields.
    """
    instruction = f"Summarize the following text clearly and concisely (bullet points, 5-8 bullets max):\n\n{text}"
    system_turn = {"role": "system", "content": "You are a concise summarizer."}
    user_turn = {"role": "user", "content": instruction}
    return openrouter_chat([system_turn, user_turn], max_tokens=400)
def ask_model_question(question: str, context: str) -> str:
    """Ask the chat model to answer *question* using *context* as background.

    Builds a system turn and a user turn and sends them through
    ``openrouter_chat`` with a 600-token cap; returns whatever text it yields.
    """
    instruction = f"Context:\n{context}\n\nQuestion: {question}\nAnswer in a concise helpful way."
    system_turn = {"role": "system", "content": "You are a helpful assistant."}
    user_turn = {"role": "user", "content": instruction}
    return openrouter_chat([system_turn, user_turn], max_tokens=600)
+# ---------- ElevenLabs TTS ----------
 def text_to_speech_eleven(text: str, voice_id: str = "pnCWbS8Aqipqqr5wzjuy") -> Optional[bytes]:
     if not ELEVEN_API_KEY:
         return None
     url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
+    headers = {"xi-api-key": ELEVEN_API_KEY, "Accept": "audio/mpeg", "Content-Type": "application/json"}
+    data = {"text": text, "model_id": "eleven_multilingual_v2", "voice_settings": {"stability": 0.5, "similarity_boost": 0.5}}
+    r = requests.post(url, json=data, headers=headers, timeout=30)
+    if r.ok:
+        return r.content
+    else:
+        st.warning(f"ElevenLabs TTS failed: {r.status_code} {r.text[:300]}")
         return None
+# ---------- Mermaid generation (Hugging Face model optional) ----------
def call_hf_mermaid(prompt: str, model: str = HF_MERMAID_MODEL) -> Optional[str]:
    """Ask a Hugging Face Inference API model to generate text for *prompt*.

    Args:
        prompt: The full prompt sent as the request's ``inputs``.
        model: Model repository id appended to the inference endpoint URL.

    Returns:
        The generated text when one can be found in the response; ``None``
        when ``HUGGINGFACE_KEY`` is unset, the request cannot be sent, the API
        reports an error, or the response has no recognised text field.
        Failures are reported through a Streamlit warning rather than raised,
        so callers can fall back to another generator.
    """
    if not HUGGINGFACE_KEY:
        return None

    url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {HUGGINGFACE_KEY}", "Accept": "application/json"}
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 512, "temperature": 0.2}}

    try:
        r = requests.post(url, headers=headers, json=payload, timeout=60)
    except requests.RequestException as e:
        st.warning(f"Hugging Face model call failed: {e}")
        return None
    if not r.ok:
        st.warning(f"Hugging Face model call failed: {r.status_code} {r.text[:300]}")
        return None

    try:
        j = r.json()
    except ValueError:
        st.warning("Hugging Face model call failed: response was not JSON")
        return None

    if isinstance(j, dict) and "error" in j:
        st.warning(f"Hugging Face error: {j['error']}")
        return None
    if isinstance(j, str):
        return j

    # The response shape varies by model: either a dict, or a list whose first
    # item is a dict. Look for the text under the usual keys in either case.
    candidate = j[0] if isinstance(j, list) and j else j
    if isinstance(candidate, dict):
        for k in ("generated_text", "output", "text"):
            value = candidate.get(k)
            # Only hand back real strings; callers call str methods on the result.
            if isinstance(value, str):
                return value
    return None
def generate_mermaid_from_summary(summary: str) -> str:
    """Build Mermaid flowchart code that represents *summary*.

    The Hugging Face model is tried first through ``call_hf_mermaid``. When it
    returns nothing usable, a flowchart is built locally: the summary is split
    into bullets (or sentences when it is a single paragraph), at most eight
    of them become nodes, and the nodes are linked in order.

    Args:
        summary: The summary text; ``None`` is treated as empty.

    Returns:
        Mermaid source code. The local fallback always starts with
        ``flowchart TD`` and ends with a newline.
    """
    summary = summary or ""

    # First try the hosted model.
    prompt = (
        "Given the following concise summary, produce a Mermaid flowchart (use 'graph TD' or 'flowchart TD' syntax). "
        "Output only the Mermaid code block (no extra explanation). Summary:\n\n" + summary
    )
    hf_output = call_hf_mermaid(prompt)
    if hf_output:
        # Some text-generation models echo the prompt before their answer.
        if hf_output.startswith(prompt):
            hf_output = hf_output[len(prompt):]
        # Prefer the contents of a ```mermaid ... ``` fence when present.
        fenced = re.search(r"```(?:mermaid)?\n([\s\S]+?)```", hf_output, re.IGNORECASE)
        if fenced:
            return fenced.group(1).strip()
        if hf_output.strip():
            return hf_output.strip()

    # Local fallback. Split on line breaks and bullet glyphs only; splitting on
    # every hyphen would cut hyphenated words ("well-known") into two nodes.
    nodes = []
    for piece in re.split(r"\n+|•", summary):
        # Drop a leading list marker such as "- ", "* " or "1. ".
        label = clean_text(re.sub(r"^\s*(?:[-*•]+|\d+[.)])\s+", "", piece))
        if label:
            nodes.append(label)
    if len(nodes) == 1:
        # A single paragraph: fall back to one node per sentence.
        nodes = [s for s in re.split(r"(?<=[.!?])\s+", nodes[0]) if s]

    # Keep a reasonable number of nodes.
    nodes = nodes[:8]
    if not nodes:
        nodes = ["Summary empty"]

    lines = ["flowchart TD"]
    for i, label in enumerate(nodes):
        # Labels are wrapped in double quotes, so swap inner double quotes for
        # single ones. This is done outside the f-string because backslashes
        # inside an f-string expression are a SyntaxError before Python 3.12.
        safe_label = label.replace('"', "'")[:80]
        lines.append(f'  A{i}["{safe_label}"]')
    lines.extend(f"  A{i} --> A{i + 1}" for i in range(len(nodes) - 1))
    return "\n".join(lines) + "\n"
+# ---------- Render mermaid in browser ----------
def render_mermaid(mermaid_code: str, height: int = 400):
    """Render Mermaid code client-side inside a Streamlit HTML component.

    The component loads mermaid.js from the jsDelivr CDN and places the code
    in a ``<pre class="mermaid">`` element.

    Args:
        mermaid_code: Mermaid source to draw; ``None`` is treated as empty.
        height: Height of the embedded component in pixels.
    """
    # Local import: `html` is not among the modules imported at the top of the file.
    from html import escape

    # The code may come from a model, so escape it before embedding it in
    # markup. The browser decodes the entities again when it builds the <pre>
    # text node, so the text content is unchanged.
    safe_code = escape(mermaid_code or "", quote=False)
    page = f"""
    <div id="mermaid-target">
      <pre class="mermaid">
{safe_code}
      </pre>
    </div>
    <script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
    <script>
      mermaid.initialize({{startOnLoad:true}});
    </script>
    """
    st.components.v1.html(page, height=height, scrolling=True)
+# ============ STREAMLIT UI ============
+st.set_page_config(page_title="PDF Q&A + Summary Diagram", layout="wide")
+st.title("📄 PDF Q&A + Summary Diagram + Audio")
+# API status
+c1, c2, c3 = st.columns(3)
+with c1:
+    if OPENROUTER_KEY:
+        st.success("OpenRouter key detected ✅")
     else:
+        st.error("OPENROUTER_API_KEY not set — summarization and Q&A will not work.")
+with c2:
+    if HUGGINGFACE_KEY:
+        st.success("Hugging Face key detected (will try HF mermaid model) ✅")
+    else:
+        st.info("No HUGGINGFACE_API_KEY — app will use local Mermaid fallback.")
+with c3:
     if ELEVEN_API_KEY:
         st.success("ElevenLabs key detected ✅")
     else:
+        st.info("No ELEVEN_API_KEY — audio disabled.")
+uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
+if uploaded_file:
     try:
+        with st.spinner("Extracting text..."):
             raw_text = extract_text_from_pdf(uploaded_file)
     except Exception as e:
+        st.error(f"PDF extraction failed: {e}")
         raw_text = ""
     if not raw_text:
+        st.warning("No text extracted. If the PDF is scanned images you need OCR (Tesseract) or an OCR service.")
     else:
+        st.success(f"Extracted {len(raw_text)} characters")
+        if st.button("Summarize and generate diagram"):
+            try:
+                with st.spinner("Summarizing with OpenRouter..."):
+                    # limit to avoid huge inputs
+                    to_sum = raw_text[:15000]
+                    summary = ask_model_for_summary(to_sum)
+                st.subheader("📌 Summary")
+                st.write(summary)
+                # TTS summary
+                audio = text_to_speech_eleven(summary)
+                if audio:
+                    st.audio(audio, format="audio/mp3")
+                elif not ELEVEN_API_KEY:
+                    st.info("TTS not available (ELEVEN_API_KEY missing).")
+                # produce mermaid
+                mermaid_code = generate_mermaid_from_summary(summary)
+                st.subheader("🗺️ Summary Diagram (Mermaid)")
+                render_mermaid(mermaid_code, height=480)
+                # also show the raw mermaid code for copy/paste
+                st.markdown("**Mermaid code (copy/paste):**")
+                st.code(mermaid_code, language="mermaid")
+            except Exception as e:
+                st.error(f"Summarize/diagram generation failed: {e}")
+        # Q&A box
+        query = st.text_input("Ask a question about the PDF (use Enter):")
         if query:
+            if not OPENROUTER_KEY:
+                st.error("Cannot answer — OPENROUTER_API_KEY is not set.")
+            else:
                 try:
+                    with st.spinner("Answering via OpenRouter..."):
+                        chunks = chunk_text_by_chars(raw_text, chunk_size=3000, overlap=200)
+                        answers = []
+                        for c in chunks[:3]:  # limit to 3 chunks
+                            a = ask_model_question(query, c)
+                            if a:
+                                answers.append(a)
+                        final = "\n\n".join(answers).strip()
+                        if not final:
+                            st.warning("No answer returned from model.")
+                        else:
+                            st.subheader("💡 Answer")
+                            st.write(final)
+                            audio2 = text_to_speech_eleven(final)
+                            if audio2:
+                                st.audio(audio2, format="audio/mp3")
                 except Exception as e:
                     st.error(f"Q&A failed: {e}")
 else: