Spaces:

ChatBotsTA
/

pdf-summary

Sleeping

App Files Community

ChatBotsTA committed on Sep 5, 2025

Commit

d93d7bb

verified ·

1 Parent(s): 6401b0a

Update app.py

Browse files

Files changed (1) hide show

app.py +138 -267

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py
 import os
 import re
 import tempfile
@@ -24,33 +24,15 @@ HUGGINGFACE_KEY = os.getenv("HUGGINGFACE_API_KEY", st.secrets.get("HUGGINGFACE_A
 HF_MERMAID_MODEL = os.getenv("HF_MERMAID_MODEL", "TroyDoesAI/MermaidStable3B")
 # ============ HELPERS ============
-# (rest of the helper functions from your original code are here, unchanged)
 def clean_text(text: str) -> str:
     return re.sub(r"\s+", " ", text or "").strip()
 def extract_text_from_pdf(uploaded_file) -> str:
     reader = PdfReader(uploaded_file)
-    parts = []
-    for page in reader.pages:
-        t = page.extract_text()
-        if t:
-            parts.append(t)
     return clean_text(" ".join(parts))
-def chunk_text_by_chars(text: str, chunk_size: int = 3000, overlap: int = 200) -> List[str]:
-    if not text:
-        return []
-    chunks = []
-    start = 0
-    while start < len(text):
-        end = start + chunk_size
-        chunks.append(text[start:end])
-        start = max(end - overlap, end)
-    return chunks
-# ============ OpenRouter wrapper (safe) ============
-def openrouter_chat(messages: List[dict], model: str = OPENROUTER_MODEL, max_tokens: int = 800, temperature: float = 0.2):
-    """Return tuple (success: bool, text_or_error: str)."""
     if not OPENROUTER_KEY:
         return False, "OPENROUTER_API_KEY not set"
     url = "https://api.openrouter.ai/v1/chat/completions"
@@ -60,294 +42,183 @@ def openrouter_chat(messages: List[dict], model: str = OPENROUTER_MODEL, max_tok
         resp = requests.post(url, json=payload, headers=headers, timeout=30)
         resp.raise_for_status()
         data = resp.json()
-        choices = data.get("choices", [])
-        if choices:
-            c = choices[0]
-            if "message" in c and isinstance(c["message"], dict):
-                content = c["message"].get("content")
-                if isinstance(content, dict) and "content" in content:
-                    return True, content["content"]
-                elif isinstance(content, str):
-                    return True, content
-            if "text" in c:
-                return True, c["text"]
-        if "text" in data:
-            return True, data["text"]
-        return False, "OpenRouter responded with unexpected shape"
     except Exception as e:
-        return False, f"OpenRouter request failed: {repr(e)}"
-# ============ Local extractive summarizer (offline) ============
 def extractive_summary(text: str, num_sentences: int = 6) -> str:
     if not text:
         return ""
     sentences = re.split(r'(?<=[.!?])\s+', text)
     words = re.findall(r'\w+', text.lower())
-    stopwords = set(["the","and","is","in","to","of","a","that","it","for","on","with","as","are","was","be","by","an","or"])
-    freq = {}
-    for w in words:
-        if w in stopwords or len(w) < 2:
-            continue
-        freq[w] = freq.get(w, 0) + 1
-    if not freq:
-        return "Unable to summarize (text too short)."
-    sent_scores = []
-    for s in sentences:
-        s_words = re.findall(r'\w+', s.lower())
-        score = sum(freq.get(w, 0) for w in s_words)
-        sent_scores.append((score, s))
-    sent_scores.sort(reverse=True, key=lambda x: x[0])
-    chosen = [s for _, s in sent_scores[:num_sentences]]
-    chosen_sorted = sorted(chosen, key=lambda s: text.find(s))
-    bullets = "\n".join(f"- {clean_text(s)}" for s in chosen_sorted if s.strip())
-    return bullets if bullets else clean_text(" ".join(chosen_sorted))
-# ============ ElevenLabs TTS (remote) ============
-def eleven_tts_bytes(text: str, voice_id: str = "pnCWbS8Aqipqqr5wzjuy"):
     if not ELEVEN_API_KEY:
         return False, "ELEVEN_API_KEY not set"
-    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
     headers = {"xi-api-key": ELEVEN_API_KEY, "Accept": "audio/mpeg", "Content-Type": "application/json"}
-    data = {"text": text, "model_id": "eleven_multilingual_v2", "voice_settings": {"stability": 0.5, "similarity_boost": 0.5}}
     try:
         r = requests.post(url, json=data, headers=headers, timeout=30)
-        if r.ok:
-            return True, r.content
-        return False, f"ElevenLabs returned {r.status_code}: {r.text[:300]}"
     except Exception as e:
-        return False, f"ElevenLabs request failed: {repr(e)}"
-# ============ Local TTS fallback (pyttsx3) ============
 def pyttsx3_tts_file(text: str):
     if not HAS_PYTTSX3:
         return False, "pyttsx3 not installed"
     try:
         engine = pyttsx3.init()
-        tf = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-        tf_name = tf.name
-        tf.close()
-        engine.save_to_file(text, tf_name)
         engine.runAndWait()
-        with open(tf_name, "rb") as f:
-            b = f.read()
-        return True, b
     except Exception as e:
-        return False, f"pyttsx3 TTS failed: {repr(e)}"
-# ============ Hugging Face mermaid (optional) ============
-def call_hf_mermaid(prompt: str, model: str = HF_MERMAID_MODEL):
-    if not HUGGINGFACE_KEY:
-        return False, "HUGGINGFACE_API_KEY not set"
-    url = f"https://api-inference.huggingface.co/models/{model}"
-    headers = {"Authorization": f"Bearer {HUGGINGFACE_KEY}", "Accept": "application/json"}
-    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 512, "temperature": 0.2}}
-    try:
-        r = requests.post(url, headers=headers, json=payload, timeout=40)
-        if not r.ok:
-            return False, f"HuggingFace returned {r.status_code}: {r.text[:300]}"
-        j = r.json()
-        if isinstance(j, list) and len(j) > 0 and isinstance(j[0], dict) and "generated_text" in j[0]:
-            return True, j[0]["generated_text"]
-        if isinstance(j, str):
-            return True, j
-        if isinstance(j, dict):
-            for k in ("generated_text", "output", "text"):
-                if k in j:
-                    return True, j[k]
-        return False, "HF: unexpected response shape"
-    except Exception as e:
-        return False, f"HuggingFace request failed: {repr(e)}"
 def generate_mermaid_from_summary(summary: str):
-    prompt = ("Given the following concise summary, produce a Mermaid flowchart (use 'flowchart TD' syntax). "
-              "Output only the Mermaid code block. Summary:\n\n" + summary)
-    ok, hf_out = call_hf_mermaid(prompt)
-    if ok:
-        m = re.search(r"```(?:mermaid)?\n([\s\S]+?)```", hf_out, re.IGNORECASE)
-        return hf_out if m is None else m.group(1).strip()
-    lines = re.split(r"\n+|-{1,}\s*|•\s*", summary)
-    nodes = [clean_text(l) for l in lines if clean_text(l)]
-    nodes = nodes[:8]
     if not nodes:
-        nodes = ["Summary empty"]
-    mermaid = "flowchart TD\n"
-    for i, n in enumerate(nodes):
-        node_text = n.replace('"', "'")[:80]
-        mermaid += f'  A{i}["{node_text}"]\n'
-    for i in range(len(nodes) - 1):
-        mermaid += f"  A{i} --> A{i+1}\n"
-    return mermaid
-def render_mermaid(mermaid_code: str, height: int = 420):
-    html = f"""
-    <div id="mermaid-target">
-      <pre class="mermaid">
-{mermaid_code}
-      </pre>
     </div>
     <script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
-    <script>
-      mermaid.initialize({{startOnLoad:true}});
-    </script>
     """
-    st.components.v1.html(html, height=height, scrolling=True)
 # ============ STREAMLIT UI ============
-st.set_page_config(page_title="PDF Q&A resilient", layout="wide")
-st.title("📄 PDF Q&A — resilient (OpenRouter → local fallback)")
-# Session state initialization
-if 'text_data' not in st.session_state:
-    st.session_state.text_data = None
-if 'summary' not in st.session_state:
-    st.session_state.summary = None
-if 'mermaid' not in st.session_state:
-    st.session_state.mermaid = None
-if 'diagnostics' not in st.session_state:
-    st.session_state.diagnostics = {"summary_source": None, "mermaid_source": None, "tts_source": None}
-def process_pdf():
-    uploaded_file = st.session_state.uploaded_file
-    if uploaded_file:
-        try:
-            with st.spinner("Extracting text from PDF..."):
-                raw_text = extract_text_from_pdf(uploaded_file)
-                st.session_state.text_data = raw_text
-                st.success(f"Extracted {len(raw_text)} characters")
-        except Exception as e:
-            st.error(f"PDF extraction failed: {e}")
-            st.session_state.text_data = None
-    else:
-        st.session_state.text_data = None
-def generate_outputs():
-    raw_text = st.session_state.text_data
-    if not raw_text:
-        st.error("No text available to process. Please upload a PDF.")
-        return
-    used = st.session_state.diagnostics
-    # Summarize logic
-    with st.spinner("Trying OpenRouter summarization..."):
-        prompt = f"Summarize the following text in 5-8 concise bullets:\n\n{raw_text[:15000]}"
-        messages = [{"role": "system", "content": "You are a concise summarizer."},
-                    {"role": "user", "content": prompt}]
-        ok, out = openrouter_chat(messages, max_tokens=400, model=OPENROUTER_MODEL)
-        if ok:
-            used["summary_source"] = "openrouter"
-            summary = out
         else:
-            used["summary_source"] = f"fallback_local (reason: {out})"
-            summary = extractive_summary(raw_text, num_sentences=6)
-    st.session_state.summary = summary
-    st.session_state.diagnostics = used
-    # Mermaid logic
-    with st.spinner("Generating Mermaid diagram (HF → local fallback)..."):
-        mermaid = generate_mermaid_from_summary(summary)
-        used["mermaid_source"] = "huggingface" if HUGGINGFACE_KEY and mermaid.strip().startswith(("flowchart","graph")) else "local"
-    st.session_state.mermaid = mermaid
-    st.session_state.diagnostics = used
-    st.success("Summary and Diagram generated!")
-# UI layout
-c1, c2, c3 = st.columns(3)
-with c1:
-    st.write("OpenRouter:")
-    st.success("Key present") if OPENROUTER_KEY else st.error("Key missing — will use local summarizer/Q&A fallback")
-with c2:
-    st.write("Hugging Face:")
-    st.success("Key present (optional)") if HUGGINGFACE_KEY else st.info("Key missing — using local Mermaid fallback")
-with c3:
-    st.write("Audio:")
-    if ELEVEN_API_KEY:
-        st.success("ElevenLabs key present (preferred)")
-    elif HAS_PYTTSX3:
-        st.info("Using local pyttsx3 fallback TTS")
-    else:
-        st.info("No ElevenLabs key and pyttsx3 not available")
-st.file_uploader("Upload a PDF", type=["pdf"], key='uploaded_file', on_change=process_pdf)
-if st.session_state.text_data:
-    st.button("Summarize & Diagram", on_click=generate_outputs)
-if st.session_state.summary:
-    st.subheader("📌 Summary")
-    st.write(st.session_state.summary)
-    st.markdown(f"**Summary source:** {st.session_state.diagnostics['summary_source']}")
-    st.subheader("🗺️ Summary Diagram")
-    render_mermaid(st.session_state.mermaid, height=460)
-    st.code(st.session_state.mermaid, language="mermaid")
-    st.markdown(f"**Mermaid source:** {st.session_state.diagnostics['mermaid_source']}")
-    st.write("### TTS Audio")
-    if st.checkbox("Generate audio for summary"):
-        with st.spinner("Attempting TTS..."):
             audio_bytes = None
-            if ELEVEN_API_KEY:
                 ok, out = eleven_tts_bytes(st.session_state.summary)
                 if ok:
-                    st.session_state.diagnostics["tts_source"] = "elevenlabs"
                     audio_bytes = out
                 else:
-                    st.session_state.diagnostics["tts_source"] = f"elevenlabs_failed ({out})"
-                    if HAS_PYTTSX3:
-                        ok2, out2 = pyttsx3_tts_file(st.session_state.summary)
-                        if ok2:
-                            st.session_state.diagnostics["tts_source"] = "pyttsx3"
-                            audio_bytes = out2
-            elif HAS_PYTTSX3:
-                ok2, out2 = pyttsx3_tts_file(st.session_state.summary)
-                if ok2:
-                    st.session_state.diagnostics["tts_source"] = "pyttsx3"
-                    audio_bytes = out2
             if audio_bytes:
-                if st.session_state.diagnostics["tts_source"] == "elevenlabs":
-                    st.audio(audio_bytes, format="audio/mp3")
-                else:
-                    st.audio(audio_bytes, format="audio/wav")
             else:
-                st.error("Audio generation failed. Check your API keys and local setup.")
-            st.markdown(f"**TTS source:** {st.session_state.diagnostics['tts_source']}")
-    st.write("### Diagnostics")
-    st.json(st.session_state.diagnostics)
     st.markdown("---")
-    st.subheader("❓ Q&A")
-    query = st.text_input("Ask a question about the PDF:")
-    if query:
-        with st.spinner("Processing your question..."):
-            if OPENROUTER_KEY:
-                prompt = f"Context:\n{st.session_state.text_data[:15000]}\n\nQuestion: {query}\nAnswer concisely."
-                messages = [{"role": "system", "content": "You are a helpful assistant."},
-                            {"role": "user", "content": prompt}]
-                ok, out = openrouter_chat(messages, max_tokens=600, model=OPENROUTER_MODEL)
                 if ok:
-                    st.info("Answer from OpenRouter:")
-                    st.write(out)
-                else:
-                    st.warning(f"OpenRouter failed: {out}\nFalling back to local Q&A.")
-                    sentences = re.split(r'(?<=[.!?])\s+', st.session_state.text_data)
-                    q_words = re.findall(r'\w+', query.lower())[:3]
-                    matches = [s for s in sentences if all(w in s.lower() for w in q_words)]
-                    if matches:
-                        st.info("Answer from local fallback:")
-                        st.write(matches[:3])
-                    else:
-                        st.info("No good local match found.")
-            else:
-                st.info("OpenRouter key missing. Using local Q&A fallback.")
-                sentences = re.split(r'(?<=[.!?])\s+', st.session_state.text_data)
-                q_words = re.findall(r'\w+', query.lower())[:3]
-                matches = [s for s in sentences if all(w in s.lower() for w in q_words)]
-                if matches:
-                    st.info("Answer from local fallback:")
-                    st.write(matches[:3])
                 else:
-                    st.info("No good local match found.")

+# app.py - Corrected and Simplified
 import os
 import re
 import tempfile
 HF_MERMAID_MODEL = os.getenv("HF_MERMAID_MODEL", "TroyDoesAI/MermaidStable3B")
 # ============ HELPERS ============
 def clean_text(text: str) -> str:
     return re.sub(r"\s+", " ", text or "").strip()
 def extract_text_from_pdf(uploaded_file) -> str:
     reader = PdfReader(uploaded_file)
+    parts = [page.extract_text() for page in reader.pages if page.extract_text()]
     return clean_text(" ".join(parts))
+def openrouter_chat(messages: List[dict], model: str, max_tokens: int, temperature: float):
     if not OPENROUTER_KEY:
         return False, "OPENROUTER_API_KEY not set"
     url = "https://api.openrouter.ai/v1/chat/completions"
         resp = requests.post(url, json=payload, headers=headers, timeout=30)
         resp.raise_for_status()
         data = resp.json()
+        content = data['choices'][0]['message']['content']
+        return True, content
     except Exception as e:
+        return False, f"OpenRouter request failed: {e}"
 def extractive_summary(text: str, num_sentences: int = 6) -> str:
     if not text:
         return ""
     sentences = re.split(r'(?<=[.!?])\s+', text)
     words = re.findall(r'\w+', text.lower())
+    stopwords = set(["the", "and", "is", "in", "to", "of", "a", "that", "it", "for"])
+    freq = {w: words.count(w) for w in words if w not in stopwords and len(w) > 1}
+    sent_scores = [(sum(freq.get(w, 0) for w in re.findall(r'\w+', s.lower())), s) for s in sentences]
+    sent_scores.sort(reverse=True)
+    chosen_sentences = sorted([s for _, s in sent_scores[:num_sentences]], key=text.find)
+    return "\n".join(f"- {clean_text(s)}" for s in chosen_sentences if s.strip())
+def eleven_tts_bytes(text: str):
     if not ELEVEN_API_KEY:
         return False, "ELEVEN_API_KEY not set"
+    url = "https://api.elevenlabs.io/v1/text-to-speech/pnCWbS8Aqipqqr5wzjuy"
     headers = {"xi-api-key": ELEVEN_API_KEY, "Accept": "audio/mpeg", "Content-Type": "application/json"}
+    data = {"text": text, "model_id": "eleven_multilingual_v2"}
     try:
         r = requests.post(url, json=data, headers=headers, timeout=30)
+        r.raise_for_status()
+        return True, r.content
     except Exception as e:
+        return False, f"ElevenLabs request failed: {e}"
 def pyttsx3_tts_file(text: str):
     if not HAS_PYTTSX3:
         return False, "pyttsx3 not installed"
     try:
         engine = pyttsx3.init()
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+        engine.save_to_file(text, temp_file.name)
         engine.runAndWait()
+        with open(temp_file.name, "rb") as f:
+            return True, f.read()
     except Exception as e:
+        return False, f"pyttsx3 TTS failed: {e}"
 def generate_mermaid_from_summary(summary: str):
+    prompt = ("Create a concise Mermaid flowchart ('flowchart TD') from the following summary. "
+              "Output only the Mermaid code block. Summary:\n" + summary)
+    if HUGGINGFACE_KEY:
+        url = f"https://api-inference.huggingface.co/models/{HF_MERMAID_MODEL}"
+        headers = {"Authorization": f"Bearer {HUGGINGFACE_KEY}"}
+        payload = {"inputs": prompt, "parameters": {"max_new_tokens": 512}}
+        try:
+            response = requests.post(url, headers=headers, json=payload, timeout=40)
+            if response.ok and response.json():
+                text = response.json()[0]['generated_text']
+                match = re.search(r"```(?:mermaid)?\n([\s\S]+?)```", text)
+                if match:
+                    return match.group(1).strip()
+        except Exception:
+            pass  # Fallback to local
+    # Local fallback logic
+    nodes = [re.sub(r'^- ', '', line).strip() for line in summary.split('\n') if line.strip()]
     if not nodes:
+        return "graph TD\n  A[Summary Empty]"
+    mermaid_code = "graph TD\n"
+    for i, node_text in enumerate(nodes[:8]):
+        mermaid_code += f'  A{i}["{node_text.replace('"', "'")[:60]}"]\n'
+    for i in range(len(nodes[:8]) - 1):
+        mermaid_code += f"  A{i} --> A{i+1}\n"
+    return mermaid_code
+def render_mermaid(mermaid_code: str):
+    html_code = f"""
+    <div class="mermaid">
+      {mermaid_code}
     </div>
     <script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
+    <style>
+      .mermaid-container {{ height: 420px; border: 1px solid #ddd; padding: 10px; border-radius: 8px; }}
+    </style>
     """
+    st.components.v1.html(html_code, height=450, scrolling=True)
 # ============ STREAMLIT UI ============
+st.set_page_config(page_title="PDF Assistant", layout="wide")
+st.title("📄 PDF Assistant: Summary, Diagram, Q&A")
+st.markdown("---")
+st.session_state.setdefault('raw_text', None)
+st.session_state.setdefault('summary', None)
+st.session_state.setdefault('mermaid_code', None)
+st.session_state.setdefault('chat_history', [])
+with st.sidebar:
+    st.header("🔑 API Status")
+    st.markdown(f"**OpenRouter:** {'✅ Key present' if OPENROUTER_KEY else '❌ Key missing. Q&A will be local.'}")
+    st.markdown(f"**Hugging Face:** {'✅ Key present' if HUGGINGFACE_KEY else '❌ Key missing. Diagram will be local.'}")
+    st.markdown(f"**ElevenLabs:** {'✅ Key present' if ELEVEN_API_KEY else '❌ Key missing. TTS will be local.'}")
+    if not HAS_PYTTSX3:
+        st.warning("pyttsx3 not installed. Local audio disabled.")
+uploaded_file = st.file_uploader("1. Upload a PDF", type=["pdf"])
+if uploaded_file and st.session_state.raw_text is None:
+    with st.spinner("Extracting text..."):
+        st.session_state.raw_text = extract_text_from_pdf(uploaded_file)
+        if st.session_state.raw_text:
+            st.success("Text extracted successfully!")
         else:
+            st.warning("No text extracted from PDF. Is it a scanned image?")
+if st.session_state.raw_text:
+    st.markdown("---")
+    if st.button("2. Generate Summary & Diagram"):
+        with st.spinner("Generating summary and diagram..."):
+            # Generate Summary
+            prompt = f"Summarize the text in 5-8 concise bullet points:\n\n{st.session_state.raw_text[:15000]}"
+            ok, out = openrouter_chat([{"role": "user", "content": prompt}], OPENROUTER_MODEL, 400, 0.2)
+            st.session_state.summary = out if ok else extractive_summary(st.session_state.raw_text)
+            st.session_state.mermaid_code = generate_mermaid_from_summary(st.session_state.summary)
+    if st.session_state.summary:
+        st.header("📌 Summary")
+        st.markdown(st.session_state.summary)
+        st.header("🗺️ Diagram")
+        render_mermaid(st.session_state.mermaid_code)
+        st.code(st.session_state.mermaid_code, language="mermaid")
+        st.header("🔊 Audio")
+        if st.button("Generate Audio"):
             audio_bytes = None
+            audio_source = "None"
+            with st.spinner("Generating audio..."):
                 ok, out = eleven_tts_bytes(st.session_state.summary)
                 if ok:
                     audio_bytes = out
+                    audio_source = "ElevenLabs"
+                elif HAS_PYTTSX3:
+                    ok2, out2 = pyttsx3_tts_file(st.session_state.summary)
+                    if ok2:
+                        audio_bytes = out2
+                        audio_source = "pyttsx3"
                 else:
+                    st.error("Audio generation failed: No API key and pyttsx3 not available.")
             if audio_bytes:
+                st.audio(audio_bytes, format="audio/mpeg" if audio_source == "ElevenLabs" else "audio/wav")
+                st.info(f"Audio generated using: **{audio_source}**")
             else:
+                st.error("Audio generation failed. Check your API key and local setup.")
     st.markdown("---")
+    st.header("💬 Q&A Chatbot")
+    for chat_message in st.session_state.chat_history:
+        role, content = chat_message
+        with st.chat_message(role):
+            st.markdown(content)
+    prompt = st.chat_input("Ask a question about the PDF")
+    if prompt:
+        st.session_state.chat_history.append(("user", prompt))
+        with st.chat_message("user"):
+            st.markdown(prompt)
+        with st.chat_message("assistant"):
+            with st.spinner("Thinking..."):
+                qa_prompt = f"Context:\n{st.session_state.raw_text[:15000]}\n\nQuestion: {prompt}\nAnswer concisely."
+                ok, out = openrouter_chat([{"role": "user", "content": qa_prompt}], OPENROUTER_MODEL, 600, 0.2)
                 if ok:
+                    st.markdown(out)
+                    st.session_state.chat_history.append(("assistant", out))
                 else:
+                    # Naive local fallback for Q&A
+                    st.warning("OpenRouter failed. Using local fallback.")
+                    sentences = re.split(r'(?<=[.!?])\s+', st.session_state.raw_text)
+                    matches = [s for s in sentences if all(w in s.lower() for w in re.findall(r'\w+', prompt.lower())[:3])]
+                    fallback_answer = " ".join(matches[:3]) if matches else "I couldn't find a relevant answer in the document."
+                    st.markdown(fallback_answer)
+                    st.session_state.chat_history.append(("assistant", fallback_answer))