ChatBotsTA commited on
Commit
b824d13
·
verified ·
1 Parent(s): 06f01a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -133
app.py CHANGED
@@ -1,145 +1,194 @@
 
1
  import os
2
- import gradio as gr
3
- import fitz # PyMuPDF
4
- import pinecone
5
- import qdrant_client
6
- from openai import OpenAI
7
- import graphviz
8
  import requests
9
-
10
- # =================== CONFIG ===================
11
- OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
12
- PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
13
- QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
14
- ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
15
-
16
- client = OpenAI(api_key=OPENAI_API_KEY)
17
- VECTOR_DB = "qdrant" # change to "pinecone" if you want Pinecone
18
-
19
-
20
- # =================== HELPERS ===================
21
- def extract_text_from_pdf(pdf_path):
22
- doc = fitz.open(pdf_path)
23
- text = ""
24
- for page in doc:
25
- text += page.get_text()
26
- return text
27
-
28
-
29
- def ingest_text(text, doc_name="doc"):
30
- if VECTOR_DB == "qdrant":
31
- qclient = qdrant_client.QdrantClient(":memory:")
32
- qclient.recreate_collection(
33
- collection_name=doc_name,
34
- vectors_config={"size": 1536, "distance": "Cosine"}
35
- )
36
- qclient.upload_points(
37
- collection_name=doc_name,
38
- points=[
39
- {"id": 0, "vector": [0.0]*1536, "payload": {"text": text}}
40
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  )
42
- elif VECTOR_DB == "pinecone":
43
- pinecone.init(api_key=PINECONE_API_KEY, environment="gcp-starter")
44
- if doc_name not in pinecone.list_indexes():
45
- pinecone.create_index(doc_name, dimension=1536, metric="cosine")
46
- index = pinecone.Index(doc_name)
47
- index.upsert([(str(0), [0.0]*1536, {"text": text})])
48
- return f"Ingested {len(text.split())} words."
49
-
50
-
51
- def summarize_text(text):
52
- resp = client.chat.completions.create(
53
- model="gpt-3.5-turbo",
54
- messages=[{"role": "system", "content": "Summarize clearly."},
55
- {"role": "user", "content": text[:4000]}]
56
- )
57
- return resp.choices[0].message.content
58
-
59
-
60
- def generate_audio(summary_text):
61
- if not ELEVENLABS_API_KEY:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  return None
63
-
64
- url = "https://api.elevenlabs.io/v1/text-to-speech/pnYgVoCjYp9s9v1sXlKS" # default voice
65
  headers = {
66
- "xi-api-key": ELEVENLABS_API_KEY,
 
67
  "Content-Type": "application/json"
68
  }
69
  data = {
70
- "text": summary_text,
71
- "voice_settings": {"stability": 0.5, "similarity_boost": 0.7}
 
72
  }
73
- response = requests.post(url, headers=headers, json=data)
74
-
75
- if response.status_code == 200:
76
- audio_path = "summary_audio.mp3"
77
- with open(audio_path, "wb") as f:
78
- f.write(response.content)
79
- return audio_path
80
- else:
 
81
  return None
82
 
 
 
 
83
 
84
- def generate_diagram(text):
85
- dot = graphviz.Digraph()
86
- dot.node("A", "PDF Content")
87
- dot.node("B", "Summary")
88
- dot.node("C", "Key Ideas")
89
- dot.edges([("A", "B"), ("B", "C")])
90
- return dot.pipe(format="png")
91
-
92
-
93
- def chat_with_pdf(text, question):
94
- resp = client.chat.completions.create(
95
- model="gpt-3.5-turbo",
96
- messages=[
97
- {"role": "system", "content": "You are a helpful assistant with access to the document."},
98
- {"role": "user", "content": f"Document:\n{text[:3000]}\n\nQuestion: {question}"}
99
- ]
100
- )
101
- return resp.choices[0].message.content
102
-
103
-
104
- # =================== GRADIO APP ===================
105
- with gr.Blocks(theme="soft") as demo:
106
- gr.Markdown("# πŸ“š PDF Assistant β€” Summarize, Diagram, Audio, Chat")
107
-
108
- with gr.Row():
109
- pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
110
- doc_name = gr.Textbox(label="Doc name", value="mydoc")
111
-
112
- ingest_btn = gr.Button("πŸš€ Ingest PDF")
113
- ingest_status = gr.Markdown("")
114
-
115
- summary_btn = gr.Button("πŸ“– Summarize")
116
- summary_output = gr.Textbox(label="Summary", lines=8)
117
-
118
- audio_output = gr.Audio(label="πŸ”Š Summary Audio")
119
-
120
- diagram_btn = gr.Button("πŸ“ Generate Diagram")
121
- diagram_output = gr.Image(type="numpy", label="Diagram Preview")
122
-
123
- with gr.Row():
124
- question = gr.Textbox(label="Ask the PDF a question")
125
- answer = gr.Textbox(label="Answer")
126
- ask_btn = gr.Button("πŸ’¬ Ask")
127
-
128
- pdf_text_state = gr.State("")
129
-
130
- def handle_ingest(pdf_file, doc_name):
131
- text = extract_text_from_pdf(pdf_file.name)
132
- status = ingest_text(text, doc_name)
133
- return text, status
134
-
135
- def handle_summary(text):
136
- summary = summarize_text(text)
137
- audio = generate_audio(summary)
138
- return summary, audio
139
-
140
- ingest_btn.click(handle_ingest, [pdf_file, doc_name], [pdf_text_state, ingest_status])
141
- summary_btn.click(handle_summary, pdf_text_state, [summary_output, audio_output])
142
- diagram_btn.click(lambda t: generate_diagram(t), pdf_text_state, diagram_output)
143
- ask_btn.click(lambda t, q: chat_with_pdf(t, q), [pdf_text_state, question], answer)
144
-
145
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # streamlit_pdf_qa.py
2
  import os
3
+ import streamlit as st
4
+ import openai
5
+ from PyPDF2 import PdfReader
 
 
 
6
  import requests
7
+ import re
8
+ from typing import List, Optional
9
+
10
+ # ============ CONFIG =============
11
+ openai.api_key = os.getenv("OPENAI_API_KEY")
12
+ ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
13
+ # optional: allow switching model by env or fallback
14
+ OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini") # fallback to what's set
15
+
16
+ # ============ HELPERS ============
17
def clean_text(text: str) -> str:
    """Collapse all runs of whitespace in *text* to single spaces and trim the ends."""
    # str.split() with no argument splits on any whitespace run and drops
    # leading/trailing whitespace, so joining with a single space is
    # equivalent to re.sub(r"\s+", " ", text).strip().
    return " ".join(text.split())
20
+
21
@st.cache_data(show_spinner=False)
def extract_text_from_pdf(uploaded_file) -> str:
    """
    Extract all text from an uploaded PDF (file-like object).

    Pages with no extractable text are skipped; the remaining page texts
    are joined with spaces and whitespace-normalized via clean_text().
    Cached by Streamlit so re-runs with the same upload are free.
    """
    pdf = PdfReader(uploaded_file)
    # page.extract_text() may return None/"" for image-only pages; drop those.
    page_texts = [page.extract_text() for page in pdf.pages]
    joined = " ".join(t for t in page_texts if t)
    return clean_text(joined)
33
+
34
+ def chunk_text_by_chars(text: str, chunk_size: int = 3000, overlap: int = 200) -> List[str]:
35
+ """
36
+ Chunk text by character length. Overlap helps keep context across chunks.
37
+ """
38
+ chunks = []
39
+ start = 0
40
+ text_len = len(text)
41
+ while start < text_len:
42
+ end = start + chunk_size
43
+ chunks.append(text[start:end])
44
+ start = max(end - overlap, end)
45
+ return chunks
46
+
47
def call_openai_chat(messages: list, max_tokens: int = 1000, temperature: float = 0.2) -> str:
    """
    Send *messages* to the OpenAI chat-completions API and return the reply text.

    Args:
        messages: list of {"role": ..., "content": ...} chat messages.
        max_tokens: completion token cap passed to the API.
        temperature: sampling temperature passed to the API.

    Returns:
        The assistant's reply text, or "" if no content could be extracted.

    Raises:
        RuntimeError: if no OpenAI API key is configured.
        Exception: any error raised by the OpenAI client propagates to the
            caller so the UI can display it.
    """
    if not openai.api_key:
        raise RuntimeError("OPENAI_API_KEY not set")
    # The original wrapped this in `try: ... except Exception as e: raise`,
    # which is a no-op (bare re-raise, unused variable) — removed; API errors
    # propagate unchanged.
    response = openai.ChatCompletion.create(
        model=OPENAI_MODEL,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # Robust extraction: choices may carry a chat-style "message" dict or a
    # legacy completion-style "text" field.
    content = None
    if response and "choices" in response and len(response["choices"]) > 0:
        choice = response["choices"][0]
        if "message" in choice and "content" in choice["message"]:
            content = choice["message"]["content"]
        elif "text" in choice:
            content = choice["text"]
    return content or ""
71
+
72
def ask_gpt(question: str, context: str) -> str:
    """Answer *question* grounded in *context* (a chunk of document text)."""
    user_prompt = (
        f"Context:\n{context}\n\n"
        f"Question: {question}\nAnswer:"
    )
    chat = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_prompt},
    ]
    return call_openai_chat(chat, max_tokens=600)
79
+
80
def summarize_text(text: str) -> str:
    """Return a concise model-generated summary of *text*."""
    chat = [
        {"role": "system", "content": "You are a concise summarizer."},
        {
            "role": "user",
            "content": f"Summarize the following text in a clear, concise way:\n\n{text}",
        },
    ]
    return call_openai_chat(chat, max_tokens=400)
87
+
88
def text_to_speech_eleven(text: str, voice_id: str = "pnCWbS8Aqipqqr5wzjuy") -> Optional[bytes]:
    """
    Convert *text* to speech with the ElevenLabs text-to-speech API.

    Returns the raw MP3 bytes on success. Returns None (after showing a
    Streamlit warning for request failures) when the key is missing, the
    HTTP response is not OK, or the request raises.
    """
    # No key configured -> audio feature is silently disabled.
    if not ELEVEN_API_KEY:
        return None

    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "Accept": "audio/mpeg",
        "xi-api-key": ELEVEN_API_KEY,
        "Content-Type": "application/json",
    }
    payload = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
    }

    try:
        resp = requests.post(url, json=payload, headers=headers, timeout=30)
        if not resp.ok:
            st.warning(f"ElevenLabs TTS failed: {resp.status_code} {resp.text[:300]}")
            return None
        return resp.content
    except Exception as e:
        st.warning(f"ElevenLabs TTS error: {e}")
    return None
116
 
117
# ============ STREAMLIT APP ============
st.set_page_config(page_title="PDF Q&A", layout="wide")
st.title("πŸ“„ PDF Q&A with Summarization + Audio")

# Show API key status up front so the user knows which features will work.
col1, col2 = st.columns(2)
with col1:
    if openai.api_key:
        st.success("OpenAI key detected βœ…")
    else:
        st.error("OPENAI_API_KEY is not set. Chat features will not work.")
with col2:
    if ELEVEN_API_KEY:
        st.success("ElevenLabs key detected βœ…")
    else:
        st.info("ELEVEN_API_KEY not set. Audio playback will be disabled.")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
if uploaded_file is not None:
    try:
        with st.spinner("Extracting text from PDF..."):
            raw_text = extract_text_from_pdf(uploaded_file)
    except Exception as e:
        st.error(f"Failed to extract PDF text: {e}")
        raw_text = ""

    if not raw_text:
        st.warning("No text was extracted from this PDF. It may be scanned images (OCR needed).")
    else:
        st.success("PDF loaded successfully βœ…")
        st.markdown(f"**Extracted text length:** {len(raw_text)} characters")

        # --- Summarization ---
        if st.button("Summarize Document"):
            with st.spinner("Summarizing..."):
                try:
                    # Cap input size for summarization to control token cost.
                    to_summarize = raw_text[:15000]
                    summary = summarize_text(to_summarize)
                    st.subheader("πŸ“Œ Summary")
                    st.write(summary)

                    audio_bytes = text_to_speech_eleven(summary)
                    if audio_bytes:
                        st.audio(audio_bytes, format="audio/mp3")
                    # CONSISTENCY FIX: the original tested `is None` here,
                    # which treats an empty-string key differently from the
                    # truthiness checks used everywhere else in this file.
                    elif not ELEVEN_API_KEY:
                        st.info("TTS skipped because ELEVEN_API_KEY is not set.")
                except Exception as e:
                    st.error(f"Summarization failed: {e}")

        # --- Q&A ---
        query = st.text_input("Ask a question about the PDF:")
        if query:
            with st.spinner("Thinking..."):
                try:
                    chunks = chunk_text_by_chars(raw_text, chunk_size=3000, overlap=200)
                    # Query only the first few chunks to limit cost/latency.
                    max_chunks = 3
                    answers = []
                    for chunk in chunks[:max_chunks]:
                        answers.append(ask_gpt(query, chunk))
                    final_answer = "\n\n".join(a for a in answers if a)
                    if not final_answer.strip():
                        st.warning("No answer returned from the model.")
                    else:
                        st.subheader("πŸ’‘ Answer")
                        st.write(final_answer)

                        audio = text_to_speech_eleven(final_answer)
                        if audio:
                            st.audio(audio, format="audio/mp3")
                        elif not ELEVEN_API_KEY:
                            st.info("TTS skipped because ELEVEN_API_KEY is not set.")
                except Exception as e:
                    st.error(f"Q&A failed: {e}")
else:
    st.info("Upload a PDF to begin.")