File size: 9,226 Bytes
f33a5dc
 
 
 
 
 
 
 
8945838
 
cf0600b
 
 
 
8945838
 
 
 
cf0600b
8945838
 
 
 
 
 
cf0600b
8945838
f33a5dc
cf0600b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f33a5dc
cf0600b
 
 
 
 
f33a5dc
cf0600b
 
 
 
f33a5dc
8945838
 
cf0600b
8945838
cf0600b
 
f33a5dc
cf0600b
 
8945838
 
 
cf0600b
8945838
cf0600b
 
8945838
 
 
cf0600b
 
 
8945838
 
 
cf0600b
 
 
8945838
 
 
cf0600b
8945838
 
cf0600b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f33a5dc
 
cf0600b
 
 
8945838
f33a5dc
 
 
 
 
 
 
cf0600b
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# app.py
import os
import io
import streamlit as st
from huggingface_hub import InferenceClient
import pdfplumber
from PIL import Image
import base64
from typing import Optional

# ----------------- CONFIG -----------------
LLAMA_MODEL = "Groq/Llama-3-Groq-8B-Tool-Use"
TTS_MODEL = "espnet/kan-bayashi_ljspeech_vits"
SDXL_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"

HF_TOKEN = os.environ.get("HF_TOKEN")
GROQ_TOKEN = os.environ.get("GROQ_TOKEN")

# Prefer Groq if token present, otherwise HF token
client: Optional[InferenceClient] = None
try:
    if GROQ_TOKEN:
        client = InferenceClient(provider="groq", api_key=GROQ_TOKEN)
    elif HF_TOKEN:
        client = InferenceClient(api_key=HF_TOKEN)
except Exception:
    client = None

# ----------------- PAGE STYLE -----------------
st.set_page_config(page_title="PDF Buddy β€” Summarize β€’ Speak β€’ Chat β€’ Draw", layout="wide")
st.markdown(
    """
    <style>
    .main > .block-container { padding: 1.5rem 2rem; max-width: 1100px; }
    .title { font-size:28px; font-weight:700; color:#0f172a; }
    .subtitle { color:#6b7280; margin-bottom:12px; }
    .big-btn { font-weight:600; padding:10px 18px; border-radius:10px; }
    .small-muted { color:#9ca3af; font-size:12px; }
    </style>
    """,
    unsafe_allow_html=True,
)

st.markdown('<div class="title">πŸ“„ PDF Buddy β€” Summarize β€’ Speak β€’ Chat β€’ Draw</div>', unsafe_allow_html=True)
st.markdown('<div class="subtitle">Upload a PDF, get a concise summary, speak it, ask questions, or generate diagrams from prompts.</div>', unsafe_allow_html=True)

# ----------------- FUNCTIONS -----------------
def pdf_to_text_bytes(file_bytes: bytes):
    """Extract text using pdfplumber, return full text and page count."""
    text_chunks = []
    try:
        with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
            total = len(pdf.pages)
            for i, page in enumerate(pdf.pages):
                ptext = page.extract_text() or ""
                text_chunks.append(ptext)
                # simple progress output handled by caller
    except Exception as e:
        raise RuntimeError(f"PDF parsing failed: {e}")
    return "\n\n".join(text_chunks), total

def llama_summarize(text: str) -> str:
    if client is None:
        raise RuntimeError("LLM client not initialized (missing HF_TOKEN/GROQ_TOKEN).")
    messages = [
        {"role": "system", "content": "You are a concise summarizer. Give 6 short bullet points."},
        {"role": "user", "content": f"Summarize this document in 6 concise bullet points:\n\n{text}"}
    ]
    resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
    return resp.choices[0].message["content"]

def llama_chat(chat_history: list, user_question: str) -> str:
    if client is None:
        raise RuntimeError("LLM client not initialized (missing HF_TOKEN/GROQ_TOKEN).")
    messages = chat_history + [{"role": "user", "content": user_question}]
    resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
    return resp.choices[0].message["content"]

def tts_synthesize(text: str) -> bytes:
    if client is None:
        raise RuntimeError("TTS client not initialized (missing HF_TOKEN/GROQ_TOKEN).")
    audio_bytes = client.text_to_speech(model=TTS_MODEL, inputs=text)
    return audio_bytes

def generate_image(prompt_text: str) -> Image.Image:
    if client is None:
        raise RuntimeError("Image generation client not initialized (missing HF_TOKEN/GROQ_TOKEN).")
    img_bytes = client.text_to_image(prompt_text, model=SDXL_MODEL)
    return Image.open(io.BytesIO(img_bytes))

def make_download_link_bytes(data: bytes, filename: str, mime: str):
    b64 = base64.b64encode(data).decode()
    href = f'<a href="data:{mime};base64,{b64}" download="{filename}">⬇️ Download {filename}</a>'
    return href

# ----------------- STATE -----------------
if "uploaded_name" not in st.session_state:
    st.session_state.uploaded_name = None
if "extracted_text" not in st.session_state:
    st.session_state.extracted_text = ""
if "summary" not in st.session_state:
    st.session_state.summary = ""
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# ----------------- Uploader Column -----------------
col_left, col_right = st.columns([1, 1])
with col_left:
    uploaded = st.file_uploader("Upload PDF (single file)", type=["pdf"], help="Drag & drop or click to choose a PDF.")
    if uploaded is not None:
        # immediate feedback to user
        st.success(f"Uploaded file: **{uploaded.name}** β€” {round(len(uploaded.getvalue())/1024,1)} KB")
        st.session_state.uploaded_name = uploaded.name
        # extract text with progress
        with st.spinner("Extracting text from PDF..."):
            try:
                bytes_in = uploaded.getvalue()
                text, pages = pdf_to_text_bytes(bytes_in)
                st.session_state.extracted_text = text
                st.success(f"Extraction complete β€” {pages} pages processed. Preview shown below.")
            except Exception as e:
                st.session_state.extracted_text = ""
                st.error(f"Failed to extract PDF text: {e}")

    # show a preview (or hint)
    if st.session_state.extracted_text:
        st.subheader("Document preview (first 3000 chars)")
        st.text_area("", value=(st.session_state.extracted_text[:3000] + ("..." if len(st.session_state.extracted_text) > 3000 else "")), height=240)
    else:
        st.info("No document loaded. Upload a PDF to get started. If your file is large, extraction may take a few seconds.")

with col_right:
    # Controls: disabled until extraction is available
    disabled = not bool(st.session_state.extracted_text)
    st.subheader("Actions")
    if st.button("πŸ“ Create summary", key="summarize", disabled=disabled):
        with st.spinner("Creating summary..."):
            try:
                summary = llama_summarize(st.session_state.extracted_text[:30000])  # limit prompt length
                st.session_state.summary = summary
                st.success("Summary created.")
            except Exception as e:
                st.error(f"Summarization failed: {e}")

    if st.session_state.summary:
        st.markdown("**Summary:**")
        st.markdown(st.session_state.summary)

    if st.button("πŸ”Š Synthesize summary to audio", key="tts", disabled=disabled or not st.session_state.summary):
        with st.spinner("Synthesizing audio..."):
            try:
                wav = tts_synthesize(st.session_state.summary)
                st.audio(wav)
                st.markdown(make_download_link_bytes(wav, "summary.wav", "audio/wav"), unsafe_allow_html=True)
            except Exception as e:
                st.error(f"TTS failed: {e}")

    st.markdown("---")
    st.subheader("Chat with document")
    if "chat_history" not in st.session_state or not st.session_state.chat_history:
        # initialize with document context (short)
        context = st.session_state.extracted_text[:4000] if st.session_state.extracted_text else ""
        st.session_state.chat_history = [
            {"role": "system", "content": "You are a helpful assistant. Answer strictly using the document context."},
            {"role": "user", "content": f"Document context:\n{context}"}
        ]
    user_q = st.text_input("Ask a question about the PDF", key="user_q", disabled=disabled)
    if st.button("❓ Ask", key="ask_btn", disabled=disabled or not user_q):
        with st.spinner("Getting answer..."):
            try:
                ans = llama_chat(st.session_state.chat_history, user_q)
                st.session_state.chat_history.append({"role": "user", "content": user_q})
                st.session_state.chat_history.append({"role": "assistant", "content": ans})
                st.markdown(f"**You:** {user_q}")
                st.markdown(f"**Assistant:** {ans}")
            except Exception as e:
                st.error(f"Chat failed: {e}")

    st.markdown("---")
    st.subheader("Generate diagram from prompt (SDXL)")
    diagram_prompt = st.text_input("Describe diagram or scene", key="diagram_prompt", disabled=disabled)
    if st.button("πŸ–ΌοΈ Generate diagram", key="gen_img", disabled=disabled or not diagram_prompt):
        with st.spinner("Generating image..."):
            try:
                img = generate_image(diagram_prompt)
                st.image(img, use_column_width=True)
                buf = io.BytesIO()
                img.save(buf, format="PNG")
                st.download_button("Download diagram (PNG)", data=buf.getvalue(), file_name="diagram.png", mime="image/png")
            except Exception as e:
                st.error(f"Image generation failed: {e}")

# ----------------- FOOTER / NOTES -----------------
st.markdown("---")
st.markdown(
    """
    **Notes**
    - API keys are read from environment variables (HF_TOKEN and/or GROQ_TOKEN). They are NOT displayed here.
    - If nothing happens after upload, try a small PDF (1–2 pages) to test extraction first.
    - If you get errors about the LLM/TTS/Image calls, confirm the tokens are set in your Space settings or `.env` (don’t commit `.env` publicly).
    """
)