ChatBotsTA committed on
Commit
cf0600b
·
verified ·
1 Parent(s): 17e0e64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -151
app.py CHANGED
@@ -1,7 +1,6 @@
1
  # app.py
2
  import os
3
  import io
4
- import tempfile
5
  import streamlit as st
6
  from huggingface_hub import InferenceClient
7
  import pdfplumber
@@ -9,185 +8,178 @@ from PIL import Image
9
  import base64
10
  from typing import Optional
11
 
12
- st.set_page_config(page_title="PDF β†’ Summary + TTS + Chat + Diagram", layout="wide")
 
 
 
13
 
14
- # ---------- Config (models - change if you prefer others) ----------
15
- LLAMA_MODEL = "Groq/Llama-3-Groq-8B-Tool-Use" # Groq Llama model on HF (example)
16
- TTS_MODEL = "espnet/kan-bayashi_ljspeech_vits" # example TTS model on HF
17
- SDXL_MODEL = "stabilityai/stable-diffusion-xl-base-1.0" # SDXL model on HF
18
-
19
- # ---------- Secrets: HF_TOKEN and GROQ_TOKEN ----------
20
  HF_TOKEN = os.environ.get("HF_TOKEN")
21
  GROQ_TOKEN = os.environ.get("GROQ_TOKEN")
22
 
23
- # ---------- Init InferenceClient ----------
24
  client: Optional[InferenceClient] = None
25
- client_info = ""
26
  try:
27
  if GROQ_TOKEN:
28
- # Prefer Groq provider if GROQ_TOKEN present
29
  client = InferenceClient(provider="groq", api_key=GROQ_TOKEN)
30
- client_info = "Using Groq provider (GROQ_TOKEN)"
31
  elif HF_TOKEN:
32
  client = InferenceClient(api_key=HF_TOKEN)
33
- client_info = "Using Hugging Face Inference (HF_TOKEN)"
34
- else:
35
- client_info = "NO TOKEN FOUND"
36
- except Exception as e:
37
- client_info = f"Failed to initialize InferenceClient: {e}"
38
  client = None
39
 
40
- # ---------- Helpers ----------
41
- def pdf_to_text_bytes(file_bytes: bytes) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  text_chunks = []
43
- with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
44
- for page in pdf.pages:
45
- ptext = page.extract_text()
46
- if ptext:
 
47
  text_chunks.append(ptext)
48
- return "\n\n".join(text_chunks)
 
 
 
49
 
50
  def llama_summarize(text: str) -> str:
51
  if client is None:
52
- raise RuntimeError("InferenceClient not initialized (missing HF_TOKEN/GROQ_TOKEN).")
53
- # Create simple system+user prompt
54
  messages = [
55
- {"role": "system", "content": "You are a concise summarizer. Provide a short summary in bullet points."},
56
- {"role": "user", "content": f"Summarize the following document in 6-8 concise bullet points:\n\n{text}"}
57
  ]
58
- # Try chat completions API path, fallback to text generation if necessary
59
- try:
60
- resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
61
- return resp.choices[0].message["content"]
62
- except Exception:
63
- try:
64
- # fallback: text generation (single string)
65
- resp2 = client.text_generation(model=LLAMA_MODEL, inputs="Summarize:\n\n" + text, max_new_tokens=512)
66
- # resp2 may be dict-like or object; try a few access patterns
67
- if isinstance(resp2, dict) and "generated_text" in resp2:
68
- return resp2["generated_text"]
69
- # try attribute access
70
- return str(resp2)
71
- except Exception as e:
72
- raise RuntimeError(f"Summarization failed: {e}")
73
 
74
  def llama_chat(chat_history: list, user_question: str) -> str:
75
  if client is None:
76
- raise RuntimeError("InferenceClient not initialized (missing HF_TOKEN/GROQ_TOKEN).")
77
  messages = chat_history + [{"role": "user", "content": user_question}]
78
- try:
79
- resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
80
- return resp.choices[0].message["content"]
81
- except Exception as e:
82
- raise RuntimeError(f"Chat completion failed: {e}")
83
 
84
  def tts_synthesize(text: str) -> bytes:
85
  if client is None:
86
- raise RuntimeError("InferenceClient not initialized (missing HF_TOKEN/GROQ_TOKEN).")
87
- try:
88
- audio_bytes = client.text_to_speech(model=TTS_MODEL, inputs=text)
89
- return audio_bytes
90
- except Exception as e:
91
- raise RuntimeError(f"TTS failed: {e}")
92
 
93
  def generate_image(prompt_text: str) -> Image.Image:
94
  if client is None:
95
- raise RuntimeError("InferenceClient not initialized (missing HF_TOKEN/GROQ_TOKEN).")
96
- try:
97
- img_bytes = client.text_to_image(prompt_text, model=SDXL_MODEL)
98
- return Image.open(io.BytesIO(img_bytes))
99
- except Exception as e:
100
- raise RuntimeError(f"Image generation failed: {e}")
101
 
102
  def make_download_link_bytes(data: bytes, filename: str, mime: str):
103
  b64 = base64.b64encode(data).decode()
104
- href = f'<a href="data:{mime};base64,{b64}" download="{filename}">Download {filename}</a>'
105
  return href
106
 
107
- # ---------- UI ----------
108
- st.title("PDF β†’ Summary + TTS + Chat + Diagram (Groq/HF)")
109
-
110
- st.sidebar.markdown("### Runtime info")
111
- st.sidebar.write(client_info)
112
- st.sidebar.markdown("**Required env vars**: `HF_TOKEN` and/or `GROQ_TOKEN`. Prefer `GROQ_TOKEN` for Groq provider.")
113
-
114
- if client is None:
115
- st.error("Inference client not initialized. Set HF_TOKEN or GROQ_TOKEN as environment variables in your Space.")
116
- st.stop()
117
-
118
- uploaded = st.file_uploader("Upload a PDF to analyze", type=["pdf"])
119
- if uploaded:
120
- file_bytes = uploaded.read()
121
- with st.spinner("Extracting text from PDF..."):
122
- try:
123
- text = pdf_to_text_bytes(file_bytes)
124
- except Exception as e:
125
- st.error(f"Failed to extract text from PDF: {e}")
126
- text = ""
127
- st.subheader("Document preview (first 2000 chars)")
128
- st.text_area("", value=(text[:2000] + ("..." if len(text) > 2000 else "")), height=220)
129
-
130
- col1, col2 = st.columns(2)
131
-
132
- with col1:
133
- if st.button("Create summary"):
134
- if not text.strip():
135
- st.error("Document text empty or extraction failed.")
136
- else:
137
- with st.spinner("Summarizing with Llama..."):
138
- try:
139
- summary = llama_summarize(text)
140
- st.session_state["summary"] = summary
141
- st.subheader("Summary")
142
- st.markdown(summary)
143
- except Exception as e:
144
- st.error(str(e))
145
-
146
- if "summary" in st.session_state:
147
- summary = st.session_state["summary"]
148
- if st.button("Synthesize summary to audio"):
149
- with st.spinner("Generating speech..."):
150
- try:
151
- wav = tts_synthesize(summary)
152
- st.audio(wav)
153
- st.markdown(make_download_link_bytes(wav, "summary.wav", "audio/wav"), unsafe_allow_html=True)
154
- except Exception as e:
155
- st.error(str(e))
156
-
157
- with col2:
158
- st.subheader("Chat with the document")
159
- if "chat_history" not in st.session_state:
160
- doc_context = text[:4000] if text else ""
161
- st.session_state["chat_history"] = [
162
- {"role":"system","content":"You are an assistant that answers questions based only on the provided document context."},
163
- {"role":"user","content": f"Document context:\n{doc_context}"}
164
- ]
165
- st.session_state["convo_display"] = []
166
-
167
- user_q = st.text_input("Ask a question about the PDF")
168
- if st.button("Ask question") and user_q.strip():
169
- with st.spinner("Getting answer from Llama..."):
170
- try:
171
- answer = llama_chat(st.session_state["chat_history"], user_q)
172
- # show and store
173
- st.session_state["convo_display"].append(("You", user_q))
174
- st.session_state["convo_display"].append(("Assistant", answer))
175
- st.session_state["chat_history"].append({"role":"user","content":user_q})
176
- st.session_state["chat_history"].append({"role":"assistant","content":answer})
177
- except Exception as e:
178
- st.error(str(e))
179
-
180
- # show conversation
181
- for speaker, textline in st.session_state.get("convo_display", []):
182
- if speaker == "You":
183
- st.markdown(f"**You:** {textline}")
184
- else:
185
- st.markdown(f"**Assistant:** {textline}")
 
 
 
186
 
187
  st.markdown("---")
188
- st.subheader("Generate diagram/image from prompt (SDXL)")
189
- diagram_prompt = st.text_input("Describe the diagram or scene to generate")
190
- if st.button("Generate diagram") and diagram_prompt.strip():
191
  with st.spinner("Generating image..."):
192
  try:
193
  img = generate_image(diagram_prompt)
@@ -196,10 +188,15 @@ if uploaded:
196
  img.save(buf, format="PNG")
197
  st.download_button("Download diagram (PNG)", data=buf.getvalue(), file_name="diagram.png", mime="image/png")
198
  except Exception as e:
199
- st.error(str(e))
200
-
201
- st.sidebar.markdown("---")
202
- st.sidebar.markdown("### Model IDs (change in app.py if you want)")
203
- st.sidebar.write(f"LLM: {LLAMA_MODEL}")
204
- st.sidebar.write(f"TTS: {TTS_MODEL}")
205
- st.sidebar.write(f"Image: {SDXL_MODEL}")
 
 
 
 
 
 
1
  # app.py
2
  import os
3
  import io
 
4
  import streamlit as st
5
  from huggingface_hub import InferenceClient
6
  import pdfplumber
 
8
  import base64
9
  from typing import Optional
10
 
# ----------------- CONFIG -----------------
# Model identifiers passed to the inference client for chat/summarization,
# text-to-speech and image generation respectively.
LLAMA_MODEL = "Groq/Llama-3-Groq-8B-Tool-Use"
TTS_MODEL = "espnet/kan-bayashi_ljspeech_vits"
SDXL_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"

# Credentials are read from the environment; either one may be unset (None).
HF_TOKEN = os.environ.get("HF_TOKEN")
GROQ_TOKEN = os.environ.get("GROQ_TOKEN")

# Prefer Groq if token present, otherwise HF token
_client_options = None
if GROQ_TOKEN:
    _client_options = {"provider": "groq", "api_key": GROQ_TOKEN}
elif HF_TOKEN:
    _client_options = {"api_key": HF_TOKEN}

# `client` stays None when no token is configured or when construction fails;
# every helper that needs it checks for None before use.
client: Optional[InferenceClient] = None
if _client_options is not None:
    try:
        client = InferenceClient(**_client_options)
    except Exception:
        client = None
28
 
29
+ # ----------------- PAGE STYLE -----------------
30
+ st.set_page_config(page_title="PDF Buddy β€” Summarize β€’ Speak β€’ Chat β€’ Draw", layout="wide")
31
+ st.markdown(
32
+ """
33
+ <style>
34
+ .main > .block-container { padding: 1.5rem 2rem; max-width: 1100px; }
35
+ .title { font-size:28px; font-weight:700; color:#0f172a; }
36
+ .subtitle { color:#6b7280; margin-bottom:12px; }
37
+ .big-btn { font-weight:600; padding:10px 18px; border-radius:10px; }
38
+ .small-muted { color:#9ca3af; font-size:12px; }
39
+ </style>
40
+ """,
41
+ unsafe_allow_html=True,
42
+ )
43
+
44
+ st.markdown('<div class="title">πŸ“„ PDF Buddy β€” Summarize β€’ Speak β€’ Chat β€’ Draw</div>', unsafe_allow_html=True)
45
+ st.markdown('<div class="subtitle">Upload a PDF, get a concise summary, speak it, ask questions, or generate diagrams from prompts.</div>', unsafe_allow_html=True)
46
+
47
+ # ----------------- FUNCTIONS -----------------
def pdf_to_text_bytes(file_bytes: bytes) -> "tuple[str, int]":
    """Extract the text of a PDF held in memory.

    Args:
        file_bytes: Raw bytes of the PDF file.

    Returns:
        A ``(text, page_count)`` pair: the page texts joined by blank lines,
        and the number of pages that were read.

    Raises:
        RuntimeError: If opening or parsing the PDF fails; the underlying
            exception is attached as ``__cause__``.
    """
    try:
        with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
            # A page with no extractable text contributes an empty string, so
            # the number of chunks always equals the number of pages.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
    except Exception as e:
        raise RuntimeError(f"PDF parsing failed: {e}") from e
    return "\n\n".join(page_texts), len(page_texts)
61
 
def llama_summarize(text: str) -> str:
    """Ask the chat model for a six-bullet summary of ``text``.

    Args:
        text: Document text to summarize; it is embedded verbatim in the
            user message.

    Returns:
        The content of the first choice returned by the chat completion.

    Raises:
        RuntimeError: If the module-level ``client`` is None.
    """
    if client is None:
        raise RuntimeError("LLM client not initialized (missing HF_TOKEN/GROQ_TOKEN).")

    system_msg = {
        "role": "system",
        "content": "You are a concise summarizer. Give 6 short bullet points.",
    }
    user_msg = {
        "role": "user",
        "content": f"Summarize this document in 6 concise bullet points:\n\n{text}",
    }
    completion = client.chat.completions.create(
        model=LLAMA_MODEL,
        messages=[system_msg, user_msg],
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"]
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
def llama_chat(chat_history: list, user_question: str) -> str:
    """Send the running conversation plus a new question to the chat model.

    Args:
        chat_history: Earlier messages as ``{"role": ..., "content": ...}``
            dicts. The list is not modified here.
        user_question: The new user message to append for this request.

    Returns:
        The content of the first choice returned by the chat completion.

    Raises:
        RuntimeError: If the module-level ``client`` is None.
    """
    if client is None:
        raise RuntimeError("LLM client not initialized (missing HF_TOKEN/GROQ_TOKEN).")

    question_msg = {"role": "user", "content": user_question}
    # Concatenation builds a new list, leaving the caller's history untouched.
    conversation = chat_history + [question_msg]
    completion = client.chat.completions.create(model=LLAMA_MODEL, messages=conversation)
    return completion.choices[0].message["content"]
 
 
 
78
 
def tts_synthesize(text: str) -> bytes:
    """Synthesize speech for ``text`` with the configured TTS model.

    Args:
        text: The text to speak.

    Returns:
        The audio returned by the inference client, as raw bytes.

    Raises:
        RuntimeError: If the module-level ``client`` is None.
    """
    if client is None:
        raise RuntimeError("TTS client not initialized (missing HF_TOKEN/GROQ_TOKEN).")
    # InferenceClient.text_to_speech takes the text as its first positional
    # parameter; it has no `inputs` keyword, so `inputs=text` raised TypeError.
    # NOTE(review): when `client` was built with provider="groq", confirm that
    # provider actually serves text-to-speech for TTS_MODEL.
    return client.text_to_speech(text, model=TTS_MODEL)
 
 
 
84
 
def generate_image(prompt_text: str) -> Image.Image:
    """Generate an image for ``prompt_text`` with the configured SDXL model.

    Args:
        prompt_text: Text prompt describing the image.

    Returns:
        The generated picture as a PIL image.

    Raises:
        RuntimeError: If the module-level ``client`` is None.
    """
    if client is None:
        raise RuntimeError("Image generation client not initialized (missing HF_TOKEN/GROQ_TOKEN).")
    result = client.text_to_image(prompt_text, model=SDXL_MODEL)
    # InferenceClient.text_to_image already returns a PIL image; wrapping it in
    # io.BytesIO raised TypeError. Raw bytes are still decoded, in case a
    # client version or provider hands back encoded image data instead.
    if isinstance(result, (bytes, bytearray)):
        return Image.open(io.BytesIO(result))
    return result
 
 
 
90
 
def make_download_link_bytes(data: bytes, filename: str, mime: str) -> str:
    """Build an HTML anchor that downloads ``data`` through a base64 data URI.

    Args:
        data: Raw bytes to embed in the link.
        filename: Suggested file name; used in the ``download`` attribute and
            in the visible link text.
        mime: MIME type placed at the start of the data URI.

    Returns:
        An ``<a>`` element as a string, meant to be rendered as raw HTML.
    """
    import html  # local import: `html` is not among the file's top-level imports

    b64 = base64.b64encode(data).decode("ascii")
    # The result is rendered with unsafe_allow_html, so anything interpolated
    # into attributes or text must be escaped to avoid breaking out of the tag.
    safe_name = html.escape(filename, quote=True)
    safe_mime = html.escape(mime, quote=True)
    return (
        f'<a href="data:{safe_mime};base64,{b64}" download="{safe_name}">'
        f"⬇️ Download {safe_name}</a>"
    )
95
 
# ----------------- STATE -----------------
# Seed each per-session value only when it is missing, so values stored by an
# earlier run of the script are kept.
for _state_key, _initial_value in (
    ("uploaded_name", None),
    ("extracted_text", ""),
    ("summary", ""),
    ("chat_history", []),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
105
+
# ----------------- Uploader Column -----------------
col_left, col_right = st.columns([1, 1])
with col_left:
    uploaded = st.file_uploader("Upload PDF (single file)", type=["pdf"], help="Drag & drop or click to choose a PDF.")
    if uploaded is not None:
        bytes_in = uploaded.getvalue()
        # immediate feedback to user
        st.success(f"Uploaded file: **{uploaded.name}** — {round(len(bytes_in)/1024,1)} KB")
        st.session_state.uploaded_name = uploaded.name

        # Streamlit re-executes this script on every widget interaction, so
        # parse the PDF only when the upload changes. Name plus size is used
        # as a cheap identity for the file.
        file_key = (uploaded.name, len(bytes_in))
        if st.session_state.get("uploaded_key") != file_key:
            with st.spinner("Extracting text from PDF..."):
                try:
                    text, pages = pdf_to_text_bytes(bytes_in)
                except Exception as e:
                    st.session_state.extracted_text = ""
                    # Forget the key so the next rerun retries this file.
                    st.session_state.uploaded_key = None
                    st.error(f"Failed to extract PDF text: {e}")
                else:
                    st.session_state.extracted_text = text
                    st.session_state.uploaded_key = file_key
                    # A new document invalidates whatever was derived from the
                    # previous one. Emptying chat_history also lets the chat
                    # section re-seed its context from the new text; without
                    # this it keeps the empty context created before any
                    # upload happened.
                    st.session_state.summary = ""
                    st.session_state.chat_history = []
                    st.success(f"Extraction complete — {pages} pages processed. Preview shown below.")

    # show a preview (or hint)
    if st.session_state.extracted_text:
        st.subheader("Document preview (first 3000 chars)")
        st.text_area("", value=(st.session_state.extracted_text[:3000] + ("..." if len(st.session_state.extracted_text) > 3000 else "")), height=240)
    else:
        st.info("No document loaded. Upload a PDF to get started. If your file is large, extraction may take a few seconds.")
131
+
132
+ with col_right:
133
+ # Controls: disabled until extraction is available
134
+ disabled = not bool(st.session_state.extracted_text)
135
+ st.subheader("Actions")
136
+ if st.button("πŸ“ Create summary", key="summarize", disabled=disabled):
137
+ with st.spinner("Creating summary..."):
138
+ try:
139
+ summary = llama_summarize(st.session_state.extracted_text[:30000]) # limit prompt length
140
+ st.session_state.summary = summary
141
+ st.success("Summary created.")
142
+ except Exception as e:
143
+ st.error(f"Summarization failed: {e}")
144
+
145
+ if st.session_state.summary:
146
+ st.markdown("**Summary:**")
147
+ st.markdown(st.session_state.summary)
148
+
149
+ if st.button("πŸ”Š Synthesize summary to audio", key="tts", disabled=disabled or not st.session_state.summary):
150
+ with st.spinner("Synthesizing audio..."):
151
+ try:
152
+ wav = tts_synthesize(st.session_state.summary)
153
+ st.audio(wav)
154
+ st.markdown(make_download_link_bytes(wav, "summary.wav", "audio/wav"), unsafe_allow_html=True)
155
+ except Exception as e:
156
+ st.error(f"TTS failed: {e}")
157
+
158
+ st.markdown("---")
159
+ st.subheader("Chat with document")
160
+ if "chat_history" not in st.session_state or not st.session_state.chat_history:
161
+ # initialize with document context (short)
162
+ context = st.session_state.extracted_text[:4000] if st.session_state.extracted_text else ""
163
+ st.session_state.chat_history = [
164
+ {"role": "system", "content": "You are a helpful assistant. Answer strictly using the document context."},
165
+ {"role": "user", "content": f"Document context:\n{context}"}
166
+ ]
167
+ user_q = st.text_input("Ask a question about the PDF", key="user_q", disabled=disabled)
168
+ if st.button("❓ Ask", key="ask_btn", disabled=disabled or not user_q):
169
+ with st.spinner("Getting answer..."):
170
+ try:
171
+ ans = llama_chat(st.session_state.chat_history, user_q)
172
+ st.session_state.chat_history.append({"role": "user", "content": user_q})
173
+ st.session_state.chat_history.append({"role": "assistant", "content": ans})
174
+ st.markdown(f"**You:** {user_q}")
175
+ st.markdown(f"**Assistant:** {ans}")
176
+ except Exception as e:
177
+ st.error(f"Chat failed: {e}")
178
 
179
  st.markdown("---")
180
+ st.subheader("Generate diagram from prompt (SDXL)")
181
+ diagram_prompt = st.text_input("Describe diagram or scene", key="diagram_prompt", disabled=disabled)
182
+ if st.button("πŸ–ΌοΈ Generate diagram", key="gen_img", disabled=disabled or not diagram_prompt):
183
  with st.spinner("Generating image..."):
184
  try:
185
  img = generate_image(diagram_prompt)
 
188
  img.save(buf, format="PNG")
189
  st.download_button("Download diagram (PNG)", data=buf.getvalue(), file_name="diagram.png", mime="image/png")
190
  except Exception as e:
191
+ st.error(f"Image generation failed: {e}")
192
+
193
+ # ----------------- FOOTER / NOTES -----------------
194
+ st.markdown("---")
195
+ st.markdown(
196
+ """
197
+ **Notes**
198
+ - API keys are read from environment variables (HF_TOKEN and/or GROQ_TOKEN). They are NOT displayed here.
199
+ - If nothing happens after upload, try a small PDF (1–2 pages) to test extraction first.
200
+ - If you get errors about the LLM/TTS/Image calls, confirm the tokens are set in your Space settings or `.env` (don’t commit `.env` publicly).
201
+ """
202
+ )