Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
# Cell 9
|
| 2 |
-
app_code = r'''
|
| 3 |
import os, io, re, json, base64, requests, numpy as np
|
| 4 |
import streamlit as st
|
| 5 |
from pypdf import PdfReader
|
|
@@ -123,7 +121,6 @@ def extract_text_from_pdf(file) -> str:
|
|
| 123 |
|
| 124 |
def make_word_freq_chart(text: str, top_k=20):
|
| 125 |
text = text.lower()
|
| 126 |
-
# lightweight stopword list
|
| 127 |
stop = set(("the a an and of to in is are for with on by as at this that from be was were it its it’s into or if not your you we they their our can may such more most other also than which".split()))
|
| 128 |
tokens = re.findall(r"[a-zA-Z]{3,}", text)
|
| 129 |
freq = {}
|
|
@@ -203,14 +200,12 @@ if uploaded:
|
|
| 203 |
st.session_state.chunks = split_into_chunks(st.session_state.doc_text)
|
| 204 |
with st.spinner("Thinking..."):
|
| 205 |
try:
|
| 206 |
-
# embed once/cache
|
| 207 |
if st.session_state.chunk_vecs is None:
|
| 208 |
vecs = embed_texts(st.session_state.chunks)
|
| 209 |
st.session_state.chunk_vecs = vecs
|
| 210 |
else:
|
| 211 |
vecs = st.session_state.chunk_vecs
|
| 212 |
|
| 213 |
-
# question embedding
|
| 214 |
q_vec = embed_texts([question])
|
| 215 |
sims = cosine_sim(q_vec, vecs).flatten()
|
| 216 |
top_idx = np.argsort(sims)[::-1][:3]
|
|
@@ -235,6 +230,3 @@ if uploaded:
|
|
| 235 |
|
| 236 |
else:
|
| 237 |
st.info("Upload a PDF to get started.")
|
| 238 |
-
'''
|
| 239 |
-
Path("app.py").write_text(app_code, encoding="utf-8")
|
| 240 |
-
print("Wrote app.py")
|
|
|
|
|
|
|
|
|
|
| 1 |
import os, io, re, json, base64, requests, numpy as np
|
| 2 |
import streamlit as st
|
| 3 |
from pypdf import PdfReader
|
|
|
|
| 121 |
|
| 122 |
def make_word_freq_chart(text: str, top_k=20):
|
| 123 |
text = text.lower()
|
|
|
|
| 124 |
stop = set(("the a an and of to in is are for with on by as at this that from be was were it its it’s into or if not your you we they their our can may such more most other also than which".split()))
|
| 125 |
tokens = re.findall(r"[a-zA-Z]{3,}", text)
|
| 126 |
freq = {}
|
|
|
|
| 200 |
st.session_state.chunks = split_into_chunks(st.session_state.doc_text)
|
| 201 |
with st.spinner("Thinking..."):
|
| 202 |
try:
|
|
|
|
| 203 |
if st.session_state.chunk_vecs is None:
|
| 204 |
vecs = embed_texts(st.session_state.chunks)
|
| 205 |
st.session_state.chunk_vecs = vecs
|
| 206 |
else:
|
| 207 |
vecs = st.session_state.chunk_vecs
|
| 208 |
|
|
|
|
| 209 |
q_vec = embed_texts([question])
|
| 210 |
sims = cosine_sim(q_vec, vecs).flatten()
|
| 211 |
top_idx = np.argsort(sims)[::-1][:3]
|
|
|
|
| 230 |
|
| 231 |
else:
|
| 232 |
st.info("Upload a PDF to get started.")
|
|
|
|
|
|
|
|
|