Spaces:

ChatBotsTA
/

pdf-summarizer

Sleeping

ChatBotsTA committed on Sep 4

Commit

ab1429e

verified ·

1 Parent(s): fb60b9f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# Cell 9
-app_code = r'''
 import os, io, re, json, base64, requests, numpy as np
 import streamlit as st
 from pypdf import PdfReader
@@ -123,7 +121,6 @@ def extract_text_from_pdf(file) -> str:
 def make_word_freq_chart(text: str, top_k=20):
     text = text.lower()
-    # lightweight stopword list
     stop = set(("the a an and of to in is are for with on by as at this that from be was were it its it’s into or if not your you we they their our can may such more most other also than which".split()))
     tokens = re.findall(r"[a-zA-Z]{3,}", text)
     freq = {}
@@ -203,14 +200,12 @@ if uploaded:
             st.session_state.chunks = split_into_chunks(st.session_state.doc_text)
         with st.spinner("Thinking..."):
             try:
-                # embed once/cache
                 if st.session_state.chunk_vecs is None:
                     vecs = embed_texts(st.session_state.chunks)
                     st.session_state.chunk_vecs = vecs
                 else:
                     vecs = st.session_state.chunk_vecs
-                # question embedding
                 q_vec = embed_texts([question])
                 sims = cosine_sim(q_vec, vecs).flatten()
                 top_idx = np.argsort(sims)[::-1][:3]
@@ -235,6 +230,3 @@ if uploaded:
 else:
     st.info("Upload a PDF to get started.")
-'''
-Path("app.py").write_text(app_code, encoding="utf-8")
-print("Wrote app.py")

 import os, io, re, json, base64, requests, numpy as np
 import streamlit as st
 from pypdf import PdfReader
 def make_word_freq_chart(text: str, top_k=20):
     text = text.lower()
     stop = set(("the a an and of to in is are for with on by as at this that from be was were it its it’s into or if not your you we they their our can may such more most other also than which".split()))
     tokens = re.findall(r"[a-zA-Z]{3,}", text)
     freq = {}
             st.session_state.chunks = split_into_chunks(st.session_state.doc_text)
         with st.spinner("Thinking..."):
             try:
                 if st.session_state.chunk_vecs is None:
                     vecs = embed_texts(st.session_state.chunks)
                     st.session_state.chunk_vecs = vecs
                 else:
                     vecs = st.session_state.chunk_vecs
                 q_vec = embed_texts([question])
                 sims = cosine_sim(q_vec, vecs).flatten()
                 top_idx = np.argsort(sims)[::-1][:3]
 else:
     st.info("Upload a PDF to get started.")