# StudySummarizer / app.py
# Uploaded by CristopherWVSU ("Upload 2 files")
# Commit 5b1cb60 (verified)
import os
import tempfile
import streamlit as st
import fitz # PyMuPDF for PDFs
import together
from fpdf import FPDF
from dotenv import load_dotenv
from unidecode import unidecode
# Load API key
# load_dotenv() reads a local .env file (if any) so os.getenv below can see
# values defined there as well as real environment variables.
load_dotenv()
together_api_key = os.getenv("TOGETHER_API_KEY")
if not together_api_key:
    # Assigning None into os.environ raises an opaque TypeError; surface a
    # readable message in the app and halt the script run instead.
    st.error("TOGETHER_API_KEY is not set. Add it to your environment or .env file.")
    st.stop()
os.environ["TOGETHER_API_KEY"] = together_api_key
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of the PDF at *pdf_path*.

    Pages are read in document order and their text is joined with a
    newline between consecutive pages.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        A single string containing the text of all pages.
    """
    # Open via a context manager so the document (and its file handle) is
    # closed even if text extraction raises part-way through.
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text("text") for page in doc)
def summarize_with_llama(text):
    """Ask the hosted completion model for the key points of *text*.

    The study notes are embedded in a fixed instruction prompt and sent to
    the Together completion endpoint; the stripped text of the first
    returned choice is handed back to the caller.

    Args:
        text: Raw study-note text to condense.

    Returns:
        The model's completion with surrounding whitespace removed.
    """
    instructions = f"""
Extract key points from the following study notes while maintaining **strict adherence** to the provided material.
### **Guidelines:**
1. **Do not add, infer, or introduce** new topics, explanations, or external examples.
2. **Do not paraphrase inaccurately**—preserve the original structure and intent.
3. **Retain all key points and bullet points** while removing redundant information.
4. **Maintain the section structure** (e.g., headings, bullet points).
5. **If the text exceeds the token limit**, summarize each section independently while keeping accuracy.
6. Avoid using the asterisk on the output.
---
**STUDY NOTES:**
{text}
---
**EXTRACTED KEY POINTS:**
"""
    completion = together.Completion.create(
        model="mistralai/Mistral-7B-Instruct-v0.1",
        prompt=instructions,
        max_tokens=2000,  # upper bound on the length of the generated summary
        temperature=0.0,  # zero temperature to keep sampling randomness down
    )
    first_choice = completion.choices[0]
    return first_choice.text.strip()
def clean_text(text):
    """Return *text* transliterated by ``unidecode``.

    Used to turn typographic characters (curly quotes, bullets and the
    like) into plain ASCII stand-ins before the text is rendered.
    """
    ascii_text = unidecode(text)
    return ascii_text
def generate_pdf(summary_text):
    """Render *summary_text* into a PDF file and return that file's path.

    The text is first passed through ``clean_text`` and then written as a
    single multi-line cell in 12pt Arial on an auto-paginating document.

    Args:
        summary_text: The summary to place in the PDF.

    Returns:
        Path of a newly created temporary ``.pdf`` file. The file is not
        deleted automatically; the caller is responsible for removing it.
    """
    summary_text = clean_text(summary_text)  # Ensure ASCII-only text

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary_text)

    # Reserve a unique path, then close our handle *before* FPDF writes to
    # it: leaving it open leaks a file descriptor per call and prevents the
    # file from being reopened by name on platforms that lock open files.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        output_path = temp_file.name
    pdf.output(output_path, "F")
    return output_path
# Streamlit UI
def _remove_quietly(path):
    """Delete the file at *path*, ignoring failures (best-effort cleanup)."""
    try:
        os.remove(path)
    except OSError:
        pass


st.title("Study Note Summarizer")
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file is not None:
    # The extractor works on a filesystem path, so spill the upload to a
    # temporary file. getvalue() returns the whole buffer regardless of the
    # current stream position.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        temp_pdf.write(uploaded_file.getvalue())
        pdf_path = temp_pdf.name
    try:
        extracted_text = extract_text_from_pdf(pdf_path)
    finally:
        # This script body runs again on each interaction; without cleanup
        # every run would leave another copy of the upload on disk.
        _remove_quietly(pdf_path)

    st.text_area("Extracted Text", extracted_text, height=200)
    if st.button("Summarize Notes"):
        summary = summarize_with_llama(extracted_text)
        st.text_area("Summarized Notes", summary, height=200)

        summary_pdf_path = generate_pdf(summary)
        try:
            # Load the PDF into memory so the temporary file can be removed
            # right away; the download button is then fed the raw bytes.
            with open(summary_pdf_path, "rb") as file:
                summary_pdf_bytes = file.read()
        finally:
            _remove_quietly(summary_pdf_path)
        st.download_button(
            "Download Summary PDF",
            summary_pdf_bytes,
            file_name="summary.pdf",
            mime="application/pdf",
        )