import os
import tempfile

import fitz  # PyMuPDF for PDFs
import streamlit as st
import together
from dotenv import load_dotenv
from fpdf import FPDF
from unidecode import unidecode

# Load API key
load_dotenv()
together_api_key = os.getenv("TOGETHER_API_KEY")
# os.environ only accepts strings; assigning None (key missing) would raise
# TypeError at import time, so only re-export the key when it is present.
if together_api_key:
    os.environ["TOGETHER_API_KEY"] = together_api_key


# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of a PDF, joined by newlines.

    Args:
        pdf_path: Filesystem path of the PDF to open with PyMuPDF.

    Returns:
        The "text" extraction of each page, concatenated with "\n".
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction raises.
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text("text") for page in doc)


def summarize_with_llama(text):
    """Ask the Together completion API to extract key points from *text*.

    Despite the function name, the request is sent to the
    "mistralai/Mistral-7B-Instruct-v0.1" model.

    Args:
        text: The study notes to embed in the prompt.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    prompt = f"""
    Extract key points from the following study notes while maintaining **strict adherence** to the provided material.

    ### **Guidelines:**
    1. **Do not add, infer, or introduce** new topics, explanations, or external examples.
    2. **Do not paraphrase inaccurately**—preserve the original structure and intent.
    3. **Retain all key points and bullet points** while removing redundant information.
    4. **Maintain the section structure** (e.g., headings, bullet points).
    5. **If the text exceeds the token limit**, summarize each section independently while keeping accuracy.
    6. Avoid using the asterisk on the output.

    ---

    **STUDY NOTES:**
    {text}

    ---

    **EXTRACTED KEY POINTS:**
    """

    response = together.Completion.create(
        model="mistralai/Mistral-7B-Instruct-v0.1",
        prompt=prompt,
        max_tokens=2000,  # Reduce max tokens to prevent hallucination
        temperature=0.0,  # Reduce randomness
    )

    return response.choices[0].text.strip()


def clean_text(text):
    """Transliterate *text* with ``unidecode`` and return the result."""
    # Converts fancy quotes, bullets, etc. into ASCII equivalents
    return unidecode(text)


def generate_pdf(summary_text):
    """Render *summary_text* into a one-column PDF and return its path.

    The text is passed through ``clean_text`` first. The file is created
    with ``delete=False``, so the caller is responsible for removing it.

    Args:
        summary_text: The summary to write into the PDF.

    Returns:
        Path of the generated temporary ".pdf" file.
    """
    summary_text = clean_text(summary_text)  # Ensure ASCII-only text

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary_text)

    # Reserve a unique path, then close the handle *before* FPDF writes to
    # it: keeping it open leaks a descriptor and prevents FPDF from opening
    # the same file on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        output_path = temp_file.name
    pdf.output(output_path, "F")
    return output_path


# Streamlit UI
st.title("Study Note Summarizer")

if not together_api_key:
    # Without a key the summarization request cannot be authenticated, so
    # report it up front instead of failing later.
    st.error("TOGETHER_API_KEY is not set. Add it to your environment or .env file.")
    st.stop()

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        # getvalue() returns the whole buffer regardless of the current
        # stream position, unlike read().
        temp_pdf.write(uploaded_file.getvalue())
        pdf_path = temp_pdf.name

    try:
        extracted_text = extract_text_from_pdf(pdf_path)
    finally:
        # The temp copy is only needed for extraction; do not leave it behind.
        os.remove(pdf_path)

    st.text_area("Extracted Text", extracted_text, height=200)

    if st.button("Summarize Notes"):
        if not extracted_text.strip():
            # E.g. a scanned PDF with no text layer: there is nothing to
            # summarize, and an empty prompt would only invite made-up output.
            st.warning("No extractable text was found in this PDF.")
        else:
            summary = summarize_with_llama(extracted_text)
            st.text_area("Summarized Notes", summary, height=200)

            summary_pdf_path = generate_pdf(summary)
            try:
                with open(summary_pdf_path, "rb") as file:
                    summary_pdf_bytes = file.read()
            finally:
                # The bytes are held in memory for the download button, so
                # the temporary file can be removed immediately.
                os.remove(summary_pdf_path)

            st.download_button(
                "Download Summary PDF",
                summary_pdf_bytes,
                file_name="summary.pdf",
                mime="application/pdf",
            )