File size: 2,982 Bytes
5b1cb60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os
import tempfile
import streamlit as st
import fitz  # PyMuPDF for PDFs
import together
from fpdf import FPDF
from dotenv import load_dotenv
from unidecode import unidecode


# Load the Together API key from the process environment (or a local .env file).
load_dotenv()
together_api_key = os.getenv("TOGETHER_API_KEY")
if not together_api_key:
    # os.environ only accepts str values, so assigning a missing (None) key
    # would die with an opaque TypeError. Fail early with a clear message.
    raise RuntimeError(
        "TOGETHER_API_KEY is not set. Define it in the environment or in a .env file."
    )
os.environ["TOGETHER_API_KEY"] = together_api_key

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of a PDF, joined by newlines.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        A single string holding each page's text in document order, with a
        newline between consecutive pages.
    """
    # Open the document as a context manager so it is closed (and its file
    # handle released) even if extraction raises part-way through.
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text("text") for page in doc)

def summarize_with_llama(text):
    """Extract the key points of study notes with a Together-hosted model.

    Despite the function name, the request is sent to
    ``mistralai/Mistral-7B-Instruct-v0.1``.

    Args:
        text: The study notes to condense.

    Returns:
        The model's completion with surrounding whitespace stripped, or an
        empty string when ``text`` is empty or whitespace-only (no request is
        made in that case).
    """
    # With nothing to summarize (e.g. a scanned PDF without a text layer) the
    # model could only invent content, so skip the API call entirely.
    if not text or not text.strip():
        return ""

    prompt = f"""

    Extract key points from the following study notes while maintaining **strict adherence** to the provided material.  



    ### **Guidelines:**

    1. **Do not add, infer, or introduce** new topics, explanations, or external examples.

    2. **Do not paraphrase inaccurately**—preserve the original structure and intent.

    3. **Retain all key points and bullet points** while removing redundant information.

    4. **Maintain the section structure** (e.g., headings, bullet points).

    5. **If the text exceeds the token limit**, summarize each section independently while keeping accuracy.

    6. Avoid using the asterisk on the output.

    ---  

    **STUDY NOTES:**  

    {text}  

    ---

    

    **EXTRACTED KEY POINTS:**

    """

    response = together.Completion.create(
        model="mistralai/Mistral-7B-Instruct-v0.1",
        prompt=prompt,
        max_tokens=2000,  # Upper bound on the length of the generated summary
        temperature=0.0  # Reduce randomness
    )

    # NOTE(review): attribute-style access assumes the installed `together`
    # SDK returns an object (not a plain dict) here - confirm against the
    # pinned SDK version.
    return response.choices[0].text.strip()

def clean_text(text):
    """Transliterate ``text`` with ``unidecode`` and return the result.

    Fancy quotes, bullets and similar characters are replaced by their ASCII
    equivalents.
    """
    ascii_text = unidecode(text)
    return ascii_text

def generate_pdf(summary_text):
    """Render the summary as a PDF in a temporary file and return its path.

    Args:
        summary_text: The summary to print; it is first passed through
            ``clean_text`` so only ASCII characters reach the PDF.

    Returns:
        Path of the generated ``.pdf`` temporary file. The file is created
        with ``delete=False``, so the caller is responsible for removing it.
    """
    summary_text = clean_text(summary_text)  # Ensure ASCII-only text

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary_text)

    # Reserve a unique path, then close our handle BEFORE FPDF writes to it.
    # Leaving it open leaks a file descriptor per call and prevents the file
    # from being reopened by name on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        output_path = temp_file.name
    pdf.output(output_path, "F")

    return output_path
# Streamlit UI
def _remove_quietly(path):
    """Delete a temporary file, ignoring failures (cleanup is best-effort)."""
    try:
        os.remove(path)
    except OSError:
        # E.g. the file is still held open elsewhere (Windows) or is already
        # gone. A stale temp file must not break the page.
        pass


st.title("Study Note Summarizer")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file is not None:
    # getvalue() returns the complete upload regardless of the stream
    # position. read() yields nothing once the buffer has been consumed,
    # which can happen when the script is re-run after a button click.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        temp_pdf.write(uploaded_file.getvalue())
        pdf_path = temp_pdf.name

    try:
        extracted_text = extract_text_from_pdf(pdf_path)
    finally:
        # The temp copy is only needed for extraction; without this a new
        # file would be left behind on every run of the script.
        _remove_quietly(pdf_path)
    st.text_area("Extracted Text", extracted_text, height=200)

    if st.button("Summarize Notes"):
        summary = summarize_with_llama(extracted_text)
        st.text_area("Summarized Notes", summary, height=200)

        summary_pdf_path = generate_pdf(summary)
        try:
            # Load the PDF into memory so the temp file can be removed
            # right away; the download button serves the bytes.
            with open(summary_pdf_path, "rb") as file:
                summary_pdf_bytes = file.read()
        finally:
            _remove_quietly(summary_pdf_path)
        st.download_button(
            "Download Summary PDF",
            summary_pdf_bytes,
            file_name="summary.pdf",
            mime="application/pdf",
        )