CristopherWVSU committed on
Commit
5b1cb60
·
verified ·
1 Parent(s): 63fb0f9

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +85 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import streamlit as st
4
+ import fitz # PyMuPDF for PDFs
5
+ import together
6
+ from fpdf import FPDF
7
+ from dotenv import load_dotenv
8
+ from unidecode import unidecode
9
+
10
+
11
# Load the Together API key from the process environment or a local .env file.
load_dotenv()
together_api_key = os.getenv("TOGETHER_API_KEY")
if not together_api_key:
    # os.environ only accepts str values, so assigning a missing (None) key
    # would raise TypeError. Surface an actionable message and halt instead.
    st.error("TOGETHER_API_KEY is not set. Add it to your environment or .env file.")
    st.stop()
os.environ["TOGETHER_API_KEY"] = together_api_key
15
+
16
+ # Function to extract text from PDF
17
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of the PDF at ``pdf_path``.

    The text of each page is joined with a newline, in page order.
    """
    # The context manager closes the document when the block exits, even if
    # extracting a page raises.
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text("text") for page in doc)
21
+
22
def summarize_with_llama(text):
    """Ask the Together completion API to extract key points from ``text``.

    Despite the function name, the model requested below is
    ``mistralai/Mistral-7B-Instruct-v0.1``.

    Args:
        text: Study notes to condense.

    Returns:
        The stripped text of the first completion choice, or ``""`` when
        ``text`` is empty or whitespace-only (no API call is made then).
    """
    # Nothing to summarize (e.g. a PDF with no extractable text): skip the API
    # call rather than asking the model to summarize an empty document.
    if not text or not text.strip():
        return ""

    prompt = f"""
    Extract key points from the following study notes while maintaining **strict adherence** to the provided material.

    ### **Guidelines:**
    1. **Do not add, infer, or introduce** new topics, explanations, or external examples.
    2. **Do not paraphrase inaccurately**—preserve the original structure and intent.
    3. **Retain all key points and bullet points** while removing redundant information.
    4. **Maintain the section structure** (e.g., headings, bullet points).
    5. **If the text exceeds the token limit**, summarize each section independently while keeping accuracy.
    6. Avoid using the asterisk on the output.
    ---
    **STUDY NOTES:**
    {text}
    ---

    **EXTRACTED KEY POINTS:**
    """

    response = together.Completion.create(
        model="mistralai/Mistral-7B-Instruct-v0.1",
        prompt=prompt,
        max_tokens=2000,  # upper bound on the length of the generated summary
        temperature=0.0,  # reduce randomness
    )

    return response.choices[0].text.strip()
49
+
50
def clean_text(text):
    """Return ``text`` transliterated to ASCII with ``unidecode``.

    Fancy quotes, bullets and similar characters are replaced by their ASCII
    equivalents.
    """
    ascii_text = unidecode(text)
    return ascii_text
52
+
53
def generate_pdf(summary_text):
    """Render ``summary_text`` into a PDF file and return that file's path.

    The PDF is written to a newly created temporary ``.pdf`` file that is not
    deleted automatically; the caller is responsible for removing it.
    """
    # Ensure ASCII-only text before handing it to FPDF.
    summary_text = clean_text(summary_text)

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary_text)

    # Reserve a unique path, then close our handle before FPDF writes to it so
    # no file descriptor is left open and two writers never share the file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        output_path = temp_file.name
    pdf.output(output_path, "F")

    return output_path
67
# Streamlit UI
def _remove_quietly(path):
    """Delete ``path``; cleanup is best-effort, so an OSError is ignored."""
    try:
        os.remove(path)
    except OSError:
        pass


st.title("Study Note Summarizer")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file is not None:
    # The extractor takes a filesystem path, so spill the upload to a temp file.
    # getvalue() returns the full buffer regardless of the current read
    # position, unlike read().
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        temp_pdf.write(uploaded_file.getvalue())
        pdf_path = temp_pdf.name

    try:
        extracted_text = extract_text_from_pdf(pdf_path)
    finally:
        # Without this, every pass through the script leaves another temp PDF
        # on disk.
        _remove_quietly(pdf_path)

    st.text_area("Extracted Text", extracted_text, height=200)

    if st.button("Summarize Notes"):
        summary = summarize_with_llama(extracted_text)
        st.text_area("Summarized Notes", summary, height=200)

        summary_pdf_path = generate_pdf(summary)
        try:
            # Load the PDF into memory so the temp file can be removed right away.
            with open(summary_pdf_path, "rb") as file:
                summary_pdf_bytes = file.read()
        finally:
            _remove_quietly(summary_pdf_path)

        st.download_button(
            "Download Summary PDF",
            summary_pdf_bytes,
            file_name="summary.pdf",
            mime="application/pdf",
        )
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ together # For interacting with the Llama model API
2
+ streamlit # For creating the UI with file upload and button interaction
3
+ unidecode
4
+ python-dotenv # Provides the `dotenv` module (load_dotenv)
5
+ PyMuPDF # Provides the `fitz` module for PDFs
6
+ fpdf