Spaces:
Build error
Build error
| import os | |
| import tempfile | |
| import streamlit as st | |
| import fitz # PyMuPDF for PDFs | |
| import together | |
| from fpdf import FPDF | |
| from dotenv import load_dotenv | |
| from unidecode import unidecode | |
# Load API key
# Read variables from a local .env file (if any) into the process environment.
load_dotenv()
together_api_key = os.getenv("TOGETHER_API_KEY")
if not together_api_key:
    # Assigning None into os.environ raises an opaque TypeError; fail with an
    # actionable message instead and halt this script run.
    st.error(
        "TOGETHER_API_KEY is not set. "
        "Add it to the environment or to a .env file."
    )
    st.stop()
os.environ["TOGETHER_API_KEY"] = together_api_key
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page in a PDF, joined by newlines.

    Args:
        pdf_path: Filesystem path of the PDF to open with PyMuPDF.

    Returns:
        A single string: the ``"text"`` extraction of each page, in page
        order, separated by ``"\n"``.
    """
    # The context manager closes the document (and releases the file handle)
    # even if extraction of a page raises.
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text("text") for page in doc)
def summarize_with_llama(
    text,
    *,
    model="mistralai/Mistral-7B-Instruct-v0.1",
    max_tokens=2000,
    temperature=0.0,
):
    """Ask a Together-hosted model to extract key points from study notes.

    Despite the function name, the default model is a Mistral instruct model.

    Args:
        text: The study notes to condense.
        model: Together model identifier passed to the completion call.
        max_tokens: Value forwarded as ``max_tokens`` to the completion call.
        temperature: Sampling temperature; ``0.0`` reduces randomness.

    Returns:
        The stripped text of the first completion choice, or ``""`` when
        ``text`` is empty or whitespace-only (no API call is made).
    """
    # With no notes to work from, the model could only invent content,
    # so skip the request entirely.
    if not text or not text.strip():
        return ""

    prompt = f"""
Extract key points from the following study notes while maintaining **strict adherence** to the provided material.
### **Guidelines:**
1. **Do not add, infer, or introduce** new topics, explanations, or external examples.
2. **Do not paraphrase inaccurately**—preserve the original structure and intent.
3. **Retain all key points and bullet points** while removing redundant information.
4. **Maintain the section structure** (e.g., headings, bullet points).
5. **If the text exceeds the token limit**, summarize each section independently while keeping accuracy.
6. Avoid using the asterisk on the output.
---
**STUDY NOTES:**
{text}
---
**EXTRACTED KEY POINTS:**
"""
    response = together.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,  # Reduce randomness
    )
    # NOTE(review): attribute-style access to the response is kept from the
    # original code; confirm it matches the installed `together` SDK version.
    return response.choices[0].text.strip()
def clean_text(text):
    """Return ``text`` after transliteration by ``unidecode``.

    Used to turn fancy quotes, bullets and similar characters into their
    ASCII equivalents.
    """
    ascii_text = unidecode(text)
    return ascii_text
def generate_pdf(summary_text):
    """Render the summary into a single-column PDF and return its path.

    Args:
        summary_text: Text to place in the PDF. It is passed through
            ``clean_text`` first so that only ASCII characters are rendered.

    Returns:
        Path of a newly created ``.pdf`` temporary file. The file is not
        deleted automatically; the caller is responsible for removing it.
    """
    summary_text = clean_text(summary_text)  # Ensure ASCII-only text

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary_text)

    # Reserve a unique path, then close the handle before FPDF writes to it:
    # leaving it open leaks a file descriptor and blocks the write on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        output_path = temp_file.name

    pdf.output(output_path, "F")
    return output_path
# Streamlit UI
def _remove_quietly(path):
    """Best-effort removal of a temporary file created by this script."""
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never break the page (e.g. the file is still locked).
        pass


st.title("Study Note Summarizer")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    # The extractor opens PDFs by path, so spool the upload to disk first.
    # getvalue() returns the whole buffer regardless of the current stream
    # position, so it is safe to call on every script rerun.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        temp_pdf.write(uploaded_file.getvalue())
        pdf_path = temp_pdf.name

    try:
        extracted_text = extract_text_from_pdf(pdf_path)
    finally:
        # delete=False means nothing else will remove the spooled upload.
        _remove_quietly(pdf_path)

    st.text_area("Extracted Text", extracted_text, height=200)

    if st.button("Summarize Notes"):
        if not extracted_text.strip():
            # Nothing to summarize (e.g. a scanned, image-only PDF).
            st.warning("No extractable text was found in this PDF.")
        else:
            summary = summarize_with_llama(extracted_text)
            st.text_area("Summarized Notes", summary, height=200)

            summary_pdf_path = generate_pdf(summary)
            try:
                # Load the PDF into memory so the temp file can be removed
                # right away instead of accumulating on disk.
                with open(summary_pdf_path, "rb") as file:
                    summary_pdf_bytes = file.read()
            finally:
                _remove_quietly(summary_pdf_path)

            st.download_button(
                "Download Summary PDF",
                summary_pdf_bytes,
                file_name="summary.pdf",
                mime="application/pdf",
            )