# AI-powered PowerPoint generator — Streamlit app.
# (Removed scraped "Spaces: Running" status text that was not part of the source.)
import asyncio
import os
import re
from dataclasses import dataclass

import nest_asyncio
import pdfplumber
import streamlit as st
import torch
from pydantic_ai import Agent, RunContext, Tool
from pydantic_ai.messages import ModelMessage
from pydantic_ai.models.groq import GroqModel
from streamlit_pdf_viewer import pdf_viewer
from transformers import pipeline

import presentation as customClass
| # Load API key | |
| api_key = os.getenv("API_KEY") | |
| if not api_key: | |
| raise ValueError("API_KEY is not set in the environment variables.") | |
| data = [] | |
| result_data: list[customClass.PPT] = [] | |
| # Initialize models | |
| model = GroqModel("llama3-groq-70b-8192-tool-use-preview", api_key=api_key) | |
| summarizer = pipeline("summarization", model="facebook/bart-large-cnn") | |
| def split_into_token_chunks(text: str, max_tokens: int = 300) -> list: | |
| """ | |
| Splits a long string into chunks of a specified maximum number of tokens (words). | |
| """ | |
| tokens = text.split() | |
| return [' '.join(tokens[i:i + max_tokens]) for i in range(0, len(tokens), max_tokens)] | |
| def return_data() -> str: | |
| """Returns concatenated extracted data.""" | |
| return "\n".join(data) | |
| class SupportDependencies: | |
| db: str | |
| async def ppt_content(data): | |
| """ | |
| Generates PowerPoint content using an AI model. | |
| """ | |
| if not data: | |
| raise ValueError("No valid text found for PowerPoint generation.") | |
| agent = Agent( | |
| model, | |
| result_type=customClass.PPT, | |
| tools=[return_data], | |
| system_prompt=""" | |
| You are an expert in creating PowerPoint presentations. | |
| Create 5 slides: | |
| 1. Title Slide: Introduction about the presentation. | |
| 2. Methodology Slide: Summarize the methodology in detail. | |
| 3. Results Slide: Present key findings in bullet points. | |
| 4. Discussion Slide: Summarize implications and limitations. | |
| 5. Conclusion Slide: State the overall conclusion. | |
| Each slide should have: | |
| - Title: Clear and concise. | |
| - Text: Short and informative explanation. | |
| - Bullet Points: 3-5 summarized key takeaways. | |
| """ | |
| ) | |
| listOfString = split_into_token_chunks("\n".join(data)) | |
| message_history: list[ModelMessage] = [] | |
| result = agent.run_sync(user_prompt=f"Create a PowerPoint presentation from {listOfString[0]}", message_history=message_history) | |
| for i in range(1, len(listOfString)): | |
| result = agent.run_sync(user_prompt=f"Continue creating the PowerPoint presentation from {listOfString[i]}", message_history=result.all_messages()) | |
| print(result.data) | |
| def ai_ppt(data): | |
| """Runs the PowerPoint generation in an async loop.""" | |
| loop = asyncio.new_event_loop() | |
| asyncio.set_event_loop(loop) | |
| loop.run_until_complete(ppt_content(data=data)) | |
| def extract_data(feed): | |
| """Extracts text from PDF and appends to `data` list.""" | |
| global data | |
| data = [] # Reset data before extracting | |
| with pdfplumber.open(feed) as pdf: | |
| for p in pdf.pages: | |
| text = p.extract_text() | |
| if text: | |
| data.append(text) | |
| def main(): | |
| """Main Streamlit app function.""" | |
| st.title("AI-Powered PowerPoint Generator") | |
| uploaded_file = st.file_uploader("Choose a PDF file", type="pdf") | |
| if uploaded_file is not None: | |
| extract_data(uploaded_file) | |
| if st.button("Generate PPT"): | |
| try: | |
| ai_ppt(data) | |
| st.success("PowerPoint generation completed!") | |
| except Exception as e: | |
| st.error(f"Error generating PPT: {e}") | |
| # Display PDF | |
| binary_data = uploaded_file.getvalue() | |
| pdf_viewer(input=binary_data, width=700) | |
| if __name__ == '__main__': | |
| nest_asyncio.apply() | |
| main() | |