# AI-powered PowerPoint generator — Streamlit app.
# (Removed scraped "Spaces: Running" status text that was not part of the source.)
import asyncio
import os
import re
from dataclasses import dataclass

import nest_asyncio
import pdfplumber
import streamlit as st
import torch
from pydantic_ai import Agent, RunContext, Tool
from pydantic_ai.messages import ModelMessage
from pydantic_ai.models.groq import GroqModel
from streamlit_pdf_viewer import pdf_viewer
from transformers import pipeline

import presentation as customClass
| # Load API key | |
| api_key = os.getenv("API_KEY") | |
| if not api_key: | |
| raise ValueError("API_KEY is not set in the environment variables.") | |
| data = [] | |
| result_data: list[customClass.PPT] = [] | |
| # Initialize models | |
| model = GroqModel("llama3-groq-70b-8192-tool-use-preview", api_key=api_key) | |
| summarizer = pipeline("summarization", model="facebook/bart-large-cnn") | |
| def split_into_token_chunks(text: str, max_tokens: int = 300) -> list: | |
| """ | |
| Splits a long string into chunks of a specified maximum number of tokens (words). | |
| """ | |
| tokens = text.split() | |
| return [' '.join(tokens[i:i + max_tokens]) for i in range(0, len(tokens), max_tokens)] | |
| def return_data() -> str: | |
| """Returns concatenated extracted data.""" | |
| return "\n".join(data) | |
| class SupportDependencies: | |
| db: str | |
| async def ppt_content(data): | |
| """ | |
| Generates PowerPoint content using an AI model. | |
| """ | |
| if not data: | |
| raise ValueError("No valid text found for PowerPoint generation.") | |
| agent = Agent( | |
| model, | |
| result_type=customClass.PPT, | |
| tools=[return_data], | |
| system_prompt=""" | |
| You are an expert in creating PowerPoint presentations. | |
| Create 5 slides: | |
| 1. Title Slide: Introduction about the presentation. | |
| 2. Methodology Slide: Summarize the methodology in detail. | |
| 3. Results Slide: Present key findings in bullet points. | |
| 4. Discussion Slide: Summarize implications and limitations. | |
| 5. Conclusion Slide: State the overall conclusion. | |
| Each slide should have: | |
| - Title: Clear and concise. | |
| - Text: Short and informative explanation. | |
| - Bullet Points: 3-5 summarized key takeaways. | |
| """ | |
| ) | |
| listOfString = split_into_token_chunks("\n".join(data)) | |
| message_history: list[ModelMessage] = [] | |
| result = agent.run_sync(user_prompt=f"Create a PowerPoint presentation from {listOfString[0]}", message_history=message_history) | |
| for i in range(1, len(listOfString)): | |
| result = agent.run_sync(user_prompt=f"Continue creating the PowerPoint presentation from {listOfString[i]}", message_history=result.all_messages()) | |
| print(result.data) | |
| def ai_ppt(data): | |
| """Runs the PowerPoint generation in an async loop.""" | |
| loop = asyncio.new_event_loop() | |
| asyncio.set_event_loop(loop) | |
| loop.run_until_complete(ppt_content(data=data)) | |
| def extract_data(feed): | |
| """Extracts text from PDF and appends to `data` list.""" | |
| global data | |
| data = [] # Reset data before extracting | |
| with pdfplumber.open(feed) as pdf: | |
| for p in pdf.pages: | |
| text = p.extract_text() | |
| if text: | |
| data.append(text) | |
| def main(): | |
| """Main Streamlit app function.""" | |
| st.title("AI-Powered PowerPoint Generator") | |
| uploaded_file = st.file_uploader("Choose a PDF file", type="pdf") | |
| if uploaded_file is not None: | |
| extract_data(uploaded_file) | |
| if st.button("Generate PPT"): | |
| try: | |
| ai_ppt(data) | |
| st.success("PowerPoint generation completed!") | |
| except Exception as e: | |
| st.error(f"Error generating PPT: {e}") | |
| # Display PDF | |
| binary_data = uploaded_file.getvalue() | |
| pdf_viewer(input=binary_data, width=700) | |
| if __name__ == '__main__': | |
| nest_asyncio.apply() | |
| main() | |