Spaces:
Sleeping
Sleeping
import json
import os

import openai
import pandas as pd
def summarize(filename, gpt_key, model_name, csvname='data.csv'):
    """Summarize scraped articles with the OpenAI chat API and write a CSV.

    Reads a JSON file mapping source name -> list of article dicts (each
    with 'title', 'text', and 'url' keys), asks the model for a short
    summary and then for five talking points per article, and writes the
    collected results to a CSV file.

    Args:
        filename: Path to the JSON file of scraped article data.
        gpt_key: OpenAI API key.
        model_name: Chat model identifier passed to the API (e.g. "gpt-3.5-turbo").
        csvname: Output CSV path. Defaults to 'data.csv' (the original
            hard-coded value), so existing callers are unaffected.

    Returns:
        The path of the CSV file that was written.
    """
    openai.api_key = gpt_key

    # Load the scraped data; `with` guarantees the file handle is closed
    # (the original left it open for the whole run).
    with open(filename, encoding='utf-8') as f:
        all_data = json.load(f)

    # Collect one record per article and build the DataFrame once at the
    # end — pd.concat inside the loop is accidentally quadratic.
    rows = []
    for source, articles in all_data.items():
        for article in articles:
            combined_text = 'title: ' + article['title'] + '\n' + article['text']
            try:
                # First pass: four-sentence summary of the article/transcript.
                response = openai.ChatCompletion.create(
                    model=model_name,
                    messages=[
                        {"role": "system",
                         "content": "You are a helpful assistant."},
                        {"role": "user",
                         "content": f"Please summarize this news article text or youtube video transcript in four sentences or less. If no article/transcript is present, or it is unclear what the transcript is talking about, output 'Unable to summarize.'. {combined_text} "},
                    ])
                summarized_data = response['choices'][0]['message']['content']
                print(f"SUMMARY: {summarized_data} \n\n")

                # Second pass: talking points derived from the summary above.
                follow_up = openai.ChatCompletion.create(
                    model=model_name,
                    messages=[
                        {"role": "system",
                         "content": "You are a helpful assistant."},
                        {"role": "user",
                         "content": f"Using this article, give me five sequential talking points that I can use to make a shortform video. Do not use more than 100 words. If the summarized article says 'Unable to summarize,' output 'No talking points available'. {summarized_data}"},
                    ])
                talking_points_data = follow_up['choices'][0]['message']['content']
                print(f"TALKING POINTS: {talking_points_data} \n\n")

                rows.append({
                    "title": article["title"],
                    "source": source,
                    "url": article["url"],
                    "summarized_text": summarized_data,
                    "talking_points": talking_points_data,
                })
            except openai.error.InvalidRequestError as e:
                # Skip articles the API rejects (e.g. input over the model's
                # context limit) and keep processing the rest.
                print(f"An error occurred: {e}")
                continue

    finaldf = pd.DataFrame.from_records(
        rows,
        columns=["title", "source", "url", "summarized_text", "talking_points"],
    )
    finaldf.to_csv(csvname, index=False)
    return csvname