MechanicalClerk / processAgain.py
WhaleCancer's picture
zz
99f1718
import csv
import askURL as askURL
import gradio as gr
def process_data(file, col_header, user_message, openAI_api_key):
    """Run ``askURL.process_url`` on every row's link and save the responses.

    The input is read as a UTF-8, tab-separated file with a header row. For
    each data row, the value of its ``Link`` column is passed to
    ``askURL.process_url`` together with ``user_message`` and
    ``openAI_api_key``; the returned value is stored in the row under
    ``col_header``. All rows are then written to ``output_2zzz.tsv``.

    Args:
        file: Object whose ``name`` attribute is the path of the input file
            (presumably a Gradio upload -- confirm against the caller).
        col_header: Name of the column that receives the responses. If the
            input already has a column with this name, its values are
            overwritten and the header is not duplicated.
        user_message: Message forwarded unchanged to ``askURL.process_url``.
        openAI_api_key: API key forwarded unchanged to
            ``askURL.process_url``.

    Returns:
        The output path, ``'output_2zzz.tsv'`` (relative to the current
        working directory; an existing file of that name is overwritten).

    Raises:
        gr.Error: If the API key, user message, or column header is empty,
            if ``file`` is ``None``, or if the input has no ``Link`` column.
    """
    # Guard clauses, checked in the same order as before. Truthiness also
    # rejects None, not only the empty string.
    if not openAI_api_key:
        raise gr.Error("You need to enter an API key")
    if not user_message:
        raise gr.Error("You need to enter user_message")
    if not col_header:
        raise gr.Error("You need to enter col_header")
    if file is None:
        raise gr.Error("You need to enter file")

    # newline='' lets the csv module do its own line-ending handling.
    with open(file.name, 'r', encoding="utf-8", newline='') as input_file:
        reader = csv.DictReader(input_file, delimiter='\t')
        data = list(reader)
        # fieldnames is None for a completely empty file.
        fieldnames = list(reader.fieldnames or [])

    # Fail with a readable message before any request is sent, instead of a
    # bare KeyError on the first row.
    if 'Link' not in fieldnames:
        raise gr.Error("The input file needs a 'Link' column")

    for row in data:
        row[col_header] = askURL.process_url(
            row['Link'], user_message, openAI_api_key
        )

    # Only extend the header when the column is new; a repeated name would
    # produce two identically named columns in the output.
    if col_header not in fieldnames:
        fieldnames.append(col_header)

    output_fname = 'output_2zzz.tsv'
    # Write with the same encoding the input was read with, so non-ASCII
    # responses do not depend on the platform's default encoding.
    with open(output_fname, 'w', encoding="utf-8", newline='') as output_file:
        writer = csv.DictWriter(
            output_file,
            delimiter='\t',
            fieldnames=fieldnames,
            lineterminator='\n',
        )
        writer.writeheader()
        writer.writerows(data)
    return output_fname
def attach_articles(file):
    """Add a ``Content`` column holding the text fetched for each row's link.

    The input is read as a UTF-8, tab-separated file with a header row. For
    each data row, the value of its ``Link`` column is passed to
    ``askURL.get_main_text_from_url``; tabs and newlines in the result are
    replaced by spaces and the text is stored under ``Content``. Progress is
    printed once per row. All rows are then written to ``output_2zzz.tsv``.

    Args:
        file: Object whose ``name`` attribute is the path of the input file.

    Returns:
        The output path, ``'output_2zzz.tsv'`` (relative to the current
        working directory; an existing file of that name is overwritten).

    Raises:
        KeyError: If a data row is processed and the input has no ``Link``
            column.
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(file.name, 'r', encoding="utf-8", newline='') as input_file:
        reader = csv.DictReader(input_file, delimiter='\t')
        data = list(reader)
        # fieldnames is None for a completely empty file.
        fieldnames = list(reader.fieldnames or [])

    total = len(data)
    for rowcount, row in enumerate(data, start=1):
        print(f'rowcount {rowcount} of {total}')
        # Assumes get_main_text_from_url returns a str -- TODO confirm it
        # never returns None for unreachable pages.
        content = askURL.get_main_text_from_url(row['Link'])
        # Tab is the column delimiter and newline the row terminator of the
        # output, so both are flattened to spaces inside the cell.
        row["Content"] = content.replace('\t', ' ').replace('\n', ' ')

    # Only extend the header when the column is new; a repeated name would
    # produce two identically named columns in the output.
    if "Content" not in fieldnames:
        fieldnames.append("Content")

    output_fname = 'output_2zzz.tsv'
    # Write with the same encoding the input was read with, so non-ASCII
    # article text does not depend on the platform's default encoding.
    with open(output_fname, 'w', encoding="utf-8", newline='') as output_file:
        writer = csv.DictWriter(
            output_file,
            delimiter='\t',
            fieldnames=fieldnames,
            lineterminator='\n',
        )
        writer.writeheader()
        writer.writerows(data)
    return output_fname