# MechanicalClerk / app.py
# WhaleCancer's picture
# Update app.py
# f52ca81
import gradio as gr
import gnspg as gnspg
import redirect as redirect
import processAgain as processAgain
import askURL
def set_list(file):
    """Return the text content of a list file.

    Args:
        file: Either an object exposing the file's path as ``.name`` or a
            plain path string.

    Returns:
        The file's entire decoded text as a single string. No stripping or
        splitting is done here.
    """
    # Accept both a wrapper object (path held in ``.name``) and a bare path.
    path = getattr(file, "name", file)
    # Decode explicitly as UTF-8 so the result does not depend on the host
    # locale; list entries may contain non-ASCII characters.
    with open(path, "r", encoding="utf-8") as f:
        return f.read()
def process_files(website_text_list, subject_text_list, topic_text_list, websiteSearchMode, txt_projectName):
    """Run the search-matrix generator and return the ``name`` of its result.

    All five arguments are forwarded, unchanged and in the same order, to
    ``gnspg.generate_prompt_v2``. A progress line is printed before the call
    and the resulting ``name`` is printed afterwards.
    """
    print('run process_files...')
    generator_args = (
        website_text_list,
        subject_text_list,
        topic_text_list,
        websiteSearchMode,
        txt_projectName,
    )
    matrix_file = gnspg.generate_prompt_v2(*generator_args)
    result_name = matrix_file.name
    print('result is: ' + result_name)
    return result_name
def transfer_file(from_element):
    """Return the path held by one file value so it can feed another component.

    Args:
        from_element: An object exposing the file path as ``.name``, a plain
            path string, or ``None`` when there is no file to hand over.

    Returns:
        The path, or ``None`` when ``from_element`` is ``None``.
    """
    # Without this guard an empty source raises AttributeError on ``.name``.
    if from_element is None:
        return None
    # Fall back to the value itself when it is already a bare path.
    return getattr(from_element, "name", from_element)
# Declarative Gradio UI: an instructions tab, the Google News Matrix Search
# tab and the AI Interrogation tab, followed by the event wiring and launch.
# NOTE(review): the nesting below is reconstructed from the order of the
# `with` statements -- confirm it against the deployed layout.
with gr.Blocks() as demo:
    # Tab 1: static HTML help text only; nothing here is wired to an event.
    with gr.Tab("๐Ÿ“– Instructions"):
        gr.HTML("<p>Hello! The <a href='https://en.wikipedia.org/wiki/Mechanical_Turk'>Mechanical Clerk</a> is a program intended to streamline research tasks. This program will be updated continously with functions. As it stands, it has two functions: Google News Matrix Search and OpenAI Search Result Interrogation</p>")
        gr.HTML("<p>๐Ÿ—’๏ธ The spiral notebook emoji indicates a '.txt' file is expected. All '.txt' files accepted by the Mechanical Clerk are comma seperated lists on a single line.</p>")
        gr.HTML("<p>๐Ÿ—ƒ๏ธ The index card catalogue emoji indicates a '.tsv' file is expected. All '.tsv' files accepted by the Mechanical Clerk are formatted with the same 5 columns 'Title', 'Date', 'Media', 'Link', and 'Tags'. There may be additional columns added or expected by other functions.</p>")
        gr.HTML("<h2>๐Ÿ“ฐ Google News Matrix Search</h2>")
        gr.HTML("<p>This program takes in a list of Subjects and Websites. A list of Topics can also be supplied. The Mechanical Clerk then creates and executes a search matrix based on your inputs. The output of this process is a .csv file with the Title, Media, Date, Link, and Tag as column headers. Tags are are decided by combinging the list of Subjects and Stopics (if any).</p>")
        gr.HTML("<h3>Project Name</h3>")
        gr.HTML("<p>A name for this project. This is only used for naming various output files.</p>")
        gr.HTML("<h3>๐Ÿง‘๐Ÿ™๏ธ๐Ÿ‘” Subjects</h3>")
        gr.HTML("<p>For this tool, this is the main subject or subjects of your search. You can enter anything in this you want, but the intended use is that this be a list of proper nouns. By selecting only proper nouns, you can ensure relevant and manageable results. Suggested types of proper nouns are full names (e.g. 'David Eby'), locations (e.g. 'City of Vancouver, Vancouver'), and organization names (e.g. 'Coastal GasLink, CGL').</p>")
        gr.HTML("<h3>โ˜€๏ธ๐ŸŒก๏ธ๐Ÿ”ฅTopics (Optional)</h3>")
        gr.HTML("<p>For this tool, these are topics to limit the search to. The search will only return results that fit any one of the given subjects and at least one topic.</p>")
        gr.HTML("<h3>๐ŸŒ๐Ÿ”—๐Ÿ“  Websites</h3>")
        gr.HTML("<p>This is a list of news websites. Google determines what sites do or do not show up in Google News, and it is probably broader than what you might expect. When using this tool in whitelist mode, only the selected sites will be searched. When using this tool in blacklist mode, the selected sites will be excluded from searches. The recommend use of these modes is to search your subjects + topics in blacklist mode, find all the relevant news sites you want to include in your final output list, and then make that the website list for the whitelist mode search.</p>")
        gr.HTML("<h3>๐Ÿ”ฎ๐Ÿง™โœจ Generating results...</h3>")
        gr.HTML("<p>Once you setup the subjects, topics (if we choose to include them), and websites, we can generate some results.</p>")
        gr.HTML("<p>Generate. This step generates the list of searches that will be sent to Google News sequentially.</p>")
        gr.HTML("<p>Execute. This executes every search in the generated list. Results are tagged with subjects and topics.</p>")
        gr.HTML("<p>Process. This step takes the Google News redirect link and replaces it with the actual link to the article.</p>")
        gr.HTML("<p>Include. This steps takes the text of the article and includes it in a column. This is needed for AI interrogation of article content, and not really intended to be useful to humans. If you are not using the AI Interrogation function, this isn't really useful.</p>")
        gr.HTML("<h2>๐Ÿค– OpenAI Search Result Interrogation</h2>")
        gr.HTML("<p>This program allows you to interrogate the results of a Google News Matrix Search using GPT. In simple terms, it allows you to ask the same question of each article's contents and then store the responses in a new column. One way you could use this is to ask for a summary of each article.</p>")
    # Tab 2: inputs for the search matrix plus one button/file pair per
    # pipeline step (generate, execute, process, include).
    with gr.Tab("๐Ÿ“ฐ Google News Matrix Search"):
        with gr.Row():
            txt_projectName = gr.Textbox(label="Project Name", info="A short name to be used for this project. Used only in output file names.")
        with gr.Row():
            gr.HTML("<h2>Input Configuration</h2>")
        # Each list input is a wide textbox beside a narrow .txt upload slot;
        # the upload handlers further down copy the file text into the textbox.
        with gr.Row():
            gr.HTML("<h3>Subjects</h3>")
        with gr.Row():
            with gr.Column(scale=4):
                subject_text_list = gr.Textbox(lines=2, show_label=False, placeholder='A comma separated list of subjects...')
            with gr.Column(scale=1):
                uploaded_subjectList = gr.File(label="๐Ÿ—’๏ธ Subject List", file_types=[".txt"], height=85)
        with gr.Row():
            gr.HTML("<h3>Topics (Optional)</h3>")
        with gr.Row():
            with gr.Column(scale=4):
                topic_text_list = gr.Textbox(lines=2, show_label=False, placeholder='A comma separated list of topics...')
            with gr.Column(scale=1):
                uploaded_topicList = gr.File(label="๐Ÿ—’๏ธ Topic List", file_types=[".txt"], height=85)
        with gr.Row():
            gr.HTML("<h3>Websites</h3>")
        with gr.Row():
            with gr.Column(scale=4):
                website_text_list = gr.Textbox(lines=2, show_label=False, placeholder='A comma separated list of websites...')
            with gr.Column(scale=1):
                uploaded_websiteList = gr.File(label="๐Ÿ—’๏ธ Website List", file_types=[".txt"], height=85)
        with gr.Row():
            # NOTE(review): the instructions text only describes whitelist and
            # blacklist modes; what "Aggressive" does is decided in gnspg.
            websiteSearchMode = gr.Radio(["Whitelist", "Blacklist", "Aggressive"], label="Website Search Mode")
        with gr.Row():
            gr.HTML("<h2>Generate Results</h2>")
        # Each row below pairs a step button with the File component that
        # receives that step's output and feeds the next step as input.
        with gr.Row():
            with gr.Column():
                btn_generateSearchMatrix = gr.Button("๐Ÿ‡ Generate [Search Matrix]")
            with gr.Column():
                uploaded_SearchMatrix = gr.File(label="๐Ÿ—’๏ธ [Search Matrix]", file_types=['.txt'], height=85)
        with gr.Row():
            with gr.Column():
                btn_executeSearchMatrix = gr.Button("๐ŸŒ Execute [Search Matrix]")
            with gr.Column():
                uploaded_SearchResults = gr.File(label="๐Ÿ—ƒ๏ธ [Search Matrix Results]", file_types=['.tsv'], height=85)
        with gr.Row():
            with gr.Column():
                btn_preprocessSearchResults = gr.Button("๐Ÿข Process [Search Matrix Results]")
            with gr.Column():
                file_preprocessedSearchResults = gr.File(label="๐Ÿ—ƒ๏ธ [Cleaned Search Matrix Results]", file_types=['.tsv'], height=85)
        with gr.Row():
            with gr.Column():
                btn_AttachArticles = gr.Button("๐ŸŒ Include [Processed Search Matrix Results]")
            with gr.Column():
                file_AttachedSearchResults = gr.File(label="๐Ÿ—ƒ๏ธ [Cleaned Search Matrix Results Including Articles]", file_types=['.tsv'], height=85)
        # tokenizer (disabled placeholder row, kept for reference)
        # with gr.Row():
        # with gr.Column():
        # gr.Button('')
        # with gr.Column():
        # gr.File('')
        with gr.Row():
            btn_sendToAIInterrogationTab = gr.Button('Send to AI Interrogation Tab', variant='primary')
    # Tab 3: ask the same prompt of every row in a results file.
    with gr.Tab("๐Ÿค– AI Interrogation"):
        with gr.Row():
            gr.HTML("<h2>AI Interrogation</h2>")
        with gr.Row():
            file_interrogatedInput = gr.File(label="๐Ÿ—ƒ๏ธ [Input Interrogated Results]", file_types=['.tsv'], height=85)
        with gr.Row():
            # Both names are bound to the same Textbox; only `openAI_api_key`
            # is referenced again in this block.
            openAI_api_key = api_keyTextbox = gr.Textbox(lines=1, label="openai.api_key", info="Only needed if you want to use AI functionality.", type='password')
        with gr.Row():
            textRowHeader = gr.Textbox(lines=1, label="Column Header", placeholder="The header for the column that will contain the answer...")
        with gr.Row():
            textInquiryPrompt = gr.Textbox(lines=4, label="Inquiry or Directive", placeholder="Ask a question or give a direction to the AI.")
        with gr.Row():
            btn_interrogatePreprocessedSearchResults = gr.Button('๐ŸŒ๐Ÿ’ธ AI Interrogation')
        with gr.Row():
            file_interrogatedResults = gr.File(label="๐Ÿค– [Output Interrogated Results]", file_types=['.tsv'], height=85)
        with gr.Row():
            btn_sendToTopOfTheAIInterrogationTab = gr.Button('Send to top of the AI Interrogation Tab', variant='primary')
        # Disabled "clear all" button, kept for reference:
        # with gr.Row():
        # gr.ClearButton(value="๐Ÿ—‘๏ธ Clear All", components=[subject_text_list, uploaded_subjectList, topic_text_list, uploaded_topicList, website_text_list, uploaded_websiteList, websiteSearchMode, uploaded_SearchMatrix, uploaded_SearchResults])
    # Upload handlers: when a list file is uploaded, read it with set_list and
    # pass the text to the matching textbox's update(); a falsy `file`
    # short-circuits to None without reading anything.
    uploaded_subjectList.upload(fn=lambda file: subject_text_list.update(set_list(file)) if file else None,
                                inputs=[uploaded_subjectList],
                                outputs=[subject_text_list])
    uploaded_topicList.upload(fn=lambda file: topic_text_list.update(set_list(file)) if file else None,
                              inputs=[uploaded_topicList],
                              outputs=[topic_text_list])
    uploaded_websiteList.upload(fn=lambda file: website_text_list.update(set_list(file)) if file else None,
                                inputs=[uploaded_websiteList],
                                outputs=[website_text_list])
    # Generate the matrix: process_files returns the generated file's name,
    # which is placed in the [Search Matrix] file slot.
    btn_generateSearchMatrix.click(process_files,
                                   inputs=[website_text_list, subject_text_list, topic_text_list, websiteSearchMode, txt_projectName],
                                   outputs=[uploaded_SearchMatrix])
    # Execute the search matrix; the handler's result goes to the results slot.
    btn_executeSearchMatrix.click(gnspg.execute_prompt,
                                  inputs=[uploaded_SearchMatrix, txt_projectName],
                                  outputs=[uploaded_SearchResults])
    # Preprocess the search results (update Google News URLs to clear URLs).
    btn_preprocessSearchResults.click(redirect.process_csv, inputs=[uploaded_SearchResults, txt_projectName], outputs=[file_preprocessedSearchResults])
    # Attach the articles to the search index.
    btn_AttachArticles.click(processAgain.attach_articles, inputs=[file_preprocessedSearchResults], outputs=[file_AttachedSearchResults])
    # Copy the final file of this tab into the AI tab's input slot.
    btn_sendToAIInterrogationTab.click(transfer_file, inputs=[file_AttachedSearchResults], outputs=[file_interrogatedInput])
    # NEXT TAB - AI TAB
    # Run the interrogation: input file, column header, prompt and API key are
    # handed to processAgain.process_data; its result fills the output slot.
    btn_interrogatePreprocessedSearchResults.click(processAgain.process_data, inputs=[file_interrogatedInput, textRowHeader, textInquiryPrompt, openAI_api_key], outputs=[file_interrogatedResults])
    # Feed the interrogation output back into the input slot so another
    # question can be asked of the same file.
    btn_sendToTopOfTheAIInterrogationTab.click(transfer_file, inputs=[file_interrogatedResults], outputs=[file_interrogatedInput])
# Launched unconditionally at module level (there is no `__main__` guard),
# so importing this module also starts the app.
demo.launch()