# Spaces: Build error
import collections
import os
from pathlib import Path

import gradio as gr
import pandas as pd  # for data manipulation (pip install pandas)
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
# Schema
# Every extracted record carries these string fields, and all of them are required.
_SCHEMA_FIELDS = ("keyword", "category")

schema = {
    "properties": {field: {"type": "string"} for field in _SCHEMA_FIELDS},
    "required": list(_SCHEMA_FIELDS),
}
# Input
# Both templates open with the same system message and close with the same
# formatting tip; only the middle human message differs.
_SYSTEM_MESSAGE = ("system", "You are an expert marketing researcher")
_FORMAT_TIP = (
    "human",
    "Tip: Make sure to answer in the correct format and DO NOT leave keywords without category and DO NOT skip keywords. Please categorize all the keywords that I give you, each keyword must have just one and only one category.",
)

# Template used when the caller supplies a list of categories.
prompt = ChatPromptTemplate.from_messages(
    [
        _SYSTEM_MESSAGE,
        (
            "human",
            "{prompt_input}.\n"
            "Here you have the categories splitted by coma: {categories}.\n"
            "and Here you have the keywords splitted by coma: {keywords}.",
        ),
        _FORMAT_TIP,
    ]
)

# Template used when no categories are supplied.
prompt_no_cat = ChatPromptTemplate.from_messages(
    [
        _SYSTEM_MESSAGE,
        (
            "human",
            "{prompt_input}.\n"
            "and Here you have the keywords splitted by coma: {keywords}.",
        ),
        _FORMAT_TIP,
    ]
)
# Chat model shared by both chains; the API key is read from the
# OpenAI_APIKEY environment variable.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=os.environ.get("OpenAI_APIKEY"),
)

# Extraction chain for runs with a category list ...
chain = create_extraction_chain(schema, llm, prompt=prompt, verbose=1)
# ... and its counterpart for runs without one.
chain_no_cat = create_extraction_chain(schema, llm, prompt=prompt_no_cat, verbose=1)
def _read_first_column(uploaded):
    """Return the non-empty cells of the first column of a CSV/Excel upload as strings.

    ``uploaded`` may be an object exposing the file path as ``.name`` or a
    plain path. Blank cells are dropped and every value is converted to
    ``str`` so the result can be safely joined into a prompt.
    """
    path = getattr(uploaded, "name", uploaded)
    if Path(path).suffix.lower() in (".xls", ".xlsx"):
        table = pd.read_excel(path)
    else:
        try:
            table = pd.read_csv(path)
        except Exception:
            # Best-effort fallback kept from the original behaviour: anything
            # that does not parse as CSV is retried as an Excel workbook.
            table = pd.read_excel(path)
    return table.iloc[:, 0].dropna().astype(str).tolist()


def run_chain(input_prompt, keywords_file, categories_file=None, batch_size=50):
    """Categorize the uploaded keywords with the extraction chains.

    Args:
        input_prompt: Instruction text substituted into the prompt template.
        keywords_file: Upload (object with ``.name`` or a path) whose first
            column holds the keywords; the first row is treated as a header.
        categories_file: Optional upload whose first column holds the
            categories. When ``None`` the ``chain_no_cat`` chain is used and
            all keywords are sent in a single request (``batch_size`` is
            ignored in that case).
        batch_size: Number of keywords sent per request when categories are
            supplied. Must be at least 1.

    Returns:
        A tuple ``(results, 'themes_results.csv')`` where ``results`` is the
        concatenation of what the chain returned for every batch and the
        second item is the path of the CSV written by ``results_to_csv``.

    Raises:
        ValueError: If categories are supplied and ``batch_size`` is below 1
            (the batching loop could never advance).
    """
    keywords = _read_first_column(keywords_file)

    if categories_file is not None:
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        categories = _read_first_column(categories_file)
        extractor = chain
        shared_inputs = {'prompt_input': input_prompt, 'categories': ','.join(categories)}
    else:
        extractor = chain_no_cat
        shared_inputs = {'prompt_input': input_prompt}
        # One single batch with every keyword; max() keeps the range step
        # valid when the keyword list is empty.
        batch_size = max(len(keywords), 1)

    results = []
    for start in range(0, len(keywords), batch_size):
        batch = keywords[start:start + batch_size]
        try:
            result = extractor.run({**shared_inputs, 'keywords': ','.join(batch)})
        except Exception as E:
            # A failing batch is reported and skipped so the remaining
            # batches still get processed.
            print('this batch did not worked from {} to {}'.format(start, start + batch_size))
            print(E)
            result = []
        results += result

    # Written once after all batches so the returned path exists even when
    # there were no keywords or every batch failed.
    results_to_csv(results)
    return results, 'themes_results.csv'
def results_to_csv(results, path='themes_results.csv'):
    """Write the extracted records to a CSV file.

    Args:
        results: Iterable of dicts, one per extracted record. Records may
            have different keys; a key missing from a record yields an empty
            cell instead of making the export fail.
        path: Destination file. Defaults to ``'themes_results.csv'``.

    The columns follow the order in which keys first appear in ``results``
    and no index column is written.
    """
    # Building the frame from the records themselves (rather than from
    # per-key lists) lets pandas align records whose key sets differ.
    pd.DataFrame(list(results)).to_csv(path, index=False)
# Gradio UI: an editable instruction prompt, two uploads (keywords and
# optional categories), one button per mode, and the text/CSV outputs.
with gr.Blocks() as demo:
    # Default instructions sent to the model; the user can edit them.
    # NOTE(review): the default text ends mid-sentence ("If the categories
    # are not given ") - confirm the intended ending with the author.
    prompt_input = gr.Text("""I need your help to analyze and categorize the provided list of keywords
into the appropriate categories.
The goal is to understand information demand on search engines within this industry. Each keyword represents a search and it should have a relation with the category.
Extract each keyword and assign the best category among the given categories. Return every keyword with the relative category in pairs.
If the categories are not given """)

    gr.Markdown("Upload CSV or xlsx with keywords: Just a csv with all the keywords in one column. Should have a header")
    # Extensions carry a leading dot, matching the categories uploader below.
    keywords_file = gr.File(file_types=['.csv', '.xlsx'], label='keywords')

    gr.Markdown("Upload CSV or xlsx with categories: Just a csv with all the categories in one column. Should have a header")
    categories_file = gr.File(file_types=['.csv', '.xlsx'], label='categories')

    btn = gr.Button(value="Run with categories")
    btn2 = gr.Button(value="Run without categories")

    txt_3 = gr.Textbox(value="", label="Output")
    output_file = gr.File(label="Output File",
                          file_count="single",
                          file_types=["", ".", ".csv", ".xls", ".xlsx"])

    # Both buttons call run_chain; the second omits the categories upload so
    # run_chain falls back to its categories_file=None default.
    btn.click(run_chain, inputs=[prompt_input, keywords_file, categories_file], outputs=[txt_3, output_file])
    btn2.click(run_chain, inputs=[prompt_input, keywords_file], outputs=[txt_3, output_file])

demo.launch()