Spaces:
Runtime error
Runtime error
| from gpt_index import GPTListIndex, SimpleWebPageReader, BeautifulSoupWebReader, GPTSimpleVectorIndex,LLMPredictor | |
| from IPython.display import Markdown, display | |
| from langchain.agents import load_tools, Tool, initialize_agent | |
| from langchain.llms import OpenAI | |
| from langchain.agents import ZeroShotAgent, Tool, AgentExecutor | |
| from langchain.agents import initialize_agent, Tool | |
| from langchain import LLMChain | |
| from langchain import PromptTemplate | |
| import gradio as gr | |
| import pandas as pd | |
| import openai | |
| from sklearn.manifold import TSNE | |
| from sklearn.cluster import KMeans | |
| from openai.embeddings_utils import get_embedding | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import matplotlib | |
| import datetime | |
| from datetime import datetime, date, time, timedelta | |
| import os | |
| from PIL import Image | |
| from PIL import ImageOps | |
| from PIL import Image, ImageDraw, ImageFont | |
| from io import BytesIO | |
| import requests | |
| import gcsfs | |
# Handle to the Google Cloud Storage bucket that holds the last generated radar.
# NOTE(review): the token is 'anon'; getstuff() later writes to this bucket, which
# presumably requires the bucket to accept anonymous writes -- confirm.
fs = gcsfs.GCSFileSystem(project='createinsightsproject', token='anon')
fs.ls('trends_chrome_extension_bucket')
print('Started')

### download both text and image from cloud to display
with fs.open('trends_chrome_extension_bucket/lastradartext.txt', 'rb') as file:
    data_old = file.read()
print(data_old)

# The stored text holds six sections joined by the literal marker 'SEPERATOR'
# (the spelling is part of the stored format and must not be "corrected").
# value1..value6 are used below as placeholders for the output textboxes.
value1, value2, value3, value4, value5, value6 = str(data_old.decode()).split('SEPERATOR')

# Fetch the last radar image. A timeout keeps startup from hanging forever on a
# stalled connection, and raise_for_status() prevents an HTTP error page from
# being saved to disk as if it were a PNG.
_startup_image_response = requests.get(
    'https://storage.googleapis.com/trends_chrome_extension_bucket/lasttechradar.png',
    timeout=30,
)
_startup_image_response.raise_for_status()
img_data = _startup_image_response.content
with open('lasttechradar.png', 'wb') as handler:
    handler.write(img_data)
def getlastimage():
    """Download the most recently published radar image and its texts.

    The image is fetched over HTTPS and saved locally as 'lasttechradar1.png';
    the accompanying text file is read from the bucket through the module-level
    ``fs`` handle and split on the literal marker 'SEPERATOR'.

    Returns:
        A 7-tuple: the local image path followed by the six text sections,
        each stripped of surrounding whitespace.

    Raises:
        requests.RequestException: if the image download fails, times out or
            returns an HTTP error status.
        ValueError: if the stored text does not contain exactly six sections.
    """
    # Timeout so a stalled connection cannot hang the page load indefinitely;
    # raise_for_status so an error page is never written out as the image.
    response = requests.get(
        'https://storage.googleapis.com/trends_chrome_extension_bucket/lasttechradar.png',
        timeout=30,
    )
    response.raise_for_status()
    with open('lasttechradar1.png', 'wb') as handler:
        handler.write(response.content)

    with fs.open('trends_chrome_extension_bucket/lastradartext.txt', 'rb') as file:
        data_old = file.read()

    sections = data_old.decode().split('SEPERATOR')
    if len(sections) != 6:
        # Same exception type as the tuple unpacking this replaces, but with a
        # message that says what is actually wrong.
        raise ValueError(
            f"expected 6 'SEPERATOR'-delimited sections in lastradartext.txt, got {len(sections)}"
        )
    return ('lasttechradar1.png', *(section.strip() for section in sections))
def _ask_sites_for_technologies(sites, openapikey):
    """Ask a LangChain agent, once per site, which technologies the site mentions.

    For each host in ``sites`` the root URL is loaded with SimpleWebPageReader,
    indexed with GPTSimpleVectorIndex, and queried through a zero-shot agent
    whose only tool is a lookup into that index.

    Returns:
        A list with one agent answer per site, in the order of ``sites``.
    """
    answers = []
    for each in sites:
        print(each)
        Input_URL = "https://" + each
        documents = SimpleWebPageReader(html_to_text=True).load_data([Input_URL])
        index = GPTSimpleVectorIndex(documents)
        print('Came here 0')

        # The closure reads the loop variable `index`; that is safe here because
        # the agent runs (and finishes) within the same iteration.
        def querying_db(query: str):
            response = index.query(query)
            return response

        tools = [
            Tool(
                name="QueryingDB",
                func=querying_db,
                description="This function takes a query string as input and returns the most relevant answer from the documentation as output",
            )
        ]
        llm = OpenAI(temperature=0, openai_api_key=openapikey)
        print('Came here 1')
        query_string = "what are the top technologies mentioned?"
        agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
        result = agent.run(query_string)
        answers.append(result)
    return answers


def _split_technology_answers(answers):
    """Turn free-text answers into a flat technology list plus source indices.

    Known lead-in phrases, colons, "and" and "the" are removed or turned into
    commas, then each answer is split on commas.

    Returns:
        ``(technologies, company_indices)`` -- two equally long lists; the
        second holds, for each technology, the position of its answer in
        ``answers``.
    """
    technologies = []
    company_indices = []
    for company_index, answer in enumerate(answers):
        cleaned = answer.replace("The top technologies mentioned are ", "").replace("The technologies mentioned are ", "")
        cleaned = (
            cleaned.replace(":", "")
            .replace(" and ", ",")
            .replace("and ", ",")
            .replace(" and", ",")
            .replace(" the ", " ")
            .replace("the ", " ")
            .replace(" the", " ")
            .strip()
        )
        for item in cleaned.split(","):
            # Strip BEFORE the emptiness test so whitespace-only fragments are
            # dropped instead of becoming empty technology names.
            name = item.strip()
            if name:
                technologies.append(name)
                company_indices.append(company_index)
    return technologies, company_indices


def _complete_text(prompt, max_tokens):
    """Run one deterministic text-davinci-003 completion and return its text without newlines."""
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=0,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return response["choices"][0]["text"].replace("\n", "")


def _save_radar_plot(tech_df, vis_dims, dateforfilesave):
    """Scatter-plot the 2-D projection, coloured by cluster, to 'lasttechradar.png'."""
    colors = ["red", "darkorange", "darkgrey", "blue", "darkgreen"]
    x = [px for px, _ in vis_dims]
    y = [py for _, py in vis_dims]
    color_indices = tech_df['Cluster'].values
    colormap = matplotlib.colors.ListedColormap(colors)

    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        ax.scatter(x, y, c=color_indices, cmap=colormap, alpha=1, s=100)
        for i, txt in enumerate(tech_df['tech'].tolist()):
            ax.annotate(txt, (x[i], y[i]), fontsize=14)
        ax.set_title("Top Technologies as of " + dateforfilesave, fontsize=20)
        ax.axis('off')
        fig.savefig('lasttechradar.png', bbox_inches='tight')
    finally:
        # Figures stay registered with pyplot until closed; without this every
        # refresh would leave another figure in memory.
        plt.close(fig)


def getstuff(openapikey):
    """Rebuild the technology radar from four consulting-firm websites.

    Steps: query each site for its top technologies, embed and cluster the
    resulting technology names, draw the radar image, ask the completion model
    for an overall description and a theme per cluster, then store the texts
    and the image both locally and in the bucket behind the module-level ``fs``.

    Args:
        openapikey: OpenAI API key used for all model calls.

    Returns:
        A 7-tuple: the local image path 'lasttechradar.png', the four per-site
        answers (McKinsey, BCG, Bain, Accenture order), the overall
        description, and the concatenated per-cluster text.

    Raises:
        ValueError: if fewer than two technologies could be extracted, since
            the projection and clustering need at least two points.
    """
    dateforfilesave = datetime.today().strftime("%d-%m-%Y %I:%M%p")
    print(dateforfilesave)

    os.environ['OPENAI_API_KEY'] = str(openapikey)
    # NOTE(review): `openai` was imported before the environment variable above
    # is set, so the key is also set on the module explicitly for the direct
    # openai calls below -- confirm against the installed openai version.
    openai.api_key = str(openapikey)

    mainlistofanswers = _ask_sites_for_technologies(
        ['www.mckinsey.com', 'www.bcg.com', 'www.bain.com', 'www.accenture.com'],
        openapikey,
    )
    print('Came here 2')
    print(mainlistofanswers)

    newlistoftech, newlistofcompanies = _split_technology_answers(mainlistofanswers)
    n_samples = len(newlistoftech)
    if n_samples < 2:
        # Fail early with a clear message instead of an obscure error from
        # scikit-learn after the embedding calls have already been paid for.
        raise ValueError(f"need at least 2 technologies to build the radar, got {n_samples}")

    tech_df = pd.DataFrame()
    tech_df['tech'] = newlistoftech
    tech_df['company'] = newlistofcompanies
    print(newlistoftech)
    print('Came here 3')

    embedding_model = "text-embedding-ada-002"
    tech_df["embedding"] = tech_df['tech'].apply(lambda x: get_embedding(x, engine=embedding_model))
    print('Came here 4')

    # One row per technology, one column per embedding dimension.
    matrix = np.array(tech_df['embedding'].to_list())

    # Half the sample count with a floor of 5 (originally a constant 15), but
    # capped below the sample count because t-SNE rejects larger perplexities.
    perplexityvalue = min(max(n_samples / 2, 5), n_samples - 1)
    tsne = TSNE(n_components=2, perplexity=perplexityvalue, random_state=42, init='random', learning_rate=200)
    vis_dims = tsne.fit_transform(matrix)

    # KMeans cannot form more clusters than there are samples.
    n_clusters = min(5, n_samples)
    kmeans = KMeans(n_clusters=n_clusters, init="k-means++", random_state=42)
    kmeans.fit(matrix)
    tech_df["Cluster"] = kmeans.labels_
    print('Came here 5')

    _save_radar_plot(tech_df, vis_dims, dateforfilesave)
    print('Came here 6')

    desc_tmp = _complete_text(
        'I will give you top technologies list. Write a paragraph on it.\n\nTechnologies:' + ",".join(tech_df['tech'].tolist()),
        1024,
    )
    print(desc_tmp)
    print('Came here 7')

    clusterstextlist = []
    for i in range(n_clusters):
        print(f"Cluster {i} Theme:", end=" ")
        reviews = "\n".join(tech_df[tech_df['Cluster'] == i]['tech'].tolist())
        # NOTE(review): the prompt labels the list "Customer reviews" although it
        # holds technology names; left unchanged to keep model output stable.
        theme = _complete_text(
            f'What do the following technologies have in common?\n\nCustomer reviews:\n"""\n{reviews}\n"""\n\nTheme:',
            64,
        )
        print(theme)
        print(reviews)
        clusterstextlist.append("Cluster " + str(i) + "\nTheme:" + theme + '\n' + reviews + '\n' + "-" * 10 + '\n\n')
    clusters_text = "".join(clusterstextlist)

    # Six sections separated by the literal marker that the readers split on.
    textlist = [
        mainlistofanswers[0], "SEPERATOR",
        mainlistofanswers[1], "SEPERATOR",
        mainlistofanswers[2], "SEPERATOR",
        mainlistofanswers[3], "SEPERATOR",
        desc_tmp, "SEPERATOR",
        clusters_text,
    ]
    payload = "".join(f"{line}\n" for line in textlist)

    ###create file with new info locally & upload to bucket
    # utf-8 locally so the file matches the bytes uploaded to the bucket.
    with open('lastradartext.txt', 'w', encoding='utf-8') as f:
        f.write(payload)
    with fs.open('trends_chrome_extension_bucket/lastradartext.txt', 'wb') as file:
        file.write(payload.encode())
    print('Came here 8')

    ###upload image to cloud for next run display
    with open('lasttechradar.png', 'rb') as image_file:
        image_string = image_file.read()
    with fs.open('trends_chrome_extension_bucket/lasttechradar.png', 'wb') as file:
        file.write(image_string)

    return (
        'lasttechradar.png',
        mainlistofanswers[0],
        mainlistofanswers[1],
        mainlistofanswers[2],
        mainlistofanswers[3],
        desc_tmp,
        clusters_text,
    )
# Gradio UI: an API-key box and refresh button on top, the radar image beside
# the four per-site answers, then the overall description beside the cluster
# summary. NOTE(review): the original indentation was lost, so the exact
# Row/Column nesting below is reconstructed -- confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>ChatGPT Technology Radar</center></h1>")
    gr.Markdown(
        """What are the top technologies as of now? Let us query top consulting company websites & use ChatGPT to understand. \n\nShowcases ChatGPT integrated with real data. It shows how to get real-time data and marry it with ChatGPT capabilities. This demonstrates 'Chain of Thought' thinking using ChatGPT.\nLangChain & GPT-Index are both used.\n """
    )
    with gr.Row() as row:
        # The key is a secret, so mask it while typing; the label names the
        # OpenAI API (not "OpenAPI", which is an unrelated specification).
        textboxopenapi = gr.Textbox(
            placeholder="Enter OpenAI API Key...",
            lines=1,
            label='OpenAI API Key',
            type="password",
        )
        btn = gr.Button("Refresh")
    with gr.Row() as row:
        with gr.Column():
            output_image = gr.components.Image(label="Tech Radar", value='lasttechradar.png')
        with gr.Column():
            # Placeholders show the texts downloaded at startup until the
            # load/refresh handlers fill in real values.
            outputMck = gr.Textbox(placeholder=value1, lines=1, label='McKinsey View')
            outputBcg = gr.Textbox(placeholder=value2, lines=1, label='BCG View')
            outputBain = gr.Textbox(placeholder=value3, lines=1, label='Bain View')
            outputAcc = gr.Textbox(placeholder=value4, lines=1, label='Accenture View')
    with gr.Row() as row:
        with gr.Column():
            outputdesc = gr.Textbox(placeholder=value5, lines=1, label='Description')
        with gr.Column():
            outputclusters = gr.Textbox(placeholder=value6, lines=1, label='Clusters')

    # Both handlers return the same 7 values, in this component order.
    radar_outputs = [output_image, outputMck, outputBcg, outputBain, outputAcc, outputdesc, outputclusters]
    # "Refresh" regenerates the radar with the supplied key.
    btn.click(getstuff, inputs=[textboxopenapi], outputs=radar_outputs)
    # On page load, show the last published radar without calling any model.
    demo.load(getlastimage, [], radar_outputs)

demo.launch(debug=True)