Spaces:
Sleeping
Sleeping
| import os | |
| import pandas as pd | |
| from openai import OpenAI | |
| import streamlit as st | |
| from dotenv import load_dotenv | |
| import re | |
# Merge the variables from a local .env file into the process environment,
# then read the OpenAI key from it (None when the variable is not set).
load_dotenv()
api_key = os.environ.get('OPENAI_API_KEY')
# Keep only the rows whose intent is rare enough (at most `max_count` rows)
def get_ballanced_intents(data, max_count=40):
    """Return the rows of *data* whose intent occurs at most *max_count* times.

    Args:
        data: DataFrame with an ``intent`` column.
        max_count: Largest number of rows an intent may have and still be
            kept. Defaults to 40, the threshold that used to be hard-coded.

    Returns:
        The subset of *data* (original index preserved) restricted to the
        intents that pass the threshold. The shape of the result is also
        printed as a progress message.
    """
    counts = data['intent'].value_counts()
    kept_intents = counts[counts <= max_count].index.tolist()
    filtered_data = data[data['intent'].isin(kept_intents)]
    print(f"Filtered data shape: {filtered_data.shape}")
    return filtered_data
# Assemble the chat prompt: user text, numbered examples, then the rating request
def create_prompt(user_text, utterances):
    """Build the prompt that asks the model to rate similarity.

    Args:
        user_text: The text typed by the user.
        utterances: Iterable of example utterances; they are listed in
            order and numbered from 1.

    Returns:
        A single string containing the user text, one numbered line per
        utterance, and the closing instruction to score each example on a
        0-to-1 scale.
    """
    header = f"User text: {user_text}\n\nUtterance examples:\n"
    numbered_examples = "".join(
        f"{position}. {example}\n"
        for position, example in enumerate(utterances, start=1)
    )
    request = "\nPlease rate the similarity of the user text to each of the utterance examples on a scale from 0 to 1."
    return header + numbered_examples + request
# A bare similarity value in [0, 1] with any number of decimals
# (e.g. "0", "0.85", "1", "1.0").
_SCORE_PATTERN = re.compile(r'0(?:\.\d+)?|1(?:\.0+)?')


def _extract_scores(reply):
    """Turn the model's reply text into one score string per non-blank line."""
    scores = []
    for line in reply.splitlines():
        if not line.strip():
            # Blank separator lines would otherwise become phantom '0'
            # entries and shift every later score off its utterance.
            continue
        # The score is whatever follows the last "- " on the line.
        candidate = line.split('- ')[-1].strip()
        print(candidate)
        # Anything that is not a clean number in [0, 1] counts as no match.
        scores.append(candidate if _SCORE_PATTERN.fullmatch(candidate) else '0')
    # An empty reply still yields a single '0' so callers never get [].
    return scores or ['0']


# Get the similarity scores for the user text and the utterance examples
def get_similarity_scores(prompt, temperature):
    """Ask the OpenAI chat model to score the prompt and parse its answer.

    The API key is read from the ``OPENAI_API_KEY`` environment variable
    (after loading a local ``.env`` file). Credentials must never be written
    into this file.
    NOTE(review): a literal key used to be embedded here; treat it as
    compromised and revoke it.

    Args:
        prompt: Prompt text, as produced by ``create_prompt``.
        temperature: Sampling temperature forwarded to the chat completion.

    Returns:
        A list of score strings, one per non-blank line of the reply, each
        either a number in [0, 1] exactly as the model wrote it or ``'0'``
        when the line could not be read as such a number. Returns ``None``
        (after showing a message in the Streamlit page) when no API key is
        configured.
    """
    load_dotenv()  # take environment variables from .env.
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        st.write("Please set the OPENAI_API_KEY environment variable.")
        return None

    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a helpful assistant. Respond without details in ()"},
            {"role": "user", "content": prompt},
        ],
        max_tokens=150,
        n=1,
        # NOTE(review): generation halts at the first "8." in the reply,
        # which looks like a cap of seven rated utterances - confirm.
        stop='8.',
        temperature=temperature,
    )
    # The message content can be None (e.g. a refusal); treat it as empty.
    reply = response.choices[0].message.content or ''
    return _extract_scores(reply)
def _score_as_number(score):
    """Return *score* as a float, or 0.0 when it cannot be read as one."""
    try:
        return float(score)
    except (TypeError, ValueError):
        return 0.0


# Get the most similar intent and its confidence score
def get_most_similar_intent(user_text, utterances, intents, temperature=0.5):
    """Pick the intent whose example utterance the model rates most similar.

    Args:
        user_text: The text typed by the user.
        utterances: Example utterances, in the same order as *intents*.
        intents: Intent labels; ``intents[i]`` belongs to ``utterances[i]``.
        temperature: Sampling temperature forwarded to the model.

    Returns:
        A ``(intent, score)`` pair where *score* is the score string returned
        for the winning utterance. When no scores are available (for example
        the API key is missing) or *intents* is empty, returns
        ``(None, '0')`` instead of raising.
    """
    prompt = create_prompt(user_text, utterances)
    similarity_scores = get_similarity_scores(prompt, temperature)
    print(similarity_scores)
    if not similarity_scores:
        return None, '0'

    # The model may answer with more lines than there are intents; the extra
    # entries cannot be mapped back to a label, so ignore them.
    candidates = similarity_scores[:len(intents)]
    if not candidates:
        return None, '0'

    # Compare numerically (the scores are strings); ties keep the first index.
    max_index = max(
        range(len(candidates)),
        key=lambda position: _score_as_number(candidates[position]),
    )
    print(f'max_index: {max_index}')
    return intents[max_index], candidates[max_index]