import os

import faiss
import numpy as np
import openai
import pandas as pd
# Semantic search over a CSV file: embed every row of `text_column` with the
# OpenAI embeddings endpoint, index the vectors with FAISS, then return the
# rows closest to a question typed by the user.

EMBEDDING_MODEL = "text-embedding-ada-002"
# Number of texts sent per embeddings request; keeps each request small
# instead of issuing one request per row or one huge request for the file.
EMBEDDING_BATCH_SIZE = 100
# Maximum number of similar rows to show for a question.
TOP_K = 5

# Configure the client once; the key is read from the environment so it never
# has to be written into the source file.
openai.api_key = os.getenv("OPENAI_API_KEY")


def embed_texts(texts):
    """Embed a sequence of strings with the OpenAI embeddings endpoint.

    Args:
        texts: Non-empty sequence of strings to embed.

    Returns:
        A float32 numpy array with one row per input text, in input order.
        float32 is the dtype FAISS expects for both `add` and `search`.
    """
    response = openai.Embedding.create(model=EMBEDDING_MODEL, input=list(texts))
    # Each returned item carries the position of its input text; sort on it so
    # row i of the result always corresponds to texts[i].
    ordered = sorted(response["data"], key=lambda item: item["index"])
    return np.array([item["embedding"] for item in ordered], dtype="float32")


def vectorize_user_question(question):
    """Embed a single question as a (1, dim) float32 array for `index.search`."""
    return embed_texts([question])


# Load your custom CSV data. os.path.join keeps the path portable instead of
# hard-coding the Windows separator.
data = pd.read_csv(os.path.join(os.getcwd(), "Learning_Pathway_Index.csv"))

texts = data["text_column"].tolist()
if not texts:
    raise ValueError("Learning_Pathway_Index.csv contains no rows to index")

# Vectorize the texts batch by batch and stack the results into one matrix.
vectors = np.vstack(
    [
        embed_texts(texts[start:start + EMBEDDING_BATCH_SIZE])
        for start in range(0, len(texts), EMBEDDING_BATCH_SIZE)
    ]
)

# Initialize and populate the FAISS index. The dimension is taken from the
# embeddings themselves so it cannot drift out of sync with the chosen model.
vector_dimension = vectors.shape[1]
index = faiss.IndexFlatL2(vector_dimension)
index.add(vectors)

# Accept a question from the user and vectorize it the same way as the data.
user_question = input("Ask a question: ")
user_vector = vectorize_user_question(user_question)

# Search for similar items in the FAISS index. Never ask for more neighbours
# than the index holds: FAISS pads missing results with -1, which `iloc`
# would silently interpret as "last row".
k = min(TOP_K, index.ntotal)
distances, indices = index.search(user_vector, k)

# Retrieve and display the similar items
similar_items = data.iloc[indices[0]]
print(similar_items)