Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| """Skill Transformation Journey.ipynb | |
| Automatically generated by Colaboratory. | |
| Original file is located at | |
| https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw | |
| """ | |
| import gradio as gr | |
| import re | |
| import openai | |
| from openai import OpenAI | |
| import pandas as pd | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import linear_kernel | |
# OpenAI API client.
# The API key is intentionally NOT stored in this file: when no api_key is
# passed, the client reads it from the OPENAI_API_KEY environment variable
# and raises at construction time if that variable is unset.
# NOTE(review): the key that used to be hard-coded here was published with
# the source and must be treated as compromised -- revoke it and configure a
# new one as an environment variable / deployment secret.
client = OpenAI()
# Course catalogue location. Despite the variable name, the file is a CSV
# and is loaded with pandas.read_csv below.
excel_file_path = "1.csv"
# Not used by any code visible in this file (read_csv takes no sheet name).
sheet_name = "Shortlisted Courses"

# Load the catalogue into a DataFrame.
courses_df = pd.read_csv(excel_file_path)

# Fit a TF-IDF model (English stop words removed) on the course names;
# missing names are replaced by empty strings before fitting.
_course_names = courses_df['Course Name'].fillna('')
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(_course_names)
# One numbered course entry in the expected text layout:
#   "<n>. <name>:\n  - Course Link: [<text>](<url>)\n  - Description: <text>"
# Groups: (number, name, link text, link URL, description).
_COURSE_ENTRY_RE = re.compile(
    r'(\d+)\. (.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n\s*- Description: ([^\n]+)'
)


def html_coversion(gpt_content):
    """Convert a numbered course list in text form into an HTML table.

    Every entry matching ``_COURSE_ENTRY_RE`` contributes one row made of the
    course name and the URL of its markdown link. The entry number, the link
    text and the description are not included in the table.

    Args:
        gpt_content: Text containing zero or more course entries. ``None``
            is treated as an empty string.

    Returns:
        An HTML ``<table>`` string with the columns ``Course Name`` and
        ``Course Link``. When nothing matches, the table has only a header.
    """
    # Pick the two displayed fields directly from the regex groups. The
    # previous implementation searched each tuple for a repeated link value,
    # which raised ValueError when link text and URL differed and otherwise
    # left four fields for a two-column DataFrame.
    rows = [
        (name, url)
        for _number, name, _link_text, url, _description
        in _COURSE_ENTRY_RE.findall(gpt_content or "")
    ]
    df = pd.DataFrame(rows, columns=['Course Name', 'Course Link'])

    # The text is model-generated, so keep to_html's default escaping instead
    # of injecting it into the page as raw HTML.
    return df.to_html(index=False)
# Recommend courses for a requested skill using TF-IDF similarity
def recommend_courses(user_skill):
    """Return an HTML table of the catalogue courses closest to ``user_skill``.

    The skill text is vectorised with the module-level ``tfidf_vectorizer``
    and scored against ``tfidf_matrix`` (one row per entry of ``courses_df``)
    with ``linear_kernel``. Up to five courses with a positive score are
    returned, best match first.

    Args:
        user_skill: Free-text skill description from the UI. ``None`` is
            treated as an empty string.

    Returns:
        An HTML ``<table>`` string with the columns ``Course Name`` and
        ``Course Link``. The table has only a header when no course shares
        a term with the input.
    """
    user_input = "" if user_skill is None else str(user_skill)

    # Score every course name against the user's text.
    user_vector = tfidf_vectorizer.transform([user_input])
    cosine_similarities = linear_kernel(user_vector, tfidf_matrix)

    recommendations = courses_df.copy()
    recommendations['Similarity'] = cosine_similarities[0]

    # A score of zero means no shared term; showing such rows would present
    # arbitrary courses as recommendations (e.g. for an empty input box).
    matched = recommendations[recommendations['Similarity'] > 0]
    top_recommendations = matched.sort_values(by='Similarity', ascending=False).head(5)

    # Render the table directly. The previous code passed a to_string() dump
    # to html_coversion, whose regex expects a numbered markdown list, so
    # nothing ever matched and the result was always an empty table.
    # escape=False keeps the rendering choice the original table used.
    return top_recommendations[['Course Name', 'Course Link']].to_html(index=False, escape=False)
# Gradio interface: one free-text skill input, HTML table output
iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        # No positional argument here: the first positional parameter of
        # gr.Textbox is its initial value, so passing "text" pre-filled the
        # box with the literal word "text".
        gr.Textbox(label="Enter expected skill"),
    ],
    outputs="html",
    # Re-run recommend_courses as the input changes, without a submit button.
    live=True,
)

# Start the Gradio app; share=True requests a public share link.
iface.launch(share=True)