import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import joblib
import gradio as gr

# Read the raw app records from the CSV file
data = pd.read_csv('data.csv')

# Turn 'Price' into a binary flag: the literal 'Free' becomes 0, any other value becomes 1
data['Price'] = data['Price'].apply(lambda price: 1 if price != 'Free' else 0)

# Strip the unit text from 'Size' ('MB') and 'Reviews' ('M') and cast what is left to float.
# NOTE(review): presumably megabytes and millions of reviews - confirm against data.csv.
for column_name, unit_text in (('Size', 'MB'), ('Reviews', 'M')):
    data[column_name] = data[column_name].str.replace(unit_text, '').astype(float)

# Features are reviews, size and the price flag; the target is the rating
feature_columns = ['Reviews', 'Size', 'Price']
X = data[feature_columns]
y = data['Rating']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
# (X_test / y_test are not used by the rest of the visible script.)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the linear regression model and fit it on the training split
# (fit() returns the estimator itself, so both steps chain into one statement)
model = LinearRegression().fit(X_train, y_train)

# Persist the fitted model so it can be loaded again at prediction time
joblib.dump(model, 'linear_regression_model.pkl')

# Holds the deserialized model after the first prediction so the pickle is
# read from disk once instead of on every request.
_loaded_model = None


def predict_rating(reviews, size, price):
    """Predict an app rating with the saved linear regression model.

    Args:
        reviews: Review count, on the same scale as the training 'Reviews'
            column (the value left after the 'M' text is removed).
        size: App size, on the same scale as the training 'Size' column
            (the value left after the 'MB' text is removed).
        price: Price flag using the training encoding: 0 for 'Free',
            1 for anything else.

    Returns:
        The predicted rating for the single app described by the arguments.
    """
    global _loaded_model
    if _loaded_model is None:
        _loaded_model = joblib.load('linear_regression_model.pkl')

    # The model was fitted on a DataFrame, so pass the features with the same
    # column names and order; a bare nested list makes scikit-learn warn
    # about missing feature names on every call.
    features = pd.DataFrame(
        [[reviews, size, price]],
        columns=['Reviews', 'Size', 'Price'],
    )
    predicted_rating = _loaded_model.predict(features)
    return predicted_rating[0]

# Build the Gradio UI: three numeric inputs (reviews, size, price flag) are
# passed positionally to predict_rating and its result is shown as a number.
# The description states the price encoding used when the data was prepared
# for training, where 'Free' was mapped to 0 and every other value to 1.
iface = gr.Interface(
    fn=predict_rating,
    inputs=["number", "number", "number"],
    outputs="number",
    title="App Rating Predictor",
    examples=[[20, 25.1, 0], [45, 26.7, 1], [60, 30.2, 0]],
    description="Enter the number of reviews, size(without 'MB' word), and price(0 = free, 1 = paid) of your app to predict its rating.",
)

# Enable request queuing through queue(); the enable_queue keyword of launch()
# was deprecated in Gradio 3.x and is no longer accepted by Gradio 4.x.
iface.queue()

# Start the web server without creating a public share link; debug=True
# blocks the main thread while the server is running.
iface.launch(share=False, debug=True)