Trains linear regression, lasso, and ridge models in R

Details

Trains linear models such as logistic, lasso, or ridge regression models. It is built on the glmnet R package. This class provides fit, predict, and cross-validation functions.

Public fields

family

type of regression to perform, values can be "gaussian", "binomial", "multinomial", "mgaussian"

weights

observation weights. Can be total counts if responses are proportion matrices. Default is 1 for each observation

alpha

The elasticnet mixing parameter, alpha=1 is the lasso penalty, alpha=0 the ridge penalty, alpha=NULL is simple regression

lambda

the number of lambda values - default is 100

standardize

normalise the features in the given data

standardize.response

normalise the dependent variable between 0 and 1, default = FALSE

model

internal use

cvmodel

internal use

Flag

internal use

is_lasso

internal use

iid_names

internal use

Methods


Method new()

Usage

LMTrainer$new(family, weights, alpha, lambda, standardize.response)

Arguments

family

character, type of regression to perform, values can be "gaussian", "binomial", "multinomial", "mgaussian"

weights

numeric, observation weights. Can be total counts if responses are proportion matrices. Default is 1 for each observation

alpha

integer, The elasticnet mixing parameter, alpha=1 is the lasso penalty, alpha=0 the ridge penalty, alpha=NULL is simple regression

lambda

integer, the number of lambda values - default is 100

standardize.response

logical, normalise the dependent variable between 0 and 1, default = FALSE

Details

Create a new `LMTrainer` object.

Returns

A `LMTrainer` object.

Examples

\dontrun{
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
}


Method fit()

Usage

LMTrainer$fit(X, y)

Arguments

X

data.frame containing train features

y

character, name of target variable

Details

Fits the LMTrainer model on given data

Returns

NULL, trains the model and saves it internally

Examples

\dontrun{
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$fit(X = housing, y = 'MEDV')
}


Method predict()

Usage

LMTrainer$predict(df, lambda = NULL)

Arguments

df

data.frame containing test features

lambda

integer, the number of lambda values - default is 100. By default it picks the best value from the model.

Details

Returns predictions for test data

Returns

vector, a vector containing predictions

Examples

\dontrun{
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$fit(X = housing, y = 'MEDV')
predictions <- lf$predict(df = housing)
}


Method cv_model()

Usage

LMTrainer$cv_model(X, y, nfolds, parallel, type.measure = "deviance")

Arguments

X

data.frame containing train features

y

character, name of target variable

nfolds

integer, number of folds

parallel

logical, whether to use parallel computation. Default = FALSE

type.measure

character, evaluation metric type. Default = deviance

Details

Train regression model using cross validation

Returns

NULL, trains the model and saves it in memory

Examples

\dontrun{
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
}


Method cv_predict()

Usage

LMTrainer$cv_predict(df, lambda = NULL)

Arguments

df

data.frame containing test features

lambda

integer, the number of lambda values - default is 100. By default it picks the best value from the model.

Details

Get predictions from the cross validated regression model

Returns

vector, a vector containing predicted values

Examples

\dontrun{
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
predictions <- lf$cv_predict(df = housing)
}


Method get_importance()

Usage

LMTrainer$get_importance()

Details

Get feature importance using model coefficients

Returns

a matrix containing feature coefficients

Examples

\dontrun{
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
predictions <- lf$cv_predict(df = housing)
coefs <- lf$get_importance()
}


Method clone()

The objects of this class are cloneable with this method.

Usage

LMTrainer$clone(deep = FALSE)

Arguments

deep

Whether to make a deep clone.

Examples


## ------------------------------------------------
## Method `LMTrainer$new`
## ------------------------------------------------

if (FALSE) {
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
}

## ------------------------------------------------
## Method `LMTrainer$fit`
## ------------------------------------------------

if (FALSE) {
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$fit(X = housing, y = 'MEDV')
}

## ------------------------------------------------
## Method `LMTrainer$predict`
## ------------------------------------------------

if (FALSE) {
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$fit(X = housing, y = 'MEDV')
predictions <- lf$predict(df = housing)
}

## ------------------------------------------------
## Method `LMTrainer$cv_model`
## ------------------------------------------------

if (FALSE) {
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
}

## ------------------------------------------------
## Method `LMTrainer$cv_predict`
## ------------------------------------------------

if (FALSE) {
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
predictions <- lf$cv_predict(df = housing)
}

## ------------------------------------------------
## Method `LMTrainer$get_importance`
## ------------------------------------------------

if (FALSE) {
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
predictions <- lf$cv_predict(df = housing)
coefs <- lf$get_importance()
}