Prediction of cluster assignments — predict.fuzzyclara • fuzzyclara

Function to predict cluster assignments

Usage

# S3 method for class 'fuzzyclara'
predict(object, newdata, ...)

Arguments

object: an object of class "fuzzyclara"
newdata: data.frame containing the variables on which the predictions are based
...: further arguments for predict functions

Value

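An object holding the predicted cluster assignments for the observations in newdata. For a fuzzy clustering it contains the element membership_scores, a table with one row per observation in newdata and one column per cluster (see Examples).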

Examples


# Split data intp test and training data
train_indices <- sample(x = nrow(USArrests), size = 0.7*nrow(USArrests))
USArrests_train <- USArrests[train_indices, ]
USArrests_test <- USArrests[-train_indices, ]

# Determine fuzzy clusters on training data

cc_fuzzy <- fuzzyclara(data        = USArrests_train,
                       clusters    = 3,
                       metric      = "euclidean",
                       samples     = 1,
                       sample_size = NULL,
                       type        = "fuzzy",
                       m           = 2,
                       seed        = 3526,
                       verbose     = 0)
#> Warning: The specified sample size is equal to the number of
#>     observations in the data. PAM clustering is performed on the entire data.
cc_fuzzy
#> Clustering results
#> 
#> Medoids
#> [1] "Missouri"   "Kansas"     "New Mexico"
#> 
#> Clustering
#>  [1] 3 1 3 1 1 2 1 2 2 1 1 2 2 2 2 2 2 1 2 2 2 3 2 2 2 2 2 1 1 1 1 2 3 2 2
#> 
#> Minimum average weighted distance
#> [1] 1.752737
#> 
#> Membership scores
#>                 Cluster1  Cluster2  Cluster3
#> New Jersey     0.2500264 0.3724113 0.3775623
#> Georgia        0.3933379 0.2502455 0.3564166
#> Oregon         0.2096447 0.3374293 0.4529260
#> Arizona        0.4910670 0.1862710 0.3226621
#> South Carolina 0.4028542 0.2632170 0.3339288
#> Hawaii         0.2010352 0.4736478 0.3253169
#> Illinois       0.3893153 0.2262988 0.3843859
#> Maine          0.2105460 0.5049071 0.2845470
#> Oklahoma       0.1310228 0.5877583 0.2812189
#> Maryland       0.6500466 0.1276540 0.2222994
#> New Mexico     1.0000000 0.0000000 0.0000000
#> Kentucky       0.2095222 0.4742663 0.3162115
#> Arkansas       0.2611842 0.3822406 0.3565752
#> Minnesota      0.1564806 0.6006110 0.2429084
#> Kansas         0.0000000 1.0000000 0.0000000
#> Indiana        0.1089868 0.6601315 0.2308817
#> Vermont        0.2479323 0.4389313 0.3131364
#> California     0.4337389 0.2210383 0.3452229
#> Wyoming        0.1653750 0.5654147 0.2692104
#> North Dakota   0.2305061 0.4692949 0.3001990
#> Idaho          0.1866650 0.5433572 0.2699778
#> Colorado       0.3612727 0.2133301 0.4253972
#> Washington     0.1956706 0.4162023 0.3881271
#> Iowa           0.1894373 0.5395477 0.2710150
#> Ohio           0.1587485 0.4923727 0.3488787
#> Rhode Island   0.2461388 0.4321363 0.3217249
#> Connecticut    0.1857163 0.5383286 0.2759551
#> Alabama        0.3725449 0.2621495 0.3653056
#> Louisiana      0.4240164 0.2317821 0.3442015
#> Nevada         0.4569696 0.2063121 0.3367184
#> Florida        0.5373729 0.1832801 0.2793469
#> Virginia       0.1748823 0.4467913 0.3783264
#> Missouri       0.0000000 0.0000000 1.0000000
#> Nebraska       0.1144836 0.6965148 0.1890016
#> Pennsylvania   0.1222504 0.6671431 0.2106065

# Determine distance matrix of the observations and cluster medoids
USArrests_medoids <- USArrests[rownames(USArrests) %in% cc_fuzzy$medoids,]
dist <- proxy::dist(x = USArrests_test[, -1], y = USArrests_medoids[, -1],
                    method = "euclidean") #use same metric as above

# Make cluster prediction for test data

USArrests_clusters_predicted <- predict(object = cc_fuzzy,
                                        newdata = USArrests_test,
                                        dist_matrix = dist)
USArrests_clusters_predicted$membership_scores
#>                 Cluster1  Cluster2  Cluster3
#> Alaska         0.4183864 0.2390221 0.3425915
#> Delaware       0.2662189 0.3794404 0.3543406
#> Massachusetts  0.2195957 0.4471681 0.3332363
#> Michigan       0.6186898 0.1245516 0.2567585
#> Mississippi    0.3637007 0.2987573 0.3375420
#> Montana        0.1694664 0.5666950 0.2638386
#> New Hampshire  0.1968295 0.5273201 0.2758504
#> New York       0.4169648 0.2170146 0.3660205
#> North Carolina 0.3861993 0.2881459 0.3256548
#> South Dakota   0.2126881 0.4951130 0.2921988
#> Tennessee      0.3362672 0.2452547 0.4184781
#> Texas          0.3475477 0.2246028 0.4278495
#> Utah           0.2007003 0.4581080 0.3411917
#> West Virginia  0.2347547 0.4599033 0.3053420
#> Wisconsin      0.1788601 0.5586124 0.2625275