Skip to contents

Function to predict cluster assignments

Usage

# S3 method for class 'fuzzyclara'
predict(object, newdata, ...)

Arguments

object

an object of class "fuzzyclara"

newdata

data.frame containing the variables based on which the predictions should be calculated

...

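further arguments passed on to the predict method, e.g. dist_matrix (a precomputed distance matrix between the observations in newdata and the cluster medoids, as used in the example below)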

Value

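object holding the cluster predictions for newdata, including the element membership_scores with one membership score per observation and cluster (see Examples)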

Examples


# Split data into test and training data
train_indices <- sample(x = nrow(USArrests), size = 0.7*nrow(USArrests))
USArrests_train <- USArrests[train_indices, ]
USArrests_test <- USArrests[-train_indices, ]

# Determine fuzzy clusters on training data

cc_fuzzy <- fuzzyclara(data        = USArrests_train,
                       clusters    = 3,
                       metric      = "euclidean",
                       samples     = 1,
                       sample_size = NULL,
                       type        = "fuzzy",
                       m           = 2,
                       seed        = 3526,
                       verbose     = 0)
#> Warning: The specified sample size is equal to the number of
#>     observations in the data. PAM clustering is performed on the entire data.
cc_fuzzy
#> Clustering results
#> 
#> Medoids
#> [1] "Missouri"   "Kansas"     "New Mexico"
#> 
#> Clustering
#>  [1] 3 1 3 1 1 2 1 2 2 1 1 2 2 2 2 2 2 1 2 2 2 3 2 2 2 2 2 1 1 1 1 2 3 2 2
#> 
#> Minimum average weighted distance
#> [1] 1.752737
#> 
#> Membership scores
#>                 Cluster1  Cluster2  Cluster3
#> New Jersey     0.2500264 0.3724113 0.3775623
#> Georgia        0.3933379 0.2502455 0.3564166
#> Oregon         0.2096447 0.3374293 0.4529260
#> Arizona        0.4910670 0.1862710 0.3226621
#> South Carolina 0.4028542 0.2632170 0.3339288
#> Hawaii         0.2010352 0.4736478 0.3253169
#> Illinois       0.3893153 0.2262988 0.3843859
#> Maine          0.2105460 0.5049071 0.2845470
#> Oklahoma       0.1310228 0.5877583 0.2812189
#> Maryland       0.6500466 0.1276540 0.2222994
#> New Mexico     1.0000000 0.0000000 0.0000000
#> Kentucky       0.2095222 0.4742663 0.3162115
#> Arkansas       0.2611842 0.3822406 0.3565752
#> Minnesota      0.1564806 0.6006110 0.2429084
#> Kansas         0.0000000 1.0000000 0.0000000
#> Indiana        0.1089868 0.6601315 0.2308817
#> Vermont        0.2479323 0.4389313 0.3131364
#> California     0.4337389 0.2210383 0.3452229
#> Wyoming        0.1653750 0.5654147 0.2692104
#> North Dakota   0.2305061 0.4692949 0.3001990
#> Idaho          0.1866650 0.5433572 0.2699778
#> Colorado       0.3612727 0.2133301 0.4253972
#> Washington     0.1956706 0.4162023 0.3881271
#> Iowa           0.1894373 0.5395477 0.2710150
#> Ohio           0.1587485 0.4923727 0.3488787
#> Rhode Island   0.2461388 0.4321363 0.3217249
#> Connecticut    0.1857163 0.5383286 0.2759551
#> Alabama        0.3725449 0.2621495 0.3653056
#> Louisiana      0.4240164 0.2317821 0.3442015
#> Nevada         0.4569696 0.2063121 0.3367184
#> Florida        0.5373729 0.1832801 0.2793469
#> Virginia       0.1748823 0.4467913 0.3783264
#> Missouri       0.0000000 0.0000000 1.0000000
#> Nebraska       0.1144836 0.6965148 0.1890016
#> Pennsylvania   0.1222504 0.6671431 0.2106065

# Determine distance matrix between the test observations and the cluster medoids
USArrests_medoids <- USArrests[rownames(USArrests) %in% cc_fuzzy$medoids,]
dist <- proxy::dist(x = USArrests_test[, -1], y = USArrests_medoids[, -1],
                    method = "euclidean") #use same metric as above

# Make cluster prediction for test data

USArrests_clusters_predicted <- predict(object = cc_fuzzy,
                                        newdata = USArrests_test,
                                        dist_matrix = dist)
USArrests_clusters_predicted$membership_scores
#>                 Cluster1  Cluster2  Cluster3
#> Alaska         0.4183864 0.2390221 0.3425915
#> Delaware       0.2662189 0.3794404 0.3543406
#> Massachusetts  0.2195957 0.4471681 0.3332363
#> Michigan       0.6186898 0.1245516 0.2567585
#> Mississippi    0.3637007 0.2987573 0.3375420
#> Montana        0.1694664 0.5666950 0.2638386
#> New Hampshire  0.1968295 0.5273201 0.2758504
#> New York       0.4169648 0.2170146 0.3660205
#> North Carolina 0.3861993 0.2881459 0.3256548
#> South Dakota   0.2126881 0.4951130 0.2921988
#> Tennessee      0.3362672 0.2452547 0.4184781
#> Texas          0.3475477 0.2246028 0.4278495
#> Utah           0.2007003 0.4581080 0.3411917
#> West Virginia  0.2347547 0.4599033 0.3053420
#> Wisconsin      0.1788601 0.5586124 0.2625275