Prediction of cluster assignments
predict.fuzzyclara.Rd
Function to predict cluster assignments for new data based on a fitted fuzzyclara object.
Usage
# S3 method for class 'fuzzyclara'
predict(object, newdata, ...)
Examples
# Split the data into training and test data:
# 70% of the rows are drawn at random for training, the rest form the test set
n_obs <- nrow(USArrests)
train_indices <- sample(x = n_obs, size = 0.7 * n_obs)
is_train <- seq_len(n_obs) %in% train_indices
USArrests_train <- USArrests[train_indices, ]
USArrests_test <- USArrests[!is_train, ]
# Fit a fuzzy clustering with three clusters on the training data.
# With sample_size = NULL the call emits the warning shown below.
cc_fuzzy <- fuzzyclara(
  data = USArrests_train,
  clusters = 3,
  metric = "euclidean",
  samples = 1,
  sample_size = NULL,
  type = "fuzzy",
  m = 2,
  seed = 3526,
  verbose = 0
)
#> Warning: The specified sample size is equal to the number of
#> observations in the data. PAM clustering is performed on the entire data.
# Print the fitted clustering object
cc_fuzzy
#> Clustering results
#>
#> Medoids
#> [1] "Missouri" "Kansas" "New Mexico"
#>
#> Clustering
#> [1] 3 1 3 1 1 2 1 2 2 1 1 2 2 2 2 2 2 1 2 2 2 3 2 2 2 2 2 1 1 1 1 2 3 2 2
#>
#> Minimum average weighted distance
#> [1] 1.752737
#>
#> Membership scores
#> Cluster1 Cluster2 Cluster3
#> New Jersey 0.2500264 0.3724113 0.3775623
#> Georgia 0.3933379 0.2502455 0.3564166
#> Oregon 0.2096447 0.3374293 0.4529260
#> Arizona 0.4910670 0.1862710 0.3226621
#> South Carolina 0.4028542 0.2632170 0.3339288
#> Hawaii 0.2010352 0.4736478 0.3253169
#> Illinois 0.3893153 0.2262988 0.3843859
#> Maine 0.2105460 0.5049071 0.2845470
#> Oklahoma 0.1310228 0.5877583 0.2812189
#> Maryland 0.6500466 0.1276540 0.2222994
#> New Mexico 1.0000000 0.0000000 0.0000000
#> Kentucky 0.2095222 0.4742663 0.3162115
#> Arkansas 0.2611842 0.3822406 0.3565752
#> Minnesota 0.1564806 0.6006110 0.2429084
#> Kansas 0.0000000 1.0000000 0.0000000
#> Indiana 0.1089868 0.6601315 0.2308817
#> Vermont 0.2479323 0.4389313 0.3131364
#> California 0.4337389 0.2210383 0.3452229
#> Wyoming 0.1653750 0.5654147 0.2692104
#> North Dakota 0.2305061 0.4692949 0.3001990
#> Idaho 0.1866650 0.5433572 0.2699778
#> Colorado 0.3612727 0.2133301 0.4253972
#> Washington 0.1956706 0.4162023 0.3881271
#> Iowa 0.1894373 0.5395477 0.2710150
#> Ohio 0.1587485 0.4923727 0.3488787
#> Rhode Island 0.2461388 0.4321363 0.3217249
#> Connecticut 0.1857163 0.5383286 0.2759551
#> Alabama 0.3725449 0.2621495 0.3653056
#> Louisiana 0.4240164 0.2317821 0.3442015
#> Nevada 0.4569696 0.2063121 0.3367184
#> Florida 0.5373729 0.1832801 0.2793469
#> Virginia 0.1748823 0.4467913 0.3783264
#> Missouri 0.0000000 0.0000000 1.0000000
#> Nebraska 0.1144836 0.6965148 0.1890016
#> Pennsylvania 0.1222504 0.6671431 0.2106065
# Compute the distances between each test observation and each cluster medoid.
# NOTE(review): `%in%` keeps the medoid rows in the row order of USArrests,
# not in the order of cc_fuzzy$medoids -- confirm that predict() does not rely
# on the column order of the distance matrix.
# NOTE(review): `[, -1]` drops the first column (Murder) from both inputs --
# confirm this matches the variables used when fitting the clustering.
is_medoid <- rownames(USArrests) %in% cc_fuzzy$medoids
USArrests_medoids <- USArrests[is_medoid, ]
dist <- proxy::dist(
  x = USArrests_test[, -1],
  y = USArrests_medoids[, -1],
  method = "euclidean" # use same metric as above
)
# Predict cluster assignments for the test data from the fitted clustering,
# passing the distance matrix computed above
USArrests_clusters_predicted <- predict(
  object = cc_fuzzy,
  newdata = USArrests_test,
  dist_matrix = dist
)
# Show the predicted membership scores of the test observations
USArrests_clusters_predicted$membership_scores
#> Cluster1 Cluster2 Cluster3
#> Alaska 0.4183864 0.2390221 0.3425915
#> Delaware 0.2662189 0.3794404 0.3543406
#> Massachusetts 0.2195957 0.4471681 0.3332363
#> Michigan 0.6186898 0.1245516 0.2567585
#> Mississippi 0.3637007 0.2987573 0.3375420
#> Montana 0.1694664 0.5666950 0.2638386
#> New Hampshire 0.1968295 0.5273201 0.2758504
#> New York 0.4169648 0.2170146 0.3660205
#> North Carolina 0.3861993 0.2881459 0.3256548
#> South Dakota 0.2126881 0.4951130 0.2921988
#> Tennessee 0.3362672 0.2452547 0.4184781
#> Texas 0.3475477 0.2246028 0.4278495
#> Utah 0.2007003 0.4581080 0.3411917
#> West Virginia 0.2347547 0.4599033 0.3053420
#> Wisconsin 0.1788601 0.5586124 0.2625275