Skip to contents

Function to provide graphical visualization of distribution

Usage

# S3 method for class 'fuzzyclara'
plot(
  x,
  data,
  type = NULL,
  variable = NULL,
  na.omit = FALSE,
  membership_threshold = 0,
  sample_percentage = 1,
  plot_membership_scores = FALSE,
  seed = 42,
  ...
)

Arguments

x

An object of class "fuzzyclara"

data

data.frame or matrix used for clustering

type, variable

Type of plot and name of the variable (column of data) to be plotted. type must be one of c("barplot","boxplot","wordclouds", "silhouette","pca","scatterplot", "parallel"). Defaults to NULL, which either plots a barplot or a boxplot, depending on the class of variable.

na.omit

Should missing values be excluded for plotting? Defaults to FALSE.

membership_threshold

Threshold for fuzzy clustering observations to be plotted. Must be a number between 0 and 1. Defaults to 0.

sample_percentage

Proportion of observations that should be randomly selected for display in the plot. Must be a number between 0 and 1. Defaults to 1.

plot_membership_scores

Boolean value indicating whether the cluster membership scores for the observations should be indicated through the line transparency (TRUE) or not (FALSE). Defaults to FALSE.

seed

Random number seed (needed for clara_wordcloud and clara_parallel). Defaults to 42.

...

Further arguments for internal plot functions. For each type there is an internal plot function. See for example ?clara_pca.

Value

Clustering plot

Examples


# Prepare data for example (enrich the USArrests dataset by area and state)
library(dplyr)
#> 
#> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’:
#> 
#>     filter, lag
#> The following objects are masked from ‘package:base’:
#> 
#>     intersect, setdiff, setequal, union
USArrests_enriched <- USArrests %>% 
  mutate(State = as.factor(rownames(USArrests)),
         Area  = as.factor(case_when(State %in% c("Washington", "Oregon",
                                               "California", "Nevada",
                                               "Arizona", "Idaho", "Montana",
                                               "Wyoming", "Colorado",
                                               "New Mexico", "Utah", "Hawaii",
                                               "Alaska") ~ "West",
                                  State %in% c("Texas", "Oklahoma", "Arkansas",
                                               "Louisiana", "Mississippi",
                                               "Alabama", "Tennessee",
                                               "Kentucky", "Georgia",
                                               "Florida", "South Carolina",
                                               "North Carolina", "Virginia",
                                               "West Virginia") ~ "South",
                                  State %in% c("Kansas", "Nebraska", "South Dakota",
                                               "North Dakota", "Minnesota",
                                               "Missouri", "Iowa", "Illinois",
                                               "Indiana", "Michigan", "Wisconsin",
                                               "Ohio") ~ "Midwest",
                                  State %in% c("Maine", "New Hampshire", "New York",
                                               "Massachusetts", "Rhode Island",
                                               "Vermont", "Pennsylvania",
                                               "New Jersey", "Connecticut",
                                               "Delaware", "Maryland") ~
                                               "Northeast")))
# Determine clusters that will be plotted                                 
cc_hard <- fuzzyclara(data        = USArrests,
                      clusters    = 3,
                      metric      = "euclidean",
                      samples     = 1,
                      sample_size = NULL,
                      type        = "hard",
                      seed        = 3526,
                      verbose     = 0)
cc_hard
#> Clustering results
#> 
#> Medoids
#> [1] "New Mexico"    "Oklahoma"      "New Hampshire"
#> 
#> Clustering
#>  [1] 2 2 2 3 2 2 3 3 2 2 3 1 2 3 1 3 3 2 1 2 3 2 1 2 3 3 3 2 1 3 2 2 2 1 3 3 3 3
#> [39] 3 2 1 2 2 3 1 3 3 1 1 3
#> 
#> Minimum average distance
#> [1] 1.180717

cc_fuzzy <- fuzzyclara(data        = USArrests,
                      clusters    = 3,
                      metric      = "euclidean",
                      samples     = 1,
                      sample_size = NULL,
                      type        = "fuzzy",
                      m           = 2,
                      seed        = 3526,
                      verbose     = 0)
cc_fuzzy
#> Clustering results
#> 
#> Medoids
#> [1] "Oklahoma"  "Arizona"   "Tennessee"
#> 
#> Clustering
#>  [1] 3 3 1 2 1 1 2 2 1 3 2 2 1 2 2 2 2 3 2 1 2 1 2 3 2 2 2 1 2 2 1 1 3 2 2 2 2 2
#> [39] 2 3 2 3 3 2 2 2 2 2 2 2
#> 
#> Minimum average weighted distance
#> [1] 1.94242
#> 
#> Membership scores
#>                 Cluster1  Cluster2  Cluster3
#> Alabama        0.2040878 0.2391714 0.5567409
#> Alaska         0.3373655 0.2726496 0.3899849
#> Arizona        1.0000000 0.0000000 0.0000000
#> Arkansas       0.2075892 0.3966215 0.3957893
#> California     0.5401685 0.2248051 0.2350264
#> Colorado       0.4475538 0.2744007 0.2780455
#> Connecticut    0.2348136 0.5280016 0.2371848
#> Delaware       0.2906227 0.4701428 0.2392345
#> Florida        0.4443412 0.2316682 0.3239905
#> Georgia        0.2091524 0.2149396 0.5759081
#> Hawaii         0.2482766 0.4883161 0.2634073
#> Idaho          0.2209589 0.5129169 0.2661242
#> Illinois       0.4666698 0.2739684 0.2593617
#> Indiana        0.1344369 0.6694262 0.1961369
#> Iowa           0.2311216 0.4905457 0.2783327
#> Kansas         0.1310680 0.6999444 0.1689876
#> Kentucky       0.1917648 0.4401893 0.3680459
#> Louisiana      0.2560625 0.2412981 0.5026393
#> Maine          0.2396947 0.4695769 0.2907285
#> Maryland       0.4281216 0.2306369 0.3412416
#> Massachusetts  0.2682437 0.5043343 0.2274220
#> Michigan       0.4467571 0.2192029 0.3340400
#> Minnesota      0.2158369 0.5379562 0.2462069
#> Mississippi    0.2484292 0.2817416 0.4698292
#> Missouri       0.2669546 0.3898602 0.3431852
#> Montana        0.1922866 0.5233027 0.2844107
#> Nebraska       0.1814854 0.5935543 0.2249603
#> Nevada         0.4372644 0.2469118 0.3158237
#> New Hampshire  0.2351771 0.4821286 0.2826942
#> New Jersey     0.3025875 0.4474846 0.2499279
#> New Mexico     0.4736616 0.2098122 0.3165261
#> New York       0.4959333 0.2489337 0.2551329
#> North Carolina 0.2984813 0.2995500 0.4019686
#> North Dakota   0.2525175 0.4409055 0.3065770
#> Ohio           0.1722044 0.6264071 0.2013885
#> Oklahoma       0.0000000 1.0000000 0.0000000
#> Oregon         0.2597840 0.4842455 0.2559705
#> Pennsylvania   0.1733916 0.6187011 0.2079073
#> Rhode Island   0.2938264 0.4548769 0.2512968
#> South Carolina 0.2521289 0.2569116 0.4909595
#> South Dakota   0.2294820 0.4627901 0.3077278
#> Tennessee      0.0000000 0.0000000 1.0000000
#> Texas          0.3315450 0.2977964 0.3706587
#> Utah           0.2550652 0.5204090 0.2245258
#> Vermont        0.2537642 0.4173744 0.3288614
#> Virginia       0.1470128 0.6016305 0.2513568
#> Washington     0.2420740 0.5403595 0.2175666
#> West Virginia  0.2356115 0.4301945 0.3341939
#> Wisconsin      0.2298126 0.5057011 0.2644864
#> Wyoming        0.1652925 0.6041126 0.2305949
                                    
# Boxplot
plot(x = cc_hard, data = USArrests_enriched, variable = "Assault") 


# Barplot
plot(x = cc_hard, data = USArrests_enriched, variable = "Area")


# Wordcloud
plot(x = cc_hard, data = USArrests_enriched, variable = "State", 
     type = "wordclouds", seed = 123)                                    

                                    
# Scatterplot
plot(x = cc_hard, data = USArrests_enriched, type = "scatterplot",
     x_var = "Murder", y_var = "Assault")   
#> `geom_smooth()` using formula = 'y ~ x'


# Plot membership probability for fuzzy clustering
plot(x = cc_fuzzy, data = USArrests_enriched, type = "scatterplot",
     x_var = "Murder", y_var = "Assault", 
     focus = TRUE)


# Plot membership probability for fuzzy clustering (one cluster only)
plot(x = cc_fuzzy, data = USArrests_enriched, type = "scatterplot",
     x_var = "Murder", y_var = "Assault", 
     focus = TRUE, focus_clusters = c(1))  

     
# PCA
plot(x = cc_hard, data = USArrests_enriched, type = "pca",
     group_by = "Area")     

 
# Plot membership probability for one or more clusters following a PCA    
plot(x = cc_fuzzy, data = USArrests_enriched, type = "pca",
     focus = TRUE)     

       
plot(x = cc_fuzzy, data = USArrests_enriched, type = "pca",
     focus = TRUE, focus_clusters = c(1))      

  
# Silhouette plot
plot(x = cc_hard, data = USArrests, type = "silhouette")  
#> $plot

#> 
#> $silhouette_table
#>   Cluster Size Silhouette width
#> 1       1   10        0.4604416
#> 2       2   19        0.2757843
#> 3       3   21        0.2797126
#> 
#> $average_silhouette_width
#> [1] 0.3143656
#> 
 
# Plot clusters for fuzzy clustering (using threshold for membership scores)
plot(x = cc_fuzzy, data = USArrests_enriched, type = "pca",
     variable = "Assault", membership_threshold = 0) 

       
plot(x = cc_fuzzy, data = USArrests_enriched, type = "pca",
     variable = "Assault", membership_threshold = 0.5)       

       
plot(x = cc_fuzzy, data = USArrests_enriched, type = "scatterplot",
     x_var = "Murder", y_var = "Assault", membership_threshold = 0)  
#> `geom_smooth()` using formula = 'y ~ x'

     
plot(x = cc_fuzzy, data = USArrests_enriched, type = "scatterplot",
     x_var = "Murder", y_var = "Assault", membership_threshold = 0.5)     
#> `geom_smooth()` using formula = 'y ~ x'
#> Warning: NaNs produced
#> Warning: no non-missing arguments to max; returning -Inf

  
plot(x = cc_fuzzy, data = USArrests_enriched, type = "scatterplot",
     x_var = "Murder", y_var = "Assault", membership_threshold = 0.5,
     plot_all_fuzzy = TRUE)  
#> `geom_smooth()` using formula = 'y ~ x'
#> Warning: NaNs produced
#> Warning: no non-missing arguments to max; returning -Inf

  
plot(x = cc_fuzzy, data = USArrests_enriched, type = "pca",
     group_by = "Area", membership_threshold = 0)      

     
plot(x = cc_fuzzy, data = USArrests_enriched, type = "pca",
     group_by = "Area", membership_threshold = 0.5)     


plot(x = cc_fuzzy, data = USArrests_enriched, type = "pca",
     group_by = "Area", membership_threshold = 0.5, plot_all_fuzzy = TRUE)     


# Parallel Plot
plot(x = cc_fuzzy, data = USArrests_enriched, 
     type = "parallel", sample_percentage = 1, plot_membership_scores = TRUE)