Basic usage of the package.
First, let’s create 5 clusters of 100 points each in 2 dimensions, normally distributed around the centers 1 to 5, with an sd of 0.3:
data <- simulate_data(n = 100, sd = 0.3, nclust = 5, dims = 2)
data
## id V1 V2 true_clust
## 1 1 1.0667265 1.2974904 1
## 2 2 1.5669030 1.0507419 1
## 3 3 0.9216493 0.9818881 1
## 4 4 1.1005807 1.0826089 1
## 5 5 1.3043060 0.7386772 1
## 6 6 0.5905979 1.2199023 1
## 7 7 0.5640716 1.2009019 1
## 8 8 0.8026433 1.0143766 1
## 9 9 1.0162398 1.7195164 1
## 10 10 0.5520354 1.0998368 1
## 11 11 1.1953140 1.3663256 1
## 12 12 1.5992284 1.3281665 1
## 13 13 1.2955390 1.1698961 1
## 14 14 1.1234638 0.9912329 1
## 15 15 1.0126584 0.9744042 1
## 16 16 1.3119844 0.8811811 1
## 17 17 1.0552372 1.6257848 1
## 18 18 0.7999655 0.9811239 1
## 19 19 0.9281041 1.1814423 1
## 20 20 1.4051395 1.0918538 1
## 21 21 1.2337457 0.8609941 1
## 22 22 1.6904613 1.4090263 1
## 23 23 0.6058851 0.8359700 1
## 24 24 1.5577314 1.3954873 1
## 25 25 1.0574108 0.8103830 1
## 26 26 1.0821083 1.3491419 1
## 27 27 1.2773750 1.0908505 1
## 28 28 0.9083436 0.9377400 1
## 29 29 1.4753087 1.1393456 1
## 30 30 0.6261183 1.0678832 1
## 31 31 0.9971557 0.7110518 1
## 32 32 1.1286987 0.8751926 1
## 33 33 0.8602882 1.2884457 1
## 34 34 0.8720141 1.3431411 1
## 35 35 0.8564513 1.3415309 1
## 36 36 1.2779707 1.2513594 1
## 37 37 1.3410942 1.1915235 1
## 38 38 1.3170715 0.8314424 1
## 39 39 1.1620800 0.9975508 1
## 40 40 0.9644723 0.4914671 1
## 41 41 0.9637582 1.1634533 1
## 42 42 1.0621891 1.2628968 1
## 43 43 0.4116396 1.3365347 1
## 44 44 0.6080422 1.0226581 1
## 45 45 1.0981710 1.1814792 1
## 46 46 1.4061822 1.1482983 1
## 47 47 1.5635449 1.3122412 1
## 48 48 0.6009701 1.2520516 1
## 49 49 1.3759020 1.0235787 1
## 50 50 1.2488680 1.0132348 1
## 51 51 0.7966284 0.8174224 1
## 52 52 1.2437039 1.1594962 1
## 53 53 0.5141690 1.0295818 1
## 54 54 0.6242043 0.3423182 1
## 55 55 0.8449093 1.0426204 1
## 56 56 1.1172540 0.9446881 1
## 57 57 0.8123217 0.9360699 1
## 58 58 1.0578361 0.6289752 1
## 59 59 1.3776524 0.6354165 1
## 60 60 1.1025650 1.0735222 1
## 61 61 0.8449643 0.9240000 1
## 62 62 0.8131473 1.0000371 1
## 63 63 0.9731536 1.0191583 1
## 64 64 1.1998901 1.0817627 1
## 65 65 0.8762270 1.4395907 1
## 66 66 1.0997815 0.7537613 1
## 67 67 0.9370728 1.0533252 1
## 68 68 0.8842632 0.8134801 1
## 69 69 1.0250015 1.0604346 1
## 70 70 0.9508633 1.4304047 1
## 71 71 1.3018849 0.7042134 1
## 72 72 1.0309243 0.9791680 1
## 73 73 1.4766626 0.8399630 1
## 74 74 0.7077205 1.1032085 1
## 75 75 0.9465240 0.7801880 1
## 76 76 0.2887217 0.7269880 1
## 77 77 1.1768653 0.9117824 1
## 78 78 0.5618219 1.3393789 1
## 79 79 1.2671184 1.0621367 1
## 80 80 1.8189428 1.3351707 1
## 81 81 0.8799897 0.7404425 1
## 82 82 0.7193279 0.5906543 1
## 83 83 1.2715284 0.9753877 1
## 84 84 0.9896182 0.9457459 1
## 85 85 1.1023038 0.9017862 1
## 86 86 0.8354561 1.2557587 1
## 87 87 1.3565034 0.6625389 1
## 88 88 1.4917184 1.7051537 1
## 89 89 0.7806541 0.8768405 1
## 90 90 0.5749884 0.6220421 1
## 91 91 0.5285356 0.9881802 1
## 92 92 0.8048000 0.7652237 1
## 93 93 0.9155977 0.7346152 1
## 94 94 0.5170804 1.3764329 1
## 95 95 0.9132864 1.1759861 1
## 96 96 0.5753730 2.0344629 1
## 97 97 0.6270602 1.0425871 1
## 98 98 1.4039415 0.7822955 1
## 99 99 1.4699544 0.6457397 1
## 100 100 0.1966918 0.8655827 1
## 101 101 1.8650106 1.8228915 2
## 102 102 2.5035777 2.0348018 2
## 103 103 2.0709071 1.5802315 2
## 104 104 1.7802285 2.1852078 2
## 105 105 2.1404162 1.4695159 2
## 106 106 2.4045219 1.8141060 2
## 107 107 2.2754578 1.6387158 2
## 108 108 1.8346851 1.7169400 2
## 109 109 2.2179101 2.1954281 2
## 110 110 2.1495384 1.6535421 2
## 111 111 1.3661693 2.2240482 2
## 112 112 1.7481608 2.3856441 2
## 113 113 2.1163383 2.3305007 2
## 114 114 2.5519002 1.8605425 2
## 115 115 2.1632306 2.1733376 2
## 116 116 2.1993959 1.7466690 2
## 117 117 1.6317558 1.3180457 2
## 118 118 1.8718640 2.0420516 2
## 119 119 1.7576361 1.8369556 2
## 120 120 2.1777485 1.9536790 2
## 121 121 1.5452981 1.6430946 2
## 122 122 1.7374044 1.9629580 2
## 123 123 2.4174326 1.7617914 2
## 124 124 2.3580288 2.1205054 2
## 125 125 2.5894833 2.3024864 2
## 126 126 2.4024874 2.0252050 2
## 127 127 2.3393553 1.9646018 2
## 128 128 1.8570918 1.7799213 2
## 129 129 1.9375662 1.8307695 2
## 130 130 1.4850182 1.8154804 2
## 131 131 0.8317172 1.9615005 2
## 132 132 2.1698507 2.4288118 2
## 133 133 2.1180596 1.5451124 2
## 134 134 1.8866094 2.0823006 2
## 135 135 2.4306652 1.9360392 2
## 136 136 1.3980575 2.4825147 2
## 137 137 1.9360886 2.4161774 2
## 138 138 1.8972829 2.2865548 2
## 139 139 1.9729965 2.2800574 2
## 140 140 2.0121408 2.5490452 2
## 141 141 2.1194526 2.3343004 2
## 142 142 1.7188031 1.5755906 2
## 143 143 2.5065627 1.9425107 2
## 144 144 2.4178548 2.5581941 2
## 145 145 2.0303526 2.0380918 2
## 146 146 1.8127356 1.8608428 2
## 147 147 2.0976676 1.8445404 2
## 148 148 2.3682093 1.9945662 2
## 149 149 1.7638344 2.3282711 2
## 150 150 1.6501673 2.3606737 2
## 151 151 2.0826567 2.5551857 2
## 152 152 2.1724808 1.4973861 2
## 153 153 2.4660017 2.2656568 2
## 154 154 2.5845215 1.7484647 2
## 155 155 1.7014977 1.9251035 2
## 156 156 2.3088995 2.1917565 2
## 157 157 2.4198413 2.1507954 2
## 158 158 2.0380028 1.8934676 2
## 159 159 2.0486454 1.8723494 2
## 160 160 2.1961128 1.8303259 2
## 161 161 1.7449725 1.7456562 2
## 162 162 1.8855708 1.7480916 2
## 163 163 1.7633544 2.0768556 2
## 164 164 2.0724397 2.5699561 2
## 165 165 1.5544595 2.0827752 2
## 166 166 1.6571505 1.5651161 2
## 167 167 2.1461370 1.7485831 2
## 168 168 2.1997976 1.9045547 2
## 169 169 1.8012821 2.2486367 2
## 170 170 1.9516914 1.7959914 2
## 171 171 1.5812671 1.3933264 2
## 172 172 2.2452198 1.7497464 2
## 173 173 2.3633530 1.6304779 2
## 174 174 1.8123147 1.4706795 2
## 175 175 1.8616742 2.0855991 2
## 176 176 1.7058378 1.8463885 2
## 177 177 1.3846781 2.1272488 2
## 178 178 2.2309258 2.5877900 2
## 179 179 1.7186580 1.7562195 2
## 180 180 1.8789516 2.5313413 2
## 181 181 1.4183225 2.2602353 2
## 182 182 0.9804335 1.8343316 2
## 183 183 1.8078194 1.9269470 2
## 184 184 2.2857103 1.6987145 2
## 185 185 2.1261888 1.7775692 2
## 186 186 2.2834598 2.2363101 2
## 187 187 1.8863905 2.3363181 2
## 188 188 1.8296542 1.8167206 2
## 189 189 1.7950462 1.8876026 2
## 190 190 2.1653800 1.9528807 2
## 191 191 2.3011832 1.7582755 2
## 192 192 2.1881185 2.0034041 2
## 193 193 1.6343435 2.1465149 2
## 194 194 2.0790663 1.4291535 2
## 195 195 1.6770096 2.0365001 2
## 196 196 1.5854592 2.3145920 2
## 197 197 2.3392047 2.5347724 2
## 198 198 2.1520123 1.6718351 2
## 199 199 2.3546154 2.2068461 2
## 200 200 2.0965418 1.6386522 2
## 201 201 3.1582948 3.2817822 3
## 202 202 3.0324669 2.9516739 3
## 203 203 2.8183399 3.5035701 3
## 204 204 2.6817798 2.9180582 3
## 205 205 2.7808847 3.0844466 3
## 206 206 2.8808048 2.8296571 3
## 207 207 3.2834376 3.0076797 3
## 208 208 3.2670560 2.3872379 3
## 209 209 2.9112772 3.2753139 3
## 210 210 3.5347984 2.9132633 3
## 211 211 2.9866980 3.5704456 3
## 212 212 2.9475630 3.1103896 3
## 213 213 3.6914637 3.5650306 3
## 214 214 2.8297386 3.0184972 3
## 215 215 3.0064852 2.3909167 3
## 216 216 3.2958027 3.0497020 3
## 217 217 3.2940603 3.0182022 3
## 218 218 2.9120480 2.8546845 3
## 219 219 2.9842337 3.3483988 3
## 220 220 3.3505244 3.5677951 3
## 221 221 2.6410030 2.8457986 3
## 222 222 2.9100960 3.4960909 3
## 223 223 3.1094851 2.8974510 3
## 224 224 3.2596545 3.0012766 3
## 225 225 2.9191755 3.2961545 3
## 226 226 3.0616052 2.9854865 3
## 227 227 3.1084309 3.4781697 3
## 228 228 3.1889907 3.4533466 3
## 229 229 2.8498632 3.1324070 3
## 230 230 3.0476210 3.3198091 3
## 231 231 3.1947123 2.7340885 3
## 232 232 3.2642629 2.7866811 3
## 233 233 3.2633253 2.2783362 3
## 234 234 2.8702660 2.7371004 3
## 235 235 3.0800114 2.6517472 3
## 236 236 2.8861520 3.4423209 3
## 237 237 3.0677828 3.3158418 3
## 238 238 2.7887346 2.2638015 3
## 239 239 3.1258504 2.5792482 3
## 240 240 3.1628658 2.7525779 3
## 241 241 3.2192984 2.5263849 3
## 242 242 3.1183246 2.7606926 3
## 243 243 3.2507699 3.4337573 3
## 244 244 3.0452794 3.2743320 3
## 245 245 2.8066416 2.8469756 3
## 246 246 3.3232539 2.6161428 3
## 247 247 3.1623044 2.8852779 3
## 248 248 3.1590963 2.9107673 3
## 249 249 2.5175207 3.1755530 3
## 250 250 2.9070490 3.3371424 3
## 251 251 3.2223458 2.9656005 3
## 252 252 3.1985484 2.7809807 3
## 253 253 3.0316803 2.9058037 3
## 254 254 2.4238337 3.2605995 3
## 255 255 2.6941865 3.0278952 3
## 256 256 2.5748253 2.7013106 3
## 257 257 3.2986795 3.1174259 3
## 258 258 2.6425800 2.1964654 3
## 259 259 2.5709755 3.0196524 3
## 260 260 3.2014407 3.1667301 3
## 261 261 2.9971207 3.3002295 3
## 262 262 3.4621043 2.9682704 3
## 263 263 2.9321147 2.5971883 3
## 264 264 3.0409185 3.4695090 3
## 265 265 3.0373145 2.4586874 3
## 266 266 3.1736631 2.6361699 3
## 267 267 2.6727348 3.1018170 3
## 268 268 3.0759845 2.9310834 3
## 269 269 2.3170393 3.2172804 3
## 270 270 2.9326255 3.6379068 3
## 271 271 3.5626431 3.4774382 3
## 272 272 3.0507725 3.0392838 3
## 273 273 3.3281075 2.6704701 3
## 274 274 2.9400647 3.5905523 3
## 275 275 2.5671612 2.9392090 3
## 276 276 3.0305182 2.9148003 3
## 277 277 3.0971165 3.2032150 3
## 278 278 3.0239877 2.9992575 3
## 279 279 3.1436436 3.3177178 3
## 280 280 2.7237814 2.7846960 3
## 281 281 2.6488493 2.9403778 3
## 282 282 3.5028389 2.7374304 3
## 283 283 2.9100123 2.9941576 3
## 284 284 2.6551342 3.1751063 3
## 285 285 2.4368569 3.3106858 3
## 286 286 3.2537246 3.0023797 3
## 287 287 2.5251198 3.1648635 3
## 288 288 3.4102319 2.6574130 3
## 289 289 3.1919072 2.8157314 3
## 290 290 3.4064629 3.4305026 3
## 291 291 3.0079670 3.6826106 3
## 292 292 3.7393616 3.1616663 3
## 293 293 3.1898541 3.3348883 3
## 294 294 3.1957236 2.8465667 3
## 295 295 3.0556766 3.2829843 3
## 296 296 3.6260623 2.6645142 3
## 297 297 2.6417746 3.0416318 3
## 298 298 3.2317155 3.1088961 3
## 299 299 3.1938757 2.7928641 3
## 300 300 2.2231145 3.1224288 3
## 301 301 4.5359765 3.7180034 4
## 302 302 3.9555011 4.3461914 4
## 303 303 3.9635956 3.8936239 4
## 304 304 4.0641252 4.0590676 4
## 305 305 3.8826908 3.7890478 4
## 306 306 3.5947894 4.2408776 4
## 307 307 3.8539515 4.1816583 4
## 308 308 3.9953827 4.0160001 4
## 309 309 4.3505713 4.0675379 4
## 310 310 4.1699464 3.9732064 4
## 311 311 3.4242432 3.6522741 4
## 312 312 4.1250367 4.3059641 4
## 313 313 4.2805916 4.5032701 4
## 314 314 3.7358389 4.4479817 4
## 315 315 4.2973374 4.2602548 4
## 316 316 4.0496482 4.6119820 4
## 317 317 4.2761301 4.7507636 4
## 318 318 3.8912117 4.1998976 4
## 319 319 4.1961193 4.1457364 4
## 320 320 3.7188606 3.7647725 4
## 321 321 4.2077306 4.5999424 4
## 322 322 3.7416941 4.1484901 4
## 323 323 3.3222681 4.4997617 4
## 324 324 3.7543036 4.2716073 4
## 325 325 3.9527188 3.6590467 4
## 326 326 4.0381956 3.7186242 4
## 327 327 3.8806544 3.9070130 4
## 328 328 3.9827942 3.9598205 4
## 329 329 4.0226557 3.6212487 4
## 330 330 4.1956919 4.5781945 4
## 331 331 3.8540425 3.9128319 4
## 332 332 4.0616431 4.0036028 4
## 333 333 3.9617479 4.0205777 4
## 334 334 4.1301273 3.8929814 4
## 335 335 3.9303327 3.6822656 4
## 336 336 3.8639665 4.6689760 4
## 337 337 3.8498979 4.3259294 4
## 338 338 3.7456614 4.2190044 4
## 339 339 3.5996916 4.0785205 4
## 340 340 4.2211286 3.6889119 4
## 341 341 3.6359034 3.5863260 4
## 342 342 3.9374437 4.3484828 4
## 343 343 4.4620644 3.7832389 4
## 344 344 3.8583607 3.3736348 4
## 345 345 4.2358756 4.2060441 4
## 346 346 4.4409623 3.7583155 4
## 347 347 3.9408002 3.7366986 4
## 348 348 4.4116127 3.5505830 4
## 349 349 4.2196721 4.0230395 4
## 350 350 4.2743564 4.3818073 4
## 351 351 4.0834435 4.1319625 4
## 352 352 3.8766559 4.5851336 4
## 353 353 4.1009444 3.7675707 4
## 354 354 4.0688708 4.5938085 4
## 355 355 3.5415940 3.6509124 4
## 356 356 3.4175004 4.1507475 4
## 357 357 4.8080342 3.5950536 4
## 358 358 4.2245045 4.0363771 4
## 359 359 4.3707505 3.9898450 4
## 360 360 3.5380312 4.4715776 4
## 361 361 3.9736378 4.0223226 4
## 362 362 4.3070604 4.0731841 4
## 363 363 4.4301646 4.2642174 4
## 364 364 3.5943743 4.3678552 4
## 365 365 4.3518757 4.3297973 4
## 366 366 4.0038028 4.6589989 4
## 367 367 3.4916286 3.9650951 4
## 368 368 3.7401950 3.7304495 4
## 369 369 4.3564057 4.4083939 4
## 370 370 4.1282396 3.7711440 4
## 371 371 4.2755948 3.5237243 4
## 372 372 4.2628353 4.5689097 4
## 373 373 4.1496029 3.4827091 4
## 374 374 4.3043997 4.0827318 4
## 375 375 4.0068700 3.9588135 4
## 376 376 4.0579176 3.3274770 4
## 377 377 3.7807398 3.6628580 4
## 378 378 4.6942437 4.0639392 4
## 379 379 3.8853819 3.9131507 4
## 380 380 4.4660029 3.7494534 4
## 381 381 4.2689441 3.9755479 4
## 382 382 3.9050029 3.5893448 4
## 383 383 3.4540026 3.1477135 4
## 384 384 3.6639532 3.7478090 4
## 385 385 3.9682711 4.1421898 4
## 386 386 4.2559583 4.5019210 4
## 387 387 4.0861906 3.7950035 4
## 388 388 3.9331711 3.9469074 4
## 389 389 3.7591473 4.1226238 4
## 390 390 3.7572780 3.8618581 4
## 391 391 4.0981217 3.8540294 4
## 392 392 3.9805710 4.1916993 4
## 393 393 4.2266629 4.1275205 4
## 394 394 3.7137744 4.2314257 4
## 395 395 3.5422981 3.7211093 4
## 396 396 3.2119115 3.6943840 4
## 397 397 3.9658752 4.3221165 4
## 398 398 3.4408164 3.9808492 4
## 399 399 3.6198537 4.1570393 4
## 400 400 4.0005277 4.2579742 4
## 401 401 4.9629049 4.2422309 5
## 402 402 5.1840698 5.0070322 5
## 403 403 5.0771319 4.9495746 5
## 404 404 5.1922847 4.3193401 5
## 405 405 4.7447964 5.0256347 5
## 406 406 5.2756551 4.9227218 5
## 407 407 4.9220268 4.7658744 5
## 408 408 4.9456442 4.6766726 5
## 409 409 5.0101160 4.7870993 5
## 410 410 4.3928139 5.0376128 5
## 411 411 4.8620394 5.2395791 5
## 412 412 5.1552361 4.3784459 5
## 413 413 4.8395224 5.1605822 5
## 414 414 5.0123625 4.8462883 5
## 415 415 4.9399809 4.6627123 5
## 416 416 4.7543933 5.0816301 5
## 417 417 5.2475480 4.9361446 5
## 418 418 4.9609575 4.9913760 5
## 419 419 4.7977331 4.9085418 5
## 420 420 4.9352900 5.1609235 5
## 421 421 5.0124362 5.5309027 5
## 422 422 4.7023587 5.0047688 5
## 423 423 5.6711915 5.0633338 5
## 424 424 5.2414829 4.4815822 5
## 425 425 5.1636573 4.6373326 5
## 426 426 5.3863081 5.1365235 5
## 427 427 5.2947810 4.7475621 5
## 428 428 5.3187440 5.4816437 5
## 429 429 4.8578147 4.9280052 5
## 430 430 5.2357165 5.5846631 5
## 431 431 4.9859486 4.9714519 5
## 432 432 4.8568890 4.9956828 5
## 433 433 4.6360512 4.7197370 5
## 434 434 5.1394574 5.1915715 5
## 435 435 5.3449901 4.8060268 5
## 436 436 4.6981069 5.4857868 5
## 437 437 5.1869949 4.9236752 5
## 438 438 4.7888578 4.7330719 5
## 439 439 4.6198838 4.3638818 5
## 440 440 5.0116934 4.6114746 5
## 441 441 4.9839307 5.0914839 5
## 442 442 4.3115221 4.6426046 5
## 443 443 5.0603603 5.3649108 5
## 444 444 4.3935056 5.3382878 5
## 445 445 4.7919287 4.9022007 5
## 446 446 5.0069875 4.8662688 5
## 447 447 5.2848705 4.9361746 5
## 448 448 5.1318305 4.2595863 5
## 449 449 4.8061009 5.0554884 5
## 450 450 4.8059121 4.7641212 5
## 451 451 4.9462178 4.8988535 5
## 452 452 4.9971115 5.2482229 5
## 453 453 4.8560232 5.3053471 5
## 454 454 5.0850055 5.2744325 5
## 455 455 4.5219593 5.3878710 5
## 456 456 5.2696316 4.9126692 5
## 457 457 4.8642463 4.9429658 5
## 458 458 5.1449609 5.2164283 5
## 459 459 5.0937129 4.9856145 5
## 460 460 5.1040760 5.4830499 5
## 461 461 4.8166429 4.8559136 5
## 462 462 4.9763918 4.7703782 5
## 463 463 4.5589634 5.7778211 5
## 464 464 5.2792591 4.9234981 5
## 465 465 4.9917173 5.0636682 5
## 466 466 4.5066792 4.9043998 5
## 467 467 5.2588917 4.5656649 5
## 468 468 4.6186006 5.3081865 5
## 469 469 5.2702501 5.4359850 5
## 470 470 4.7221813 5.5162830 5
## 471 471 4.5440553 4.8790472 5
## 472 472 4.6623390 5.1527867 5
## 473 473 4.5438913 5.3159415 5
## 474 474 5.1648708 5.1808000 5
## 475 475 4.8522698 5.0159296 5
## 476 476 5.0374700 4.8647180 5
## 477 477 4.7017291 5.3097452 5
## 478 478 4.7472060 5.1970918 5
## 479 479 4.8637476 4.9439548 5
## 480 480 4.7405159 4.8950215 5
## 481 481 5.6405392 4.8614680 5
## 482 482 4.9037743 5.1033143 5
## 483 483 4.7239543 4.8959548 5
## 484 484 5.5427481 4.8978228 5
## 485 485 5.3157870 4.9245687 5
## 486 486 4.5964368 5.4851325 5
## 487 487 4.6640333 5.4468762 5
## 488 488 4.9004773 5.0444003 5
## 489 489 4.9120045 5.1658056 5
## 490 490 5.3743472 5.1546086 5
## 491 491 4.9536544 4.9044288 5
## 492 492 5.0237452 4.6587183 5
## 493 493 5.5639374 5.4494763 5
## 494 494 5.2886115 4.4582237 5
## 495 495 4.7506781 5.0871638 5
## 496 496 5.2716727 4.9269265 5
## 497 497 4.8795458 5.0449881 5
## 498 498 5.3628868 5.4361093 5
## 499 499 5.2961830 5.5908125 5
## 500 500 5.2156221 5.0953297 5
This is what our data looks like:
data %>% ggplot(aes(x = V1, y = V2, color = factor(true_clust))) +
geom_point() +
scale_color_discrete(name = "true cluster")
Now we can cluster it using kmeans++:
rownames(data) <- data$id
data_for_clust <- data %>% select(starts_with("V"))
km <- TGL_kmeans_tidy(data_for_clust,
k = 5,
metric = "euclid",
verbose = TRUE
)
## will generate seeds
## generating seeds
## at seed 0
## add new core from 43 to 0
## at seed 1
## done update min distance
## seed range 350 450
## picked up 448 dist was 2.84921
## add new core from 448 to 1
## at seed 2
## done update min distance
## seed range 300 400
## picked up 222 dist was 1.3774
## add new core from 222 to 2
## at seed 3
## done update min distance
## seed range 250 350
## picked up 333 dist was 0.672893
## add new core from 333 to 3
## at seed 4
## done update min distance
## seed range 200 300
## picked up 108 dist was 0.571462
## add new core from 108 to 4
## reassign after init
## iter 0
## iter 1 changed 2
## iter 1
## iter 2 changed 0
The returned list contains 3 fields:
names(km)
## [1] "centers" "cluster" "size"
`km$centers` contains a tibble with a `clust` column and the cluster centers:
km$centers
## # A tibble: 5 × 3
## clust V1 V2
## <int> <dbl> <dbl>
## 1 1 2.03 1.98
## 2 2 4.01 4.04
## 3 3 3.04 3.04
## 4 4 1.00 1.06
## 5 5 4.98 5.02
Clusters are numbered according to `reorder_func` (see the ‘Custom cluster ordering’ section).
`km$cluster` contains a tibble with an `id` column holding the observation id (`1:n` if no id column was supplied), and a `clust` column holding the cluster assigned to each observation:
km$cluster
## # A tibble: 500 × 2
## id clust
## <chr> <int>
## 1 1 4
## 2 2 4
## 3 3 4
## 4 4 4
## 5 5 4
## 6 6 4
## 7 7 4
## 8 8 4
## 9 9 4
## 10 10 4
## # ℹ 490 more rows
`km$size` contains a tibble with a `clust` column and an `n` column holding the number of points in each cluster:
km$size
## # A tibble: 5 × 2
## clust n
## <int> <int>
## 1 1 101
## 2 2 100
## 3 3 99
## 4 4 101
## 5 5 99
We can now check our clustering performance - the fraction of
observations that were classified correctly (note that the
`match_clusters` function is internal to the package and is
used only in this vignette):
d <- tglkmeans:::match_clusters(data, km, 5)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.97
And plot the results:
By default, the clusters were ordered using the following function:
`hclust(dist(cor(t(centers))))` - hclust of the Euclidean
distance of the correlation matrix of the centers.
We can supply our own function to order the clusters using the
`reorder_func` argument. The function is applied to
each center and the clusters are ordered by the result.
km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
k = 5,
metric = "euclid",
verbose = FALSE,
reorder_func = median
)
km$centers
## # A tibble: 5 × 4
## clust id V1 V2
## <int> <dbl> <dbl> <dbl>
## 1 1 51.5 1.04 1.06
## 2 2 153 2.02 2.02
## 3 3 254. 3.06 3.05
## 4 4 353 4.01 4.05
## 5 5 452. 4.98 5.02
tglkmeans can deal with missing data, as long as at least one dimension is not missing. For example:
## id V1 V2 true_clust
## 1 1 1.0667265 1.2974904 1
## 2 2 1.5669030 1.0507419 1
## 3 3 0.9216493 0.9818881 1
## 4 4 1.1005807 1.0826089 1
## 5 5 1.3043060 0.7386772 1
## 6 6 NA 1.2199023 1
## 7 7 0.5640716 1.2009019 1
## 8 8 0.8026433 1.0143766 1
## 9 9 NA 1.7195164 1
## 10 10 0.5520354 1.0998368 1
## 11 11 1.1953140 1.3663256 1
## 12 12 1.5992284 1.3281665 1
## 13 13 NA 1.1698961 1
## 14 14 1.1234638 0.9912329 1
## 15 15 1.0126584 0.9744042 1
## 16 16 1.3119844 0.8811811 1
## 17 17 1.0552372 1.6257848 1
## 18 18 NA 0.9811239 1
## 19 19 0.9281041 1.1814423 1
## 20 20 NA 1.0918538 1
## 21 21 NA 0.8609941 1
## 22 22 1.6904613 1.4090263 1
## 23 23 NA 0.8359700 1
## 24 24 NA 1.3954873 1
## 25 25 1.0574108 0.8103830 1
## 26 26 1.0821083 1.3491419 1
## 27 27 1.2773750 1.0908505 1
## 28 28 0.9083436 0.9377400 1
## 29 29 1.4753087 1.1393456 1
## 30 30 NA 1.0678832 1
## 31 31 0.9971557 0.7110518 1
## 32 32 1.1286987 0.8751926 1
## 33 33 0.8602882 1.2884457 1
## 34 34 0.8720141 1.3431411 1
## 35 35 0.8564513 1.3415309 1
## 36 36 NA 1.2513594 1
## 37 37 NA 1.1915235 1
## 38 38 1.3170715 0.8314424 1
## 39 39 1.1620800 0.9975508 1
## 40 40 0.9644723 0.4914671 1
## 41 41 0.9637582 1.1634533 1
## 42 42 1.0621891 1.2628968 1
## 43 43 0.4116396 1.3365347 1
## 44 44 0.6080422 1.0226581 1
## 45 45 NA 1.1814792 1
## 46 46 1.4061822 1.1482983 1
## 47 47 NA 1.3122412 1
## 48 48 0.6009701 1.2520516 1
## 49 49 1.3759020 1.0235787 1
## 50 50 1.2488680 1.0132348 1
## 51 51 NA 0.8174224 1
## 52 52 1.2437039 1.1594962 1
## 53 53 0.5141690 1.0295818 1
## 54 54 0.6242043 0.3423182 1
## 55 55 0.8449093 1.0426204 1
## 56 56 1.1172540 0.9446881 1
## 57 57 0.8123217 0.9360699 1
## 58 58 1.0578361 0.6289752 1
## 59 59 NA 0.6354165 1
## 60 60 1.1025650 1.0735222 1
## 61 61 0.8449643 0.9240000 1
## 62 62 0.8131473 1.0000371 1
## 63 63 0.9731536 1.0191583 1
## 64 64 1.1998901 1.0817627 1
## 65 65 0.8762270 1.4395907 1
## 66 66 1.0997815 0.7537613 1
## 67 67 0.9370728 1.0533252 1
## 68 68 0.8842632 0.8134801 1
## 69 69 1.0250015 1.0604346 1
## 70 70 0.9508633 1.4304047 1
## 71 71 1.3018849 0.7042134 1
## 72 72 NA 0.9791680 1
## 73 73 1.4766626 0.8399630 1
## 74 74 0.7077205 1.1032085 1
## 75 75 0.9465240 0.7801880 1
## 76 76 NA 0.7269880 1
## 77 77 1.1768653 0.9117824 1
## 78 78 0.5618219 1.3393789 1
## 79 79 1.2671184 1.0621367 1
## 80 80 1.8189428 1.3351707 1
## 81 81 0.8799897 0.7404425 1
## 82 82 0.7193279 0.5906543 1
## 83 83 NA 0.9753877 1
## 84 84 0.9896182 0.9457459 1
## 85 85 1.1023038 0.9017862 1
## 86 86 0.8354561 1.2557587 1
## 87 87 1.3565034 0.6625389 1
## 88 88 NA 1.7051537 1
## 89 89 0.7806541 0.8768405 1
## 90 90 0.5749884 0.6220421 1
## 91 91 0.5285356 0.9881802 1
## 92 92 0.8048000 0.7652237 1
## 93 93 0.9155977 0.7346152 1
## 94 94 0.5170804 1.3764329 1
## 95 95 0.9132864 1.1759861 1
## 96 96 0.5753730 2.0344629 1
## 97 97 0.6270602 1.0425871 1
## 98 98 1.4039415 0.7822955 1
## 99 99 1.4699544 0.6457397 1
## 100 100 0.1966918 0.8655827 1
## 101 101 1.8650106 1.8228915 2
## 102 102 NA 2.0348018 2
## 103 103 2.0709071 1.5802315 2
## 104 104 1.7802285 2.1852078 2
## 105 105 2.1404162 1.4695159 2
## 106 106 2.4045219 1.8141060 2
## 107 107 2.2754578 1.6387158 2
## 108 108 1.8346851 1.7169400 2
## 109 109 2.2179101 2.1954281 2
## 110 110 NA 1.6535421 2
## 111 111 1.3661693 2.2240482 2
## 112 112 NA 2.3856441 2
## 113 113 NA 2.3305007 2
## 114 114 2.5519002 1.8605425 2
## 115 115 NA 2.1733376 2
## 116 116 2.1993959 1.7466690 2
## 117 117 1.6317558 1.3180457 2
## 118 118 1.8718640 2.0420516 2
## 119 119 1.7576361 1.8369556 2
## 120 120 NA 1.9536790 2
## 121 121 1.5452981 1.6430946 2
## 122 122 1.7374044 1.9629580 2
## 123 123 2.4174326 1.7617914 2
## 124 124 2.3580288 2.1205054 2
## 125 125 2.5894833 2.3024864 2
## 126 126 NA 2.0252050 2
## 127 127 2.3393553 1.9646018 2
## 128 128 NA 1.7799213 2
## 129 129 1.9375662 1.8307695 2
## 130 130 1.4850182 1.8154804 2
## 131 131 0.8317172 1.9615005 2
## 132 132 2.1698507 2.4288118 2
## 133 133 2.1180596 1.5451124 2
## 134 134 1.8866094 2.0823006 2
## 135 135 2.4306652 1.9360392 2
## 136 136 1.3980575 2.4825147 2
## 137 137 1.9360886 2.4161774 2
## 138 138 1.8972829 2.2865548 2
## 139 139 1.9729965 2.2800574 2
## 140 140 2.0121408 2.5490452 2
## 141 141 NA 2.3343004 2
## 142 142 NA 1.5755906 2
## 143 143 2.5065627 1.9425107 2
## 144 144 2.4178548 2.5581941 2
## 145 145 2.0303526 2.0380918 2
## 146 146 1.8127356 1.8608428 2
## 147 147 2.0976676 1.8445404 2
## 148 148 2.3682093 1.9945662 2
## 149 149 1.7638344 2.3282711 2
## 150 150 NA 2.3606737 2
## 151 151 2.0826567 2.5551857 2
## 152 152 NA 1.4973861 2
## 153 153 2.4660017 2.2656568 2
## 154 154 2.5845215 1.7484647 2
## 155 155 1.7014977 1.9251035 2
## 156 156 2.3088995 2.1917565 2
## 157 157 2.4198413 2.1507954 2
## 158 158 2.0380028 1.8934676 2
## 159 159 2.0486454 1.8723494 2
## 160 160 2.1961128 1.8303259 2
## 161 161 1.7449725 1.7456562 2
## 162 162 NA 1.7480916 2
## 163 163 NA 2.0768556 2
## 164 164 2.0724397 2.5699561 2
## 165 165 1.5544595 2.0827752 2
## 166 166 1.6571505 1.5651161 2
## 167 167 NA 1.7485831 2
## 168 168 2.1997976 1.9045547 2
## 169 169 1.8012821 2.2486367 2
## 170 170 1.9516914 1.7959914 2
## 171 171 1.5812671 1.3933264 2
## 172 172 2.2452198 1.7497464 2
## 173 173 2.3633530 1.6304779 2
## 174 174 1.8123147 1.4706795 2
## 175 175 1.8616742 2.0855991 2
## 176 176 1.7058378 1.8463885 2
## 177 177 1.3846781 2.1272488 2
## 178 178 2.2309258 2.5877900 2
## 179 179 1.7186580 1.7562195 2
## 180 180 1.8789516 2.5313413 2
## 181 181 1.4183225 2.2602353 2
## 182 182 0.9804335 1.8343316 2
## 183 183 NA 1.9269470 2
## 184 184 2.2857103 1.6987145 2
## 185 185 2.1261888 1.7775692 2
## 186 186 2.2834598 2.2363101 2
## 187 187 NA 2.3363181 2
## 188 188 1.8296542 1.8167206 2
## 189 189 NA 1.8876026 2
## 190 190 NA 1.9528807 2
## 191 191 2.3011832 1.7582755 2
## 192 192 2.1881185 2.0034041 2
## 193 193 1.6343435 2.1465149 2
## 194 194 2.0790663 1.4291535 2
## 195 195 1.6770096 2.0365001 2
## 196 196 1.5854592 2.3145920 2
## 197 197 2.3392047 2.5347724 2
## 198 198 2.1520123 1.6718351 2
## 199 199 2.3546154 2.2068461 2
## 200 200 2.0965418 1.6386522 2
## 201 201 3.1582948 3.2817822 3
## 202 202 3.0324669 2.9516739 3
## 203 203 2.8183399 3.5035701 3
## 204 204 2.6817798 2.9180582 3
## 205 205 NA 3.0844466 3
## 206 206 2.8808048 2.8296571 3
## 207 207 3.2834376 3.0076797 3
## 208 208 3.2670560 2.3872379 3
## 209 209 2.9112772 3.2753139 3
## 210 210 3.5347984 2.9132633 3
## 211 211 2.9866980 3.5704456 3
## 212 212 NA 3.1103896 3
## 213 213 NA 3.5650306 3
## 214 214 2.8297386 3.0184972 3
## 215 215 3.0064852 2.3909167 3
## 216 216 3.2958027 3.0497020 3
## 217 217 3.2940603 3.0182022 3
## 218 218 2.9120480 2.8546845 3
## 219 219 2.9842337 3.3483988 3
## 220 220 NA 3.5677951 3
## 221 221 2.6410030 2.8457986 3
## 222 222 2.9100960 3.4960909 3
## 223 223 3.1094851 2.8974510 3
## 224 224 3.2596545 3.0012766 3
## 225 225 NA 3.2961545 3
## 226 226 3.0616052 2.9854865 3
## 227 227 3.1084309 3.4781697 3
## 228 228 3.1889907 3.4533466 3
## 229 229 2.8498632 3.1324070 3
## 230 230 3.0476210 3.3198091 3
## 231 231 3.1947123 2.7340885 3
## 232 232 3.2642629 2.7866811 3
## 233 233 3.2633253 2.2783362 3
## 234 234 2.8702660 2.7371004 3
## 235 235 3.0800114 2.6517472 3
## 236 236 2.8861520 3.4423209 3
## 237 237 3.0677828 3.3158418 3
## 238 238 NA 2.2638015 3
## 239 239 NA 2.5792482 3
## 240 240 3.1628658 2.7525779 3
## 241 241 3.2192984 2.5263849 3
## 242 242 3.1183246 2.7606926 3
## 243 243 NA 3.4337573 3
## 244 244 3.0452794 3.2743320 3
## 245 245 NA 2.8469756 3
## 246 246 3.3232539 2.6161428 3
## 247 247 NA 2.8852779 3
## 248 248 3.1590963 2.9107673 3
## 249 249 2.5175207 3.1755530 3
## 250 250 NA 3.3371424 3
## 251 251 3.2223458 2.9656005 3
## 252 252 NA 2.7809807 3
## 253 253 3.0316803 2.9058037 3
## 254 254 2.4238337 3.2605995 3
## 255 255 2.6941865 3.0278952 3
## 256 256 2.5748253 2.7013106 3
## 257 257 3.2986795 3.1174259 3
## 258 258 2.6425800 2.1964654 3
## 259 259 2.5709755 3.0196524 3
## 260 260 NA 3.1667301 3
## 261 261 2.9971207 3.3002295 3
## 262 262 3.4621043 2.9682704 3
## 263 263 2.9321147 2.5971883 3
## 264 264 3.0409185 3.4695090 3
## 265 265 3.0373145 2.4586874 3
## 266 266 3.1736631 2.6361699 3
## 267 267 NA 3.1018170 3
## 268 268 3.0759845 2.9310834 3
## 269 269 NA 3.2172804 3
## 270 270 2.9326255 3.6379068 3
## 271 271 3.5626431 3.4774382 3
## 272 272 NA 3.0392838 3
## 273 273 3.3281075 2.6704701 3
## 274 274 2.9400647 3.5905523 3
## 275 275 2.5671612 2.9392090 3
## 276 276 3.0305182 2.9148003 3
## 277 277 3.0971165 3.2032150 3
## 278 278 3.0239877 2.9992575 3
## 279 279 3.1436436 3.3177178 3
## 280 280 2.7237814 2.7846960 3
## 281 281 2.6488493 2.9403778 3
## 282 282 3.5028389 2.7374304 3
## 283 283 2.9100123 2.9941576 3
## 284 284 2.6551342 3.1751063 3
## 285 285 2.4368569 3.3106858 3
## 286 286 NA 3.0023797 3
## 287 287 2.5251198 3.1648635 3
## 288 288 3.4102319 2.6574130 3
## 289 289 3.1919072 2.8157314 3
## 290 290 3.4064629 3.4305026 3
## 291 291 3.0079670 3.6826106 3
## 292 292 3.7393616 3.1616663 3
## 293 293 NA 3.3348883 3
## 294 294 3.1957236 2.8465667 3
## 295 295 3.0556766 3.2829843 3
## 296 296 NA 2.6645142 3
## 297 297 2.6417746 3.0416318 3
## 298 298 3.2317155 3.1088961 3
## 299 299 3.1938757 2.7928641 3
## 300 300 2.2231145 3.1224288 3
## 301 301 4.5359765 3.7180034 4
## 302 302 3.9555011 4.3461914 4
## 303 303 3.9635956 3.8936239 4
## 304 304 4.0641252 4.0590676 4
## 305 305 3.8826908 3.7890478 4
## 306 306 3.5947894 4.2408776 4
## 307 307 3.8539515 4.1816583 4
## 308 308 3.9953827 4.0160001 4
## 309 309 4.3505713 4.0675379 4
## 310 310 4.1699464 3.9732064 4
## 311 311 3.4242432 3.6522741 4
## 312 312 NA 4.3059641 4
## 313 313 4.2805916 4.5032701 4
## 314 314 3.7358389 4.4479817 4
## 315 315 4.2973374 4.2602548 4
## 316 316 4.0496482 4.6119820 4
## 317 317 4.2761301 4.7507636 4
## 318 318 3.8912117 4.1998976 4
## 319 319 4.1961193 4.1457364 4
## 320 320 3.7188606 3.7647725 4
## 321 321 4.2077306 4.5999424 4
## 322 322 3.7416941 4.1484901 4
## 323 323 3.3222681 4.4997617 4
## 324 324 3.7543036 4.2716073 4
## 325 325 3.9527188 3.6590467 4
## 326 326 4.0381956 3.7186242 4
## 327 327 3.8806544 3.9070130 4
## 328 328 NA 3.9598205 4
## 329 329 4.0226557 3.6212487 4
## 330 330 4.1956919 4.5781945 4
## 331 331 3.8540425 3.9128319 4
## 332 332 4.0616431 4.0036028 4
## 333 333 NA 4.0205777 4
## 334 334 4.1301273 3.8929814 4
## 335 335 3.9303327 3.6822656 4
## 336 336 NA 4.6689760 4
## 337 337 3.8498979 4.3259294 4
## 338 338 NA 4.2190044 4
## 339 339 3.5996916 4.0785205 4
## 340 340 4.2211286 3.6889119 4
## 341 341 NA 3.5863260 4
## 342 342 3.9374437 4.3484828 4
## 343 343 NA 3.7832389 4
## 344 344 3.8583607 3.3736348 4
## 345 345 4.2358756 4.2060441 4
## 346 346 4.4409623 3.7583155 4
## 347 347 NA 3.7366986 4
## 348 348 4.4116127 3.5505830 4
## 349 349 4.2196721 4.0230395 4
## 350 350 4.2743564 4.3818073 4
## 351 351 4.0834435 4.1319625 4
## 352 352 3.8766559 4.5851336 4
## 353 353 NA 3.7675707 4
## 354 354 NA 4.5938085 4
## 355 355 3.5415940 3.6509124 4
## 356 356 3.4175004 4.1507475 4
## 357 357 4.8080342 3.5950536 4
## 358 358 4.2245045 4.0363771 4
## 359 359 NA 3.9898450 4
## 360 360 3.5380312 4.4715776 4
## 361 361 NA 4.0223226 4
## 362 362 4.3070604 4.0731841 4
## 363 363 NA 4.2642174 4
## 364 364 3.5943743 4.3678552 4
## 365 365 4.3518757 4.3297973 4
## 366 366 4.0038028 4.6589989 4
## 367 367 3.4916286 3.9650951 4
## 368 368 3.7401950 3.7304495 4
## 369 369 NA 4.4083939 4
## 370 370 4.1282396 3.7711440 4
## 371 371 4.2755948 3.5237243 4
## 372 372 4.2628353 4.5689097 4
## 373 373 NA 3.4827091 4
## 374 374 4.3043997 4.0827318 4
## 375 375 4.0068700 3.9588135 4
## 376 376 4.0579176 3.3274770 4
## 377 377 3.7807398 3.6628580 4
## 378 378 NA 4.0639392 4
## 379 379 3.8853819 3.9131507 4
## 380 380 4.4660029 3.7494534 4
## 381 381 NA 3.9755479 4
## 382 382 3.9050029 3.5893448 4
## 383 383 3.4540026 3.1477135 4
## 384 384 NA 3.7478090 4
## 385 385 3.9682711 4.1421898 4
## 386 386 4.2559583 4.5019210 4
## 387 387 4.0861906 3.7950035 4
## 388 388 3.9331711 3.9469074 4
## 389 389 3.7591473 4.1226238 4
## 390 390 3.7572780 3.8618581 4
## 391 391 4.0981217 3.8540294 4
## 392 392 NA 4.1916993 4
## 393 393 NA 4.1275205 4
## 394 394 3.7137744 4.2314257 4
## 395 395 3.5422981 3.7211093 4
## 396 396 3.2119115 3.6943840 4
## 397 397 NA 4.3221165 4
## 398 398 3.4408164 3.9808492 4
## 399 399 3.6198537 4.1570393 4
## 400 400 4.0005277 4.2579742 4
## 401 401 4.9629049 4.2422309 5
## 402 402 NA 5.0070322 5
## 403 403 NA 4.9495746 5
## 404 404 NA 4.3193401 5
## 405 405 4.7447964 5.0256347 5
## 406 406 5.2756551 4.9227218 5
## 407 407 NA 4.7658744 5
## 408 408 4.9456442 4.6766726 5
## 409 409 5.0101160 4.7870993 5
## 410 410 NA 5.0376128 5
## 411 411 4.8620394 5.2395791 5
## 412 412 5.1552361 4.3784459 5
## 413 413 NA 5.1605822 5
## 414 414 5.0123625 4.8462883 5
## 415 415 4.9399809 4.6627123 5
## 416 416 NA 5.0816301 5
## 417 417 NA 4.9361446 5
## 418 418 4.9609575 4.9913760 5
## 419 419 4.7977331 4.9085418 5
## 420 420 4.9352900 5.1609235 5
## 421 421 NA 5.5309027 5
## 422 422 4.7023587 5.0047688 5
## 423 423 5.6711915 5.0633338 5
## 424 424 5.2414829 4.4815822 5
## 425 425 NA 4.6373326 5
## 426 426 5.3863081 5.1365235 5
## 427 427 5.2947810 4.7475621 5
## 428 428 5.3187440 5.4816437 5
## 429 429 4.8578147 4.9280052 5
## 430 430 5.2357165 5.5846631 5
## 431 431 4.9859486 4.9714519 5
## 432 432 4.8568890 4.9956828 5
## 433 433 4.6360512 4.7197370 5
## 434 434 5.1394574 5.1915715 5
## 435 435 NA 4.8060268 5
## 436 436 4.6981069 5.4857868 5
## 437 437 5.1869949 4.9236752 5
## 438 438 4.7888578 4.7330719 5
## 439 439 4.6198838 4.3638818 5
## 440 440 5.0116934 4.6114746 5
## 441 441 NA 5.0914839 5
## 442 442 NA 4.6426046 5
## 443 443 5.0603603 5.3649108 5
## 444 444 4.3935056 5.3382878 5
## 445 445 4.7919287 4.9022007 5
## 446 446 5.0069875 4.8662688 5
## 447 447 5.2848705 4.9361746 5
## 448 448 5.1318305 4.2595863 5
## 449 449 4.8061009 5.0554884 5
## 450 450 4.8059121 4.7641212 5
## 451 451 4.9462178 4.8988535 5
## 452 452 4.9971115 5.2482229 5
## 453 453 4.8560232 5.3053471 5
## 454 454 5.0850055 5.2744325 5
## 455 455 4.5219593 5.3878710 5
## 456 456 5.2696316 4.9126692 5
## 457 457 NA 4.9429658 5
## 458 458 5.1449609 5.2164283 5
## 459 459 5.0937129 4.9856145 5
## 460 460 5.1040760 5.4830499 5
## 461 461 4.8166429 4.8559136 5
## 462 462 NA 4.7703782 5
## 463 463 NA 5.7778211 5
## 464 464 5.2792591 4.9234981 5
## 465 465 NA 5.0636682 5
## 466 466 4.5066792 4.9043998 5
## 467 467 5.2588917 4.5656649 5
## 468 468 4.6186006 5.3081865 5
## 469 469 5.2702501 5.4359850 5
## 470 470 4.7221813 5.5162830 5
## 471 471 4.5440553 4.8790472 5
## 472 472 4.6623390 5.1527867 5
## 473 473 4.5438913 5.3159415 5
## 474 474 NA 5.1808000 5
## 475 475 4.8522698 5.0159296 5
## 476 476 5.0374700 4.8647180 5
## 477 477 4.7017291 5.3097452 5
## 478 478 4.7472060 5.1970918 5
## 479 479 4.8637476 4.9439548 5
## 480 480 4.7405159 4.8950215 5
## 481 481 5.6405392 4.8614680 5
## 482 482 NA 5.1033143 5
## 483 483 4.7239543 4.8959548 5
## 484 484 NA 4.8978228 5
## 485 485 5.3157870 4.9245687 5
## 486 486 4.5964368 5.4851325 5
## 487 487 4.6640333 5.4468762 5
## 488 488 4.9004773 5.0444003 5
## 489 489 4.9120045 5.1658056 5
## 490 490 NA 5.1546086 5
## 491 491 4.9536544 4.9044288 5
## 492 492 5.0237452 4.6587183 5
## 493 493 5.5639374 5.4494763 5
## 494 494 NA 4.4582237 5
## 495 495 4.7506781 5.0871638 5
## 496 496 5.2716727 4.9269265 5
## 497 497 4.8795458 5.0449881 5
## 498 498 5.3628868 5.4361093 5
## 499 499 5.2961830 5.5908125 5
## 500 500 5.2156221 5.0953297 5
# Cluster the data (V1 now contains NAs) into 5 groups. The input keeps `id`
# as its first column, so `id_column = TRUE` is passed explicitly, matching the
# higher-dimensional example, to keep the ids out of the clustered features.
km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
  k = 5,
  metric = "euclid",
  verbose = FALSE,
  id_column = TRUE
)
# NOTE(review): match_clusters() presumably pairs each fitted cluster with a
# simulated one so `new_clust` is comparable with `true_clust` - confirm.
d <- tglkmeans:::match_clusters(data, km, 5)
# Share of observations with an assigned cluster whose label agrees with the
# simulated one; rows where the comparison is NA are not counted as matches.
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.98
Plotting the results (without the NAs), we get:
# Scatter plot of the observations: colour encodes the assigned cluster and
# shape the simulated one. The fitted centers are drawn on top as large black
# "X" marks.
ggplot(
  d,
  aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))
) +
  geom_point() +
  geom_point(data = km$centers, size = 7, color = "black", shape = "X") +
  scale_shape_discrete(name = "true cluster") +
  scale_color_discrete(name = "cluster")
## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_point()`).
Let’s move to higher dimensions (and higher noise):
# Simulate 30 clusters in 300 dimensions, then cluster them with k = 30.
data <- simulate_data(n = 100, sd = 0.3, nclust = 30, dims = 300)
# Keep `id` as the first column followed by the feature columns V*.
km <- data %>%
  select(id, starts_with("V")) %>%
  TGL_kmeans_tidy(
    k = 30,
    metric = "euclid",
    verbose = FALSE,
    id_column = TRUE
  )
Note that here we supplied id_column = TRUE to indicate that the first column is the id column.
# Compare the fitted labels with the simulated ones for the 30-cluster run.
d <- tglkmeans:::match_clusters(data, km, 30)
# Matching labels, ignoring comparisons that evaluate to NA.
n_matching <- sum(d$new_clust == d$true_clust, na.rm = TRUE)
# Observations that received a cluster assignment.
n_assigned <- sum(!is.na(d$new_clust))
n_matching / n_assigned
## [1] 1
Let’s compare it to R vanilla kmeans:
# Baseline: base R kmeans() on the same feature columns, with 30 centers.
km_standard <- kmeans(data %>% select(starts_with("V")), 30)
# Attach an id/cluster table under `$clust` so the result can be passed to
# match_clusters() the same way as the TGL_kmeans_tidy() output above.
# seq_len() is used instead of 1:nrow() so an empty input yields no ids.
km_standard$clust <- tibble(
  id = seq_len(nrow(data)),
  clust = km_standard$cluster
)
d <- tglkmeans:::match_clusters(data, km_standard, 30)
# Share of assigned observations whose label agrees with the simulated one.
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.7142857
We can see that kmeans++ clusters significantly better than R vanilla kmeans.
We can set the seed for reproducible results:
# Run the clustering twice on the same feature columns with the same seed.
features <- data %>% select(starts_with("V"))
km1 <- TGL_kmeans_tidy(
  features,
  k = 30, metric = "euclid", verbose = FALSE, seed = 60427
)
km2 <- TGL_kmeans_tidy(
  features,
  k = 30, metric = "euclid", verbose = FALSE, seed = 60427
)
# Compare the centers of both runs element-wise, leaving out the first column.
all(km1$centers[, -1] == km2$centers[, -1])
## [1] TRUE