Basic usage of the package.
First, let’s simulate 5 clusters of 100 points each, normally distributed around centers 1 to 5 with a standard deviation of 0.3:
# Simulate the example data set. The printed result below has 500 rows
# (100 per cluster, 5 clusters) with columns id, V1, V2 and true_clust.
data <- simulate_data(n = 100, sd = 0.3, nclust = 5, dims = 2)
data
## id V1 V2 true_clust
## 1 1 1.0573519 1.7478028 1
## 2 2 0.8887782 0.7696357 1
## 3 3 0.5269692 1.1493946 1
## 4 4 1.3208460 0.7824137 1
## 5 5 0.8126979 1.5054613 1
## 6 6 1.1336741 1.0945942 1
## 7 7 0.6759592 1.1531551 1
## 8 8 0.9540415 1.0529514 1
## 9 9 1.0006569 0.8315230 1
## 10 10 1.0536077 1.3770325 1
## 11 11 1.2585677 0.9205839 1
## 12 12 0.8650592 1.1372276 1
## 13 13 1.2668903 0.6216743 1
## 14 14 0.9522934 1.1224825 1
## 15 15 1.2696019 0.6984273 1
## 16 16 1.0727619 1.6396694 1
## 17 17 1.3975651 1.2097224 1
## 18 18 1.1757708 0.9048688 1
## 19 19 1.1410109 1.0434194 1
## 20 20 1.4004919 0.7503464 1
## 21 21 0.7653808 1.0777968 1
## 22 22 0.6732523 0.7906839 1
## 23 23 1.3472177 0.3429748 1
## 24 24 0.9413393 0.9503336 1
## 25 25 0.6842599 1.0403824 1
## 26 26 1.2746448 0.8712267 1
## 27 27 0.8354292 1.3091385 1
## 28 28 1.3018118 0.4171895 1
## 29 29 1.0826636 0.9239769 1
## 30 30 0.9336756 0.3525257 1
## 31 31 0.5483580 0.7138900 1
## 32 32 0.7591176 0.5363568 1
## 33 33 1.1538858 1.1079824 1
## 34 34 0.5369660 1.3539198 1
## 35 35 0.5442541 1.1136956 1
## 36 36 1.0531991 0.7798019 1
## 37 37 0.9962604 0.7152946 1
## 38 38 0.9696953 0.5923428 1
## 39 39 0.7437666 1.1423566 1
## 40 40 0.6010118 0.7181032 1
## 41 41 1.1351772 1.0386381 1
## 42 42 0.2437977 0.7528686 1
## 43 43 1.6215292 1.1479554 1
## 44 44 0.9475248 0.6548386 1
## 45 45 0.9597051 1.1258225 1
## 46 46 0.4850797 1.0008841 1
## 47 47 1.3914477 0.9196976 1
## 48 48 0.5641701 1.0907631 1
## 49 49 1.2659405 0.5198630 1
## 50 50 1.4996709 0.7589652 1
## 51 51 1.1188109 0.7218772 1
## 52 52 1.0615364 1.2542256 1
## 53 53 1.3208234 0.8193131 1
## 54 54 0.7073582 0.7141170 1
## 55 55 0.8038330 0.6260761 1
## 56 56 0.8037724 0.9136561 1
## 57 57 0.6688927 0.5611317 1
## 58 58 0.7331415 1.0390434 1
## 59 59 0.9758408 0.7093584 1
## 60 60 1.1862083 0.5044526 1
## 61 61 0.7982409 1.1079171 1
## 62 62 1.1752958 0.7158176 1
## 63 63 1.2827413 0.6460942 1
## 64 64 1.1534405 1.1042519 1
## 65 65 1.1837145 0.8843736 1
## 66 66 1.0995425 0.7952274 1
## 67 67 1.1567166 0.4780196 1
## 68 68 1.4009678 1.2769785 1
## 69 69 1.0116143 1.0279067 1
## 70 70 0.8864627 1.0235342 1
## 71 71 0.5915490 0.6496933 1
## 72 72 0.6570099 0.7774256 1
## 73 73 1.0502020 0.6826228 1
## 74 74 1.2245178 1.3484963 1
## 75 75 1.0590163 1.1499019 1
## 76 76 1.1681089 0.9064248 1
## 77 77 0.9417025 0.9732908 1
## 78 78 1.3080705 1.4753877 1
## 79 79 0.9378239 0.9017197 1
## 80 80 1.5940799 0.8228211 1
## 81 81 0.9799102 1.0903962 1
## 82 82 1.0814642 1.0459977 1
## 83 83 1.1512591 1.1450822 1
## 84 84 1.1563915 0.8992355 1
## 85 85 1.6003158 1.2937698 1
## 86 86 1.2047170 1.3241660 1
## 87 87 0.8906743 1.0305262 1
## 88 88 1.0232895 1.0955173 1
## 89 89 1.2447572 0.6185842 1
## 90 90 0.8438121 0.8225880 1
## 91 91 0.8170360 1.1927344 1
## 92 92 0.8759901 0.9877621 1
## 93 93 1.2447778 1.1029247 1
## 94 94 1.1317417 0.6398270 1
## 95 95 1.0557678 1.2873381 1
## 96 96 0.3132488 0.8014289 1
## 97 97 1.0642112 0.8221881 1
## 98 98 0.6471848 1.3936839 1
## 99 99 1.2265419 0.9881312 1
## 100 100 0.8808854 1.2001093 1
## 101 101 1.8771498 1.7732672 2
## 102 102 1.8656529 1.6102812 2
## 103 103 1.8902129 2.2716912 2
## 104 104 2.2528410 2.2421950 2
## 105 105 2.0146969 2.2037186 2
## 106 106 2.0571673 2.1831962 2
## 107 107 1.5153879 1.4021520 2
## 108 108 1.7644909 2.4067985 2
## 109 109 1.7870424 2.1608651 2
## 110 110 2.0583481 1.2250943 2
## 111 111 2.3458291 1.5937778 2
## 112 112 2.2847881 2.2453937 2
## 113 113 2.2171585 2.1001149 2
## 114 114 2.0621500 1.9473138 2
## 115 115 1.7385439 1.9590821 2
## 116 116 2.0307697 2.1765444 2
## 117 117 2.2137788 1.8862327 2
## 118 118 2.2423609 2.3209563 2
## 119 119 2.0750316 2.0551506 2
## 120 120 1.4492930 2.2375473 2
## 121 121 1.9913989 2.0278606 2
## 122 122 1.8850259 2.1148138 2
## 123 123 1.9707447 2.4592672 2
## 124 124 2.2075753 2.0948015 2
## 125 125 2.0126049 2.6308717 2
## 126 126 2.0283098 1.8761194 2
## 127 127 1.9463263 1.6075172 2
## 128 128 1.9703637 2.1165193 2
## 129 129 1.9737552 2.1636836 2
## 130 130 2.0916448 2.1695019 2
## 131 131 1.6349939 1.8360027 2
## 132 132 1.7901780 1.7874707 2
## 133 133 1.4701850 1.5788929 2
## 134 134 1.7807438 2.0392232 2
## 135 135 1.4754489 1.7173733 2
## 136 136 1.5034552 1.7905576 2
## 137 137 1.8888263 2.2175118 2
## 138 138 2.5301248 2.4090070 2
## 139 139 2.1706763 1.9731797 2
## 140 140 1.7870885 2.0117899 2
## 141 141 2.0399442 2.0871188 2
## 142 142 2.0921740 2.4561481 2
## 143 143 2.0505578 1.2893545 2
## 144 144 1.9369227 2.3211463 2
## 145 145 1.5930952 1.8895866 2
## 146 146 1.5869744 2.1308455 2
## 147 147 1.6693242 1.7933147 2
## 148 148 2.2202213 2.2461861 2
## 149 149 2.3228489 1.7847326 2
## 150 150 1.5944095 2.1800490 2
## 151 151 1.7018048 1.8475247 2
## 152 152 2.3509971 1.8496367 2
## 153 153 2.0795178 2.0882344 2
## 154 154 2.1188523 2.0773122 2
## 155 155 2.3126464 1.5343615 2
## 156 156 1.7105233 2.6269813 2
## 157 157 2.5267670 1.6151418 2
## 158 158 2.4375357 2.0788427 2
## 159 159 2.3534221 1.9087801 2
## 160 160 2.2168841 2.2102064 2
## 161 161 1.6816889 2.0190378 2
## 162 162 2.1181298 1.8870981 2
## 163 163 1.3220489 2.1816763 2
## 164 164 2.2043243 1.4676456 2
## 165 165 2.0173552 1.9194359 2
## 166 166 1.4915602 1.9062962 2
## 167 167 1.9075814 1.8654798 2
## 168 168 1.9283135 1.8666695 2
## 169 169 1.4946530 2.0449109 2
## 170 170 2.1452323 2.2475670 2
## 171 171 1.8060153 1.9800374 2
## 172 172 2.3146444 1.4553192 2
## 173 173 1.5117372 2.3444982 2
## 174 174 1.6535444 1.7997107 2
## 175 175 1.2499836 2.4468576 2
## 176 176 1.9890634 1.7668571 2
## 177 177 2.0078663 1.9689432 2
## 178 178 2.1826120 1.9883723 2
## 179 179 2.0141587 1.9979885 2
## 180 180 1.5795955 2.0853344 2
## 181 181 2.1758930 1.6282237 2
## 182 182 1.5479711 1.5534837 2
## 183 183 2.4613298 2.0013553 2
## 184 184 1.5060333 2.0199955 2
## 185 185 1.6711205 2.1533425 2
## 186 186 2.3032199 1.9896463 2
## 187 187 1.6791318 1.6290961 2
## 188 188 1.8374137 2.2410427 2
## 189 189 2.2772023 2.3338793 2
## 190 190 1.9595828 2.2527371 2
## 191 191 2.3284564 2.1093576 2
## 192 192 1.7016235 1.6914032 2
## 193 193 1.5762973 2.3082377 2
## 194 194 1.8821912 1.5825925 2
## 195 195 1.8848727 1.8853790 2
## 196 196 2.2597503 2.0622894 2
## 197 197 2.2323333 2.2830704 2
## 198 198 1.6201120 1.7764591 2
## 199 199 1.6523724 2.0193403 2
## 200 200 1.9539210 2.2344252 2
## 201 201 2.5819722 2.6567837 3
## 202 202 3.7832316 3.3028839 3
## 203 203 2.8610307 2.9080235 3
## 204 204 2.9901584 3.0943378 3
## 205 205 2.8070174 3.0703758 3
## 206 206 3.1339659 2.4290466 3
## 207 207 3.0380446 3.1386832 3
## 208 208 3.0281548 2.6853569 3
## 209 209 2.7162959 2.6574430 3
## 210 210 3.5930636 3.5263689 3
## 211 211 2.7343347 3.1306349 3
## 212 212 3.0474078 3.2267974 3
## 213 213 2.6853599 3.2922745 3
## 214 214 3.2395651 3.1359853 3
## 215 215 2.9669598 2.6972206 3
## 216 216 3.2683105 3.5274607 3
## 217 217 3.1273191 3.0142304 3
## 218 218 3.1373893 3.2388580 3
## 219 219 3.3383650 3.2082553 3
## 220 220 2.9975194 2.3522521 3
## 221 221 3.6153644 2.7892620 3
## 222 222 2.8461284 2.9314056 3
## 223 223 3.0818398 2.8281676 3
## 224 224 3.3851424 2.7852211 3
## 225 225 2.2244761 3.5469746 3
## 226 226 2.8095517 2.4626160 3
## 227 227 3.6508666 3.3560274 3
## 228 228 2.7399855 2.9685999 3
## 229 229 2.9550281 3.5272008 3
## 230 230 3.3785572 2.7083776 3
## 231 231 3.1683925 3.3237273 3
## 232 232 2.3411670 2.7699882 3
## 233 233 3.1370998 2.6864259 3
## 234 234 3.0956364 3.4672988 3
## 235 235 3.5666822 3.0121527 3
## 236 236 3.0622586 2.5919551 3
## 237 237 3.4237445 3.1104173 3
## 238 238 3.2756589 3.0391673 3
## 239 239 2.4897821 2.9606401 3
## 240 240 2.9341736 2.9715870 3
## 241 241 2.4684781 2.5386491 3
## 242 242 2.8460596 3.1439480 3
## 243 243 2.8189948 2.7818898 3
## 244 244 3.0199555 3.0133392 3
## 245 245 2.7966737 2.9273637 3
## 246 246 3.5255893 3.2752274 3
## 247 247 3.0797900 2.4478439 3
## 248 248 2.8447704 3.0511925 3
## 249 249 2.9218640 3.1880018 3
## 250 250 3.0945030 2.8852678 3
## 251 251 3.0406291 2.6547352 3
## 252 252 3.0128696 2.9649426 3
## 253 253 3.4363800 3.1199428 3
## 254 254 2.4605726 3.2040897 3
## 255 255 3.2086744 3.1462461 3
## 256 256 2.6813105 3.4488977 3
## 257 257 2.9036231 2.4466426 3
## 258 258 3.1365861 3.1815340 3
## 259 259 3.1253695 2.7634753 3
## 260 260 3.0787704 3.3339114 3
## 261 261 3.2313037 2.9723319 3
## 262 262 2.9445187 3.0200138 3
## 263 263 2.6034877 2.5892338 3
## 264 264 2.8454410 2.7943757 3
## 265 265 2.6499147 3.4176901 3
## 266 266 3.0505713 3.7679258 3
## 267 267 3.0833351 2.7855170 3
## 268 268 3.2226999 3.0969466 3
## 269 269 3.2216109 2.8514640 3
## 270 270 2.6036952 3.0328911 3
## 271 271 3.1782502 2.9253867 3
## 272 272 2.7684246 2.5690026 3
## 273 273 3.2782945 3.2051712 3
## 274 274 2.8761220 2.9241752 3
## 275 275 2.9073361 3.3500443 3
## 276 276 2.7977983 3.0704919 3
## 277 277 2.7571229 3.3002550 3
## 278 278 3.0211307 2.7796439 3
## 279 279 3.2905422 2.7314109 3
## 280 280 2.9442027 3.2482132 3
## 281 281 2.4908940 2.9041798 3
## 282 282 2.5040276 3.0997270 3
## 283 283 2.7244374 3.1601071 3
## 284 284 2.4381125 2.6579235 3
## 285 285 2.6848554 3.0012474 3
## 286 286 2.6023956 2.8879919 3
## 287 287 2.7914558 3.1804200 3
## 288 288 3.0592855 3.2297995 3
## 289 289 3.2365817 3.2208429 3
## 290 290 3.4655675 2.7035687 3
## 291 291 3.0905029 3.0045550 3
## 292 292 3.2903506 3.0102220 3
## 293 293 2.9555249 2.5675525 3
## 294 294 3.5858495 2.9978746 3
## 295 295 3.0235324 3.0744484 3
## 296 296 3.0216215 3.1364387 3
## 297 297 2.8722143 3.1256463 3
## 298 298 2.6795480 2.6906942 3
## 299 299 2.8275441 2.8454521 3
## 300 300 2.8541627 3.2153109 3
## 301 301 3.9319212 4.1047787 4
## 302 302 4.2031011 4.1359434 4
## 303 303 4.0141791 4.0903680 4
## 304 304 4.5563307 4.0732038 4
## 305 305 4.4486916 4.0189316 4
## 306 306 4.0397612 4.0247592 4
## 307 307 3.8532171 4.5728605 4
## 308 308 3.9524919 3.6402595 4
## 309 309 4.0389840 4.1587783 4
## 310 310 4.3185560 3.7215885 4
## 311 311 4.1128665 4.0560297 4
## 312 312 4.4458704 4.0531084 4
## 313 313 4.4317854 3.8649695 4
## 314 314 4.1173581 3.5166461 4
## 315 315 3.6585833 4.2296273 4
## 316 316 3.9598596 3.7078570 4
## 317 317 4.2311779 4.0549252 4
## 318 318 3.8440411 4.3013155 4
## 319 319 4.0072497 3.9959807 4
## 320 320 3.5685618 4.6784372 4
## 321 321 3.8635897 3.9775809 4
## 322 322 3.6831961 3.9346015 4
## 323 323 4.2624566 3.9525968 4
## 324 324 3.9087102 4.2356386 4
## 325 325 3.8966738 3.6831030 4
## 326 326 4.2609210 3.9631854 4
## 327 327 4.0945597 3.6859991 4
## 328 328 3.8364296 3.9555679 4
## 329 329 3.9525787 4.7345219 4
## 330 330 4.4383050 4.0457942 4
## 331 331 3.6406599 4.3905430 4
## 332 332 4.0212268 4.2134911 4
## 333 333 3.6725628 3.7573330 4
## 334 334 3.5565935 4.1645013 4
## 335 335 3.9266785 3.8393305 4
## 336 336 4.3669101 4.1737403 4
## 337 337 4.0541001 4.1037426 4
## 338 338 3.9346532 4.2760638 4
## 339 339 4.1979865 4.4269043 4
## 340 340 4.2375841 3.9333475 4
## 341 341 4.4481982 4.1750107 4
## 342 342 4.1149822 4.1838701 4
## 343 343 4.2563582 3.6389154 4
## 344 344 3.8626997 3.9116499 4
## 345 345 3.9325027 4.1438197 4
## 346 346 4.3739662 3.9824950 4
## 347 347 3.9566056 3.8946457 4
## 348 348 4.0582495 3.8343217 4
## 349 349 4.0228510 3.8863071 4
## 350 350 4.5821048 4.1458209 4
## 351 351 3.8650351 3.9805668 4
## 352 352 4.0403204 3.8558202 4
## 353 353 4.1236375 4.2442608 4
## 354 354 3.9066946 4.0137895 4
## 355 355 3.7516307 4.2002033 4
## 356 356 3.6020295 4.4084621 4
## 357 357 4.1958159 3.7809229 4
## 358 358 4.1252388 4.4175897 4
## 359 359 4.0829571 4.3375948 4
## 360 360 3.9502335 3.9279509 4
## 361 361 4.1764366 4.0724829 4
## 362 362 4.3124123 3.5704137 4
## 363 363 3.3435430 4.1069496 4
## 364 364 4.3407060 4.2315522 4
## 365 365 4.1325530 4.0919474 4
## 366 366 3.9235495 3.2245887 4
## 367 367 4.5689692 4.2002139 4
## 368 368 4.5960803 3.7843448 4
## 369 369 4.0514892 4.0091081 4
## 370 370 3.9323193 3.7281339 4
## 371 371 4.0386516 3.7446616 4
## 372 372 4.1898889 3.8100170 4
## 373 373 3.5825260 4.4314731 4
## 374 374 3.7114216 4.1480784 4
## 375 375 3.9080832 4.0036010 4
## 376 376 3.5074064 3.8485071 4
## 377 377 3.6531409 3.5823952 4
## 378 378 3.9791064 3.9305876 4
## 379 379 3.3747293 3.8854777 4
## 380 380 4.3347121 3.9965572 4
## 381 381 3.9593086 3.7610437 4
## 382 382 4.3769150 4.3976243 4
## 383 383 3.5241727 4.3646940 4
## 384 384 4.0252329 4.5869691 4
## 385 385 3.8200147 3.9123737 4
## 386 386 4.4999342 4.3462749 4
## 387 387 3.9031587 4.0120649 4
## 388 388 4.0884901 4.2716017 4
## 389 389 4.3656236 4.2483480 4
## 390 390 4.0927912 3.9236437 4
## 391 391 4.2461660 3.9907240 4
## 392 392 4.3619378 3.8281770 4
## 393 393 4.0253087 3.8700423 4
## 394 394 3.7252807 4.2826156 4
## 395 395 3.8768540 4.0218492 4
## 396 396 3.9353468 4.0839695 4
## 397 397 3.9845756 4.5274781 4
## 398 398 4.3175399 3.9474645 4
## 399 399 4.1956247 3.6506497 4
## 400 400 3.9629929 4.4813448 4
## 401 401 5.0939315 5.0569303 5
## 402 402 5.4797333 4.9093912 5
## 403 403 5.1810277 4.7126132 5
## 404 404 4.5770969 4.8603005 5
## 405 405 5.5510416 5.1442615 5
## 406 406 4.6207706 4.8672681 5
## 407 407 5.3149470 5.1397374 5
## 408 408 5.0665965 4.6585505 5
## 409 409 5.1835886 5.3322633 5
## 410 410 5.3885771 4.9830286 5
## 411 411 4.9562200 5.5049811 5
## 412 412 5.2718581 5.2405135 5
## 413 413 5.2084373 4.9560566 5
## 414 414 4.9721527 5.3795614 5
## 415 415 5.1814817 4.8598929 5
## 416 416 5.6640558 4.9886294 5
## 417 417 4.8236729 5.2310906 5
## 418 418 5.0248687 5.3233466 5
## 419 419 5.2635765 4.9208782 5
## 420 420 4.7660847 5.5413918 5
## 421 421 4.9247488 4.9033880 5
## 422 422 5.0183630 5.3193518 5
## 423 423 5.2639345 5.3483237 5
## 424 424 4.8960107 5.5002529 5
## 425 425 4.8715654 4.5184043 5
## 426 426 4.7970624 4.9077186 5
## 427 427 4.8582851 5.2208421 5
## 428 428 5.1047219 5.0046923 5
## 429 429 4.9271671 5.4422334 5
## 430 430 5.0680027 4.8948732 5
## 431 431 4.9295152 5.6661267 5
## 432 432 5.2617314 5.4790931 5
## 433 433 5.1537129 4.5694669 5
## 434 434 5.3240808 5.6557459 5
## 435 435 5.4158041 5.1627972 5
## 436 436 5.0162186 5.5942489 5
## 437 437 5.0449211 5.1983505 5
## 438 438 5.6727011 5.2197938 5
## 439 439 4.6527547 5.1416786 5
## 440 440 4.7959970 5.7314609 5
## 441 441 4.6232391 4.5484302 5
## 442 442 4.7028692 4.6354168 5
## 443 443 4.6446092 5.0542124 5
## 444 444 4.8668172 4.4917569 5
## 445 445 5.8587500 5.0461115 5
## 446 446 4.5827101 4.3551495 5
## 447 447 4.7592270 4.6402052 5
## 448 448 4.7851855 4.9353877 5
## 449 449 4.9766000 5.0017814 5
## 450 450 5.0633071 4.7822601 5
## 451 451 4.6576744 5.0956024 5
## 452 452 4.8617920 4.5624429 5
## 453 453 5.3491110 4.8368894 5
## 454 454 5.4456517 5.0278979 5
## 455 455 4.8107401 5.1965530 5
## 456 456 5.0508482 4.6658209 5
## 457 457 4.7841330 4.9769776 5
## 458 458 4.7169797 5.0811367 5
## 459 459 5.5069816 5.6701431 5
## 460 460 5.1915878 4.8684516 5
## 461 461 4.9683996 4.9602568 5
## 462 462 4.8230330 5.6446796 5
## 463 463 5.2747394 5.1737542 5
## 464 464 4.9458842 4.8583164 5
## 465 465 4.7860390 5.0114536 5
## 466 466 5.1625189 5.1314488 5
## 467 467 5.1029722 4.5914207 5
## 468 468 5.1994724 4.8600189 5
## 469 469 5.5347419 5.1175816 5
## 470 470 4.8573577 4.7536672 5
## 471 471 5.0869352 5.1629523 5
## 472 472 4.8054829 5.1358627 5
## 473 473 5.0382698 5.1894259 5
## 474 474 4.8631021 4.9282662 5
## 475 475 4.3164392 4.8911148 5
## 476 476 4.6409209 4.4486824 5
## 477 477 5.4132810 5.2804849 5
## 478 478 4.8010176 4.7578252 5
## 479 479 5.0961231 4.8786333 5
## 480 480 5.0187851 5.0409791 5
## 481 481 4.4393572 4.9250130 5
## 482 482 5.0292903 4.5412575 5
## 483 483 5.3828960 4.8183111 5
## 484 484 4.8450897 5.4215614 5
## 485 485 4.5558001 5.0734896 5
## 486 486 4.9906114 4.7052461 5
## 487 487 5.0706787 5.0065545 5
## 488 488 4.7898177 5.3107710 5
## 489 489 5.2205857 4.9906757 5
## 490 490 5.2708506 5.2940148 5
## 491 491 4.6971965 5.2695113 5
## 492 492 4.5837589 4.6601760 5
## 493 493 5.2856180 4.5972000 5
## 494 494 5.0437964 5.0664296 5
## 495 495 5.1229746 5.1062571 5
## 496 496 5.2805337 5.4254368 5
## 497 497 4.5811619 4.6489740 5
## 498 498 4.7703773 5.1451297 5
## 499 499 5.1040017 5.0568612 5
## 500 500 4.9806582 4.6061834 5
This is what our data looks like:
# Scatter plot of the simulated points, colored by their true cluster label.
ggplot(data, aes(x = V1, y = V2, color = factor(true_clust))) +
  geom_point() +
  scale_color_discrete(name = "true cluster")
Now we can cluster it using kmeans++:
# Keep only the id column and the coordinate columns (names starting with
# "V"); the true_clust label is not passed to the clustering.
data_for_clust <- select(data, id, starts_with("V"))

# Cluster into k = 5 groups with the "euclid" metric; verbose = TRUE makes
# the call print its progress log.
km <- TGL_kmeans_tidy(
  data_for_clust,
  k = 5, metric = "euclid", verbose = TRUE
)
## id column: id
## KMEans: will generate seeds
## KMeans into generate seeds
## at seed 0
## add new core from 147 to 0
## at seed 1
## done update min distance
## seed range 350 450
## picked up 437 dist was 2.34612
## add new core from 437 to 1
## at seed 2
## done update min distance
## seed range 300 400
## picked up 386 dist was 0.862664
## add new core from 386 to 2
## at seed 3
## done update min distance
## seed range 250 350
## picked up 92 dist was 0.683466
## add new core from 92 to 3
## at seed 4
## done update min distance
## seed range 200 300
## picked up 213 dist was 0.572069
## add new core from 213 to 4
## KMEans: reassign after init
## KMEans: iter 0
## KMEans: iter 1 changed 6
## KMEans: iter 1
## KMEans: iter 2 changed 5
## KMEans: iter 2
## KMEans: iter 3 changed 0
The returned list contains 3 fields:
# List the fields of the object returned by TGL_kmeans_tidy().
names(km)
## [1] "centers" "cluster" "size"
`km$centers` contains a tibble with a `clust` column and the cluster centers:
# Print the cluster centers (one row per cluster).
km$centers
## # A tibble: 5 × 3
## clust V1 V2
## <int> <dbl> <dbl>
## 1 1 2.98 2.99
## 2 2 1.94 2.00
## 3 3 5.02 5.04
## 4 4 4.04 4.03
## 5 5 1.01 0.952
Clusters are numbered according to `reorder_func` (see the ‘Custom cluster ordering’ section).
`km$cluster` contains a tibble with an `id` column holding the observation id (`1:n` if no id column was supplied), and a `clust` column holding the cluster assigned to each observation:
# Print the per-observation cluster assignments.
km$cluster
## # A tibble: 500 × 2
## id clust
## <chr> <int>
## 1 1 5
## 2 2 5
## 3 3 5
## 4 4 5
## 5 5 5
## 6 6 5
## 7 7 5
## 8 8 5
## 9 9 5
## 10 10 5
## # ℹ 490 more rows
`km$size` contains a tibble with a `clust` column and an `n` column holding the number of points in each cluster:
# Print the number of observations assigned to each cluster.
km$size
## # A tibble: 5 × 2
## clust n
## <int> <int>
## 1 1 98
## 2 2 99
## 3 3 99
## 4 4 103
## 5 5 101
We can now check our clustering performance, i.e. the fraction of observations that were classified correctly (note that the `match_clusters` function is internal to the package and is used only in this vignette):
# match_clusters() is an internal helper; its result is used below through
# the true_clust and new_clust columns.
d <- tglkmeans:::match_clusters(data, km, 5)

# Share of observations with a non-missing new_clust whose new_clust equals
# true_clust.
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 0.992
And plot the results:
# Color encodes the assigned cluster and shape the true cluster; the cluster
# centers are overlaid as large black "X" marks.
ggplot(
  d,
  aes(
    x = V1, y = V2,
    color = factor(new_clust),
    shape = factor(true_clust)
  )
) +
  geom_point() +
  scale_color_discrete(name = "cluster") +
  scale_shape_discrete(name = "true cluster") +
  geom_point(data = km$centers, size = 7, color = "black", shape = "X")
By default, the clusters are ordered using the following function: `hclust(dist(cor(t(centers))))`
- hclust of the euclidean distance of the correlation matrix of the centers.
We can supply our own function to order the clusters using the `reorder_func`
argument. The function will be applied to each center and the clusters will be ordered by the result.
# Re-run the clustering quietly, this time ordering the clusters by the
# median of each center (reorder_func = median).
km <- TGL_kmeans_tidy(
  select(data, id, starts_with("V")),
  k = 5, metric = "euclid", verbose = FALSE,
  reorder_func = median
)

# Print the centers to show the new cluster numbering.
km$centers
## # A tibble: 5 × 3
## clust V1 V2
## <int> <dbl> <dbl>
## 1 1 1.01 0.952
## 2 2 1.94 2.00
## 3 3 2.97 2.99
## 4 4 4.03 4.03
## 5 5 5.02 5.04
tglkmeans can deal with missing data, as long as at least one dimension of each observation is not missing. For example:
## id V1 V2 true_clust
## 1 1 1.0573519 1.7478028 1
## 2 2 0.8887782 0.7696357 1
## 3 3 0.5269692 1.1493946 1
## 4 4 1.3208460 0.7824137 1
## 5 5 0.8126979 1.5054613 1
## 6 6 1.1336741 1.0945942 1
## 7 7 0.6759592 1.1531551 1
## 8 8 0.9540415 1.0529514 1
## 9 9 1.0006569 0.8315230 1
## 10 10 1.0536077 1.3770325 1
## 11 11 1.2585677 0.9205839 1
## 12 12 0.8650592 1.1372276 1
## 13 13 1.2668903 0.6216743 1
## 14 14 0.9522934 1.1224825 1
## 15 15 NA 0.6984273 1
## 16 16 1.0727619 1.6396694 1
## 17 17 1.3975651 1.2097224 1
## 18 18 1.1757708 0.9048688 1
## 19 19 1.1410109 1.0434194 1
## 20 20 1.4004919 0.7503464 1
## 21 21 0.7653808 1.0777968 1
## 22 22 0.6732523 0.7906839 1
## 23 23 NA 0.3429748 1
## 24 24 0.9413393 0.9503336 1
## 25 25 0.6842599 1.0403824 1
## 26 26 1.2746448 0.8712267 1
## 27 27 0.8354292 1.3091385 1
## 28 28 1.3018118 0.4171895 1
## 29 29 NA 0.9239769 1
## 30 30 0.9336756 0.3525257 1
## 31 31 NA 0.7138900 1
## 32 32 0.7591176 0.5363568 1
## 33 33 1.1538858 1.1079824 1
## 34 34 0.5369660 1.3539198 1
## 35 35 0.5442541 1.1136956 1
## 36 36 1.0531991 0.7798019 1
## 37 37 0.9962604 0.7152946 1
## 38 38 0.9696953 0.5923428 1
## 39 39 0.7437666 1.1423566 1
## 40 40 0.6010118 0.7181032 1
## 41 41 1.1351772 1.0386381 1
## 42 42 NA 0.7528686 1
## 43 43 1.6215292 1.1479554 1
## 44 44 0.9475248 0.6548386 1
## 45 45 NA 1.1258225 1
## 46 46 0.4850797 1.0008841 1
## 47 47 1.3914477 0.9196976 1
## 48 48 NA 1.0907631 1
## 49 49 1.2659405 0.5198630 1
## 50 50 1.4996709 0.7589652 1
## 51 51 NA 0.7218772 1
## 52 52 1.0615364 1.2542256 1
## 53 53 1.3208234 0.8193131 1
## 54 54 0.7073582 0.7141170 1
## 55 55 0.8038330 0.6260761 1
## 56 56 NA 0.9136561 1
## 57 57 0.6688927 0.5611317 1
## 58 58 0.7331415 1.0390434 1
## 59 59 0.9758408 0.7093584 1
## 60 60 1.1862083 0.5044526 1
## 61 61 0.7982409 1.1079171 1
## 62 62 1.1752958 0.7158176 1
## 63 63 1.2827413 0.6460942 1
## 64 64 1.1534405 1.1042519 1
## 65 65 1.1837145 0.8843736 1
## 66 66 1.0995425 0.7952274 1
## 67 67 1.1567166 0.4780196 1
## 68 68 1.4009678 1.2769785 1
## 69 69 1.0116143 1.0279067 1
## 70 70 0.8864627 1.0235342 1
## 71 71 0.5915490 0.6496933 1
## 72 72 NA 0.7774256 1
## 73 73 1.0502020 0.6826228 1
## 74 74 1.2245178 1.3484963 1
## 75 75 1.0590163 1.1499019 1
## 76 76 NA 0.9064248 1
## 77 77 0.9417025 0.9732908 1
## 78 78 1.3080705 1.4753877 1
## 79 79 0.9378239 0.9017197 1
## 80 80 NA 0.8228211 1
## 81 81 NA 1.0903962 1
## 82 82 NA 1.0459977 1
## 83 83 NA 1.1450822 1
## 84 84 1.1563915 0.8992355 1
## 85 85 1.6003158 1.2937698 1
## 86 86 1.2047170 1.3241660 1
## 87 87 0.8906743 1.0305262 1
## 88 88 NA 1.0955173 1
## 89 89 1.2447572 0.6185842 1
## 90 90 0.8438121 0.8225880 1
## 91 91 0.8170360 1.1927344 1
## 92 92 0.8759901 0.9877621 1
## 93 93 1.2447778 1.1029247 1
## 94 94 NA 0.6398270 1
## 95 95 1.0557678 1.2873381 1
## 96 96 NA 0.8014289 1
## 97 97 1.0642112 0.8221881 1
## 98 98 0.6471848 1.3936839 1
## 99 99 1.2265419 0.9881312 1
## 100 100 0.8808854 1.2001093 1
## 101 101 1.8771498 1.7732672 2
## 102 102 NA 1.6102812 2
## 103 103 NA 2.2716912 2
## 104 104 2.2528410 2.2421950 2
## 105 105 2.0146969 2.2037186 2
## 106 106 2.0571673 2.1831962 2
## 107 107 1.5153879 1.4021520 2
## 108 108 1.7644909 2.4067985 2
## 109 109 1.7870424 2.1608651 2
## 110 110 2.0583481 1.2250943 2
## 111 111 2.3458291 1.5937778 2
## 112 112 NA 2.2453937 2
## 113 113 2.2171585 2.1001149 2
## 114 114 2.0621500 1.9473138 2
## 115 115 1.7385439 1.9590821 2
## 116 116 2.0307697 2.1765444 2
## 117 117 NA 1.8862327 2
## 118 118 2.2423609 2.3209563 2
## 119 119 NA 2.0551506 2
## 120 120 1.4492930 2.2375473 2
## 121 121 1.9913989 2.0278606 2
## 122 122 NA 2.1148138 2
## 123 123 1.9707447 2.4592672 2
## 124 124 NA 2.0948015 2
## 125 125 2.0126049 2.6308717 2
## 126 126 2.0283098 1.8761194 2
## 127 127 1.9463263 1.6075172 2
## 128 128 NA 2.1165193 2
## 129 129 NA 2.1636836 2
## 130 130 2.0916448 2.1695019 2
## 131 131 NA 1.8360027 2
## 132 132 1.7901780 1.7874707 2
## 133 133 NA 1.5788929 2
## 134 134 1.7807438 2.0392232 2
## 135 135 1.4754489 1.7173733 2
## 136 136 1.5034552 1.7905576 2
## 137 137 1.8888263 2.2175118 2
## 138 138 2.5301248 2.4090070 2
## 139 139 2.1706763 1.9731797 2
## 140 140 1.7870885 2.0117899 2
## 141 141 2.0399442 2.0871188 2
## 142 142 2.0921740 2.4561481 2
## 143 143 2.0505578 1.2893545 2
## 144 144 1.9369227 2.3211463 2
## 145 145 1.5930952 1.8895866 2
## 146 146 1.5869744 2.1308455 2
## 147 147 1.6693242 1.7933147 2
## 148 148 2.2202213 2.2461861 2
## 149 149 2.3228489 1.7847326 2
## 150 150 NA 2.1800490 2
## 151 151 1.7018048 1.8475247 2
## 152 152 NA 1.8496367 2
## 153 153 NA 2.0882344 2
## 154 154 2.1188523 2.0773122 2
## 155 155 2.3126464 1.5343615 2
## 156 156 1.7105233 2.6269813 2
## 157 157 NA 1.6151418 2
## 158 158 2.4375357 2.0788427 2
## 159 159 2.3534221 1.9087801 2
## 160 160 2.2168841 2.2102064 2
## 161 161 1.6816889 2.0190378 2
## 162 162 2.1181298 1.8870981 2
## 163 163 NA 2.1816763 2
## 164 164 2.2043243 1.4676456 2
## 165 165 NA 1.9194359 2
## 166 166 1.4915602 1.9062962 2
## 167 167 1.9075814 1.8654798 2
## 168 168 1.9283135 1.8666695 2
## 169 169 1.4946530 2.0449109 2
## 170 170 2.1452323 2.2475670 2
## 171 171 1.8060153 1.9800374 2
## 172 172 2.3146444 1.4553192 2
## 173 173 1.5117372 2.3444982 2
## 174 174 1.6535444 1.7997107 2
## 175 175 NA 2.4468576 2
## 176 176 1.9890634 1.7668571 2
## 177 177 NA 1.9689432 2
## 178 178 NA 1.9883723 2
## 179 179 NA 1.9979885 2
## 180 180 1.5795955 2.0853344 2
## 181 181 2.1758930 1.6282237 2
## 182 182 1.5479711 1.5534837 2
## 183 183 2.4613298 2.0013553 2
## 184 184 1.5060333 2.0199955 2
## 185 185 NA 2.1533425 2
## 186 186 NA 1.9896463 2
## 187 187 NA 1.6290961 2
## 188 188 1.8374137 2.2410427 2
## 189 189 2.2772023 2.3338793 2
## 190 190 1.9595828 2.2527371 2
## 191 191 NA 2.1093576 2
## 192 192 1.7016235 1.6914032 2
## 193 193 1.5762973 2.3082377 2
## 194 194 1.8821912 1.5825925 2
## 195 195 1.8848727 1.8853790 2
## 196 196 2.2597503 2.0622894 2
## 197 197 2.2323333 2.2830704 2
## 198 198 1.6201120 1.7764591 2
## 199 199 1.6523724 2.0193403 2
## 200 200 1.9539210 2.2344252 2
## 201 201 2.5819722 2.6567837 3
## 202 202 3.7832316 3.3028839 3
## 203 203 2.8610307 2.9080235 3
## 204 204 2.9901584 3.0943378 3
## 205 205 2.8070174 3.0703758 3
## 206 206 3.1339659 2.4290466 3
## 207 207 3.0380446 3.1386832 3
## 208 208 3.0281548 2.6853569 3
## 209 209 NA 2.6574430 3
## 210 210 NA 3.5263689 3
## 211 211 NA 3.1306349 3
## 212 212 3.0474078 3.2267974 3
## 213 213 2.6853599 3.2922745 3
## 214 214 3.2395651 3.1359853 3
## 215 215 2.9669598 2.6972206 3
## 216 216 NA 3.5274607 3
## 217 217 3.1273191 3.0142304 3
## 218 218 3.1373893 3.2388580 3
## 219 219 3.3383650 3.2082553 3
## 220 220 2.9975194 2.3522521 3
## 221 221 3.6153644 2.7892620 3
## 222 222 2.8461284 2.9314056 3
## 223 223 3.0818398 2.8281676 3
## 224 224 NA 2.7852211 3
## 225 225 2.2244761 3.5469746 3
## 226 226 2.8095517 2.4626160 3
## 227 227 3.6508666 3.3560274 3
## 228 228 2.7399855 2.9685999 3
## 229 229 2.9550281 3.5272008 3
## 230 230 NA 2.7083776 3
## 231 231 NA 3.3237273 3
## 232 232 2.3411670 2.7699882 3
## 233 233 3.1370998 2.6864259 3
## 234 234 3.0956364 3.4672988 3
## 235 235 3.5666822 3.0121527 3
## 236 236 NA 2.5919551 3
## 237 237 3.4237445 3.1104173 3
## 238 238 3.2756589 3.0391673 3
## 239 239 2.4897821 2.9606401 3
## 240 240 2.9341736 2.9715870 3
## 241 241 2.4684781 2.5386491 3
## 242 242 2.8460596 3.1439480 3
## 243 243 2.8189948 2.7818898 3
## 244 244 3.0199555 3.0133392 3
## 245 245 2.7966737 2.9273637 3
## 246 246 3.5255893 3.2752274 3
## 247 247 3.0797900 2.4478439 3
## 248 248 2.8447704 3.0511925 3
## 249 249 2.9218640 3.1880018 3
## 250 250 3.0945030 2.8852678 3
## 251 251 3.0406291 2.6547352 3
## 252 252 3.0128696 2.9649426 3
## 253 253 3.4363800 3.1199428 3
## 254 254 2.4605726 3.2040897 3
## 255 255 3.2086744 3.1462461 3
## 256 256 2.6813105 3.4488977 3
## 257 257 NA 2.4466426 3
## 258 258 3.1365861 3.1815340 3
## 259 259 NA 2.7634753 3
## 260 260 3.0787704 3.3339114 3
## 261 261 3.2313037 2.9723319 3
## 262 262 2.9445187 3.0200138 3
## 263 263 2.6034877 2.5892338 3
## 264 264 2.8454410 2.7943757 3
## 265 265 2.6499147 3.4176901 3
## 266 266 3.0505713 3.7679258 3
## 267 267 3.0833351 2.7855170 3
## 268 268 3.2226999 3.0969466 3
## 269 269 NA 2.8514640 3
## 270 270 2.6036952 3.0328911 3
## 271 271 3.1782502 2.9253867 3
## 272 272 2.7684246 2.5690026 3
## 273 273 3.2782945 3.2051712 3
## 274 274 2.8761220 2.9241752 3
## 275 275 2.9073361 3.3500443 3
## 276 276 2.7977983 3.0704919 3
## 277 277 2.7571229 3.3002550 3
## 278 278 3.0211307 2.7796439 3
## 279 279 3.2905422 2.7314109 3
## 280 280 2.9442027 3.2482132 3
## 281 281 2.4908940 2.9041798 3
## 282 282 2.5040276 3.0997270 3
## 283 283 2.7244374 3.1601071 3
## 284 284 2.4381125 2.6579235 3
## 285 285 2.6848554 3.0012474 3
## 286 286 2.6023956 2.8879919 3
## 287 287 2.7914558 3.1804200 3
## 288 288 3.0592855 3.2297995 3
## 289 289 3.2365817 3.2208429 3
## 290 290 3.4655675 2.7035687 3
## 291 291 NA 3.0045550 3
## 292 292 NA 3.0102220 3
## 293 293 2.9555249 2.5675525 3
## 294 294 NA 2.9978746 3
## 295 295 3.0235324 3.0744484 3
## 296 296 3.0216215 3.1364387 3
## 297 297 2.8722143 3.1256463 3
## 298 298 2.6795480 2.6906942 3
## 299 299 NA 2.8454521 3
## 300 300 2.8541627 3.2153109 3
## 301 301 3.9319212 4.1047787 4
## 302 302 4.2031011 4.1359434 4
## 303 303 4.0141791 4.0903680 4
## 304 304 NA 4.0732038 4
## 305 305 4.4486916 4.0189316 4
## 306 306 4.0397612 4.0247592 4
## 307 307 NA 4.5728605 4
## 308 308 3.9524919 3.6402595 4
## 309 309 NA 4.1587783 4
## 310 310 4.3185560 3.7215885 4
## 311 311 NA 4.0560297 4
## 312 312 4.4458704 4.0531084 4
## 313 313 4.4317854 3.8649695 4
## 314 314 4.1173581 3.5166461 4
## 315 315 NA 4.2296273 4
## 316 316 3.9598596 3.7078570 4
## 317 317 4.2311779 4.0549252 4
## 318 318 3.8440411 4.3013155 4
## 319 319 4.0072497 3.9959807 4
## 320 320 3.5685618 4.6784372 4
## 321 321 NA 3.9775809 4
## 322 322 3.6831961 3.9346015 4
## 323 323 4.2624566 3.9525968 4
## 324 324 3.9087102 4.2356386 4
## 325 325 3.8966738 3.6831030 4
## 326 326 4.2609210 3.9631854 4
## 327 327 4.0945597 3.6859991 4
## 328 328 3.8364296 3.9555679 4
## 329 329 3.9525787 4.7345219 4
## 330 330 4.4383050 4.0457942 4
## 331 331 3.6406599 4.3905430 4
## 332 332 NA 4.2134911 4
## 333 333 NA 3.7573330 4
## 334 334 3.5565935 4.1645013 4
## 335 335 3.9266785 3.8393305 4
## 336 336 4.3669101 4.1737403 4
## 337 337 4.0541001 4.1037426 4
## 338 338 NA 4.2760638 4
## 339 339 4.1979865 4.4269043 4
## 340 340 NA 3.9333475 4
## 341 341 4.4481982 4.1750107 4
## 342 342 4.1149822 4.1838701 4
## 343 343 NA 3.6389154 4
## 344 344 NA 3.9116499 4
## 345 345 3.9325027 4.1438197 4
## 346 346 4.3739662 3.9824950 4
## 347 347 3.9566056 3.8946457 4
## 348 348 4.0582495 3.8343217 4
## 349 349 4.0228510 3.8863071 4
## 350 350 4.5821048 4.1458209 4
## 351 351 3.8650351 3.9805668 4
## 352 352 4.0403204 3.8558202 4
## 353 353 4.1236375 4.2442608 4
## 354 354 3.9066946 4.0137895 4
## 355 355 NA 4.2002033 4
## 356 356 NA 4.4084621 4
## 357 357 4.1958159 3.7809229 4
## 358 358 4.1252388 4.4175897 4
## 359 359 4.0829571 4.3375948 4
## 360 360 NA 3.9279509 4
## 361 361 4.1764366 4.0724829 4
## 362 362 4.3124123 3.5704137 4
## 363 363 3.3435430 4.1069496 4
## 364 364 4.3407060 4.2315522 4
## 365 365 4.1325530 4.0919474 4
## 366 366 3.9235495 3.2245887 4
## 367 367 4.5689692 4.2002139 4
## 368 368 4.5960803 3.7843448 4
## 369 369 NA 4.0091081 4
## 370 370 NA 3.7281339 4
## 371 371 4.0386516 3.7446616 4
## 372 372 4.1898889 3.8100170 4
## 373 373 NA 4.4314731 4
## 374 374 3.7114216 4.1480784 4
## 375 375 3.9080832 4.0036010 4
## 376 376 3.5074064 3.8485071 4
## 377 377 3.6531409 3.5823952 4
## 378 378 3.9791064 3.9305876 4
## 379 379 NA 3.8854777 4
## 380 380 4.3347121 3.9965572 4
## 381 381 3.9593086 3.7610437 4
## 382 382 4.3769150 4.3976243 4
## 383 383 NA 4.3646940 4
## 384 384 4.0252329 4.5869691 4
## 385 385 NA 3.9123737 4
## 386 386 4.4999342 4.3462749 4
## 387 387 3.9031587 4.0120649 4
## 388 388 4.0884901 4.2716017 4
## 389 389 4.3656236 4.2483480 4
## 390 390 NA 3.9236437 4
## 391 391 4.2461660 3.9907240 4
## 392 392 4.3619378 3.8281770 4
## 393 393 4.0253087 3.8700423 4
## 394 394 3.7252807 4.2826156 4
## 395 395 3.8768540 4.0218492 4
## 396 396 NA 4.0839695 4
## 397 397 3.9845756 4.5274781 4
## 398 398 4.3175399 3.9474645 4
## 399 399 4.1956247 3.6506497 4
## 400 400 3.9629929 4.4813448 4
## 401 401 NA 5.0569303 5
## 402 402 NA 4.9093912 5
## 403 403 5.1810277 4.7126132 5
## 404 404 4.5770969 4.8603005 5
## 405 405 5.5510416 5.1442615 5
## 406 406 4.6207706 4.8672681 5
## 407 407 NA 5.1397374 5
## 408 408 5.0665965 4.6585505 5
## 409 409 5.1835886 5.3322633 5
## 410 410 NA 4.9830286 5
## 411 411 4.9562200 5.5049811 5
## 412 412 5.2718581 5.2405135 5
## 413 413 5.2084373 4.9560566 5
## 414 414 4.9721527 5.3795614 5
## 415 415 5.1814817 4.8598929 5
## 416 416 5.6640558 4.9886294 5
## 417 417 4.8236729 5.2310906 5
## 418 418 5.0248687 5.3233466 5
## 419 419 5.2635765 4.9208782 5
## 420 420 4.7660847 5.5413918 5
## 421 421 4.9247488 4.9033880 5
## 422 422 5.0183630 5.3193518 5
## 423 423 5.2639345 5.3483237 5
## 424 424 4.8960107 5.5002529 5
## 425 425 4.8715654 4.5184043 5
## 426 426 4.7970624 4.9077186 5
## 427 427 4.8582851 5.2208421 5
## 428 428 5.1047219 5.0046923 5
## 429 429 4.9271671 5.4422334 5
## 430 430 5.0680027 4.8948732 5
## 431 431 4.9295152 5.6661267 5
## 432 432 5.2617314 5.4790931 5
## 433 433 NA 4.5694669 5
## 434 434 NA 5.6557459 5
## 435 435 5.4158041 5.1627972 5
## 436 436 5.0162186 5.5942489 5
## 437 437 NA 5.1983505 5
## 438 438 5.6727011 5.2197938 5
## 439 439 NA 5.1416786 5
## 440 440 NA 5.7314609 5
## 441 441 4.6232391 4.5484302 5
## 442 442 4.7028692 4.6354168 5
## 443 443 4.6446092 5.0542124 5
## 444 444 4.8668172 4.4917569 5
## 445 445 5.8587500 5.0461115 5
## 446 446 4.5827101 4.3551495 5
## 447 447 4.7592270 4.6402052 5
## 448 448 4.7851855 4.9353877 5
## 449 449 4.9766000 5.0017814 5
## 450 450 5.0633071 4.7822601 5
## 451 451 4.6576744 5.0956024 5
## 452 452 4.8617920 4.5624429 5
## 453 453 5.3491110 4.8368894 5
## 454 454 5.4456517 5.0278979 5
## 455 455 4.8107401 5.1965530 5
## 456 456 5.0508482 4.6658209 5
## 457 457 4.7841330 4.9769776 5
## 458 458 4.7169797 5.0811367 5
## 459 459 5.5069816 5.6701431 5
## 460 460 5.1915878 4.8684516 5
## 461 461 4.9683996 4.9602568 5
## 462 462 4.8230330 5.6446796 5
## 463 463 5.2747394 5.1737542 5
## 464 464 4.9458842 4.8583164 5
## 465 465 4.7860390 5.0114536 5
## 466 466 NA 5.1314488 5
## 467 467 5.1029722 4.5914207 5
## 468 468 5.1994724 4.8600189 5
## 469 469 NA 5.1175816 5
## 470 470 NA 4.7536672 5
## 471 471 5.0869352 5.1629523 5
## 472 472 4.8054829 5.1358627 5
## 473 473 5.0382698 5.1894259 5
## 474 474 4.8631021 4.9282662 5
## 475 475 4.3164392 4.8911148 5
## 476 476 4.6409209 4.4486824 5
## 477 477 5.4132810 5.2804849 5
## 478 478 4.8010176 4.7578252 5
## 479 479 NA 4.8786333 5
## 480 480 5.0187851 5.0409791 5
## 481 481 4.4393572 4.9250130 5
## 482 482 5.0292903 4.5412575 5
## 483 483 5.3828960 4.8183111 5
## 484 484 4.8450897 5.4215614 5
## 485 485 4.5558001 5.0734896 5
## 486 486 NA 4.7052461 5
## 487 487 NA 5.0065545 5
## 488 488 4.7898177 5.3107710 5
## 489 489 5.2205857 4.9906757 5
## 490 490 5.2708506 5.2940148 5
## 491 491 4.6971965 5.2695113 5
## 492 492 NA 4.6601760 5
## 493 493 NA 4.5972000 5
## 494 494 NA 5.0664296 5
## 495 495 5.1229746 5.1062571 5
## 496 496 5.2805337 5.4254368 5
## 497 497 4.5811619 4.6489740 5
## 498 498 NA 5.1451297 5
## 499 499 5.1040017 5.0568612 5
## 500 500 4.9806582 4.6061834 5
# Cluster the simulated points into 5 groups, passing only the id and V* columns.
km <- TGL_kmeans_tidy(
  select(data, id, starts_with("V")),
  k = 5,
  metric = "euclid",
  verbose = FALSE
)
# Pair the fitted clusters with the simulated labels; the result carries a
# `new_clust` column next to `true_clust`.
d <- tglkmeans:::match_clusters(data, km, 5)
# Fraction of points with a non-missing `new_clust` that agree with `true_clust`.
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 0.984
Plotting the results (points with missing values are dropped), we get:
# Scatter plot of V1 against V2: colour encodes the assigned cluster, shape the
# true cluster. The fitted centers are drawn on top as large black "X" marks.
ggplot(
  d,
  aes(
    x = V1,
    y = V2,
    color = factor(new_clust),
    shape = factor(true_clust)
  )
) +
  geom_point() +
  geom_point(data = km$centers, size = 7, color = "black", shape = "X") +
  scale_color_discrete(name = "cluster") +
  scale_shape_discrete(name = "true cluster")
## Warning: Removed 100 rows containing missing values (`geom_point()`).
Let’s move to higher dimensions (and higher noise):
# Simulate 30 clusters in 300 dimensions, then repeat the fit / match / score
# steps with k = 30.
data <- simulate_data(n = 100, sd = 0.3, nclust = 30, dims = 300)
km <- TGL_kmeans_tidy(
  select(data, id, starts_with("V")),
  k = 30,
  metric = "euclid",
  verbose = FALSE
)
# Pair the fitted clusters with the simulated labels.
d <- tglkmeans:::match_clusters(data, km, 30)
# Fraction of points with a non-missing `new_clust` that agree with `true_clust`.
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 1
Let’s compare it to R vanilla kmeans:
# Baseline: base R kmeans() on the same V* columns (no id column). No seed is
# set here, so the score printed below may differ from run to run.
km_standard <- kmeans(data %>% select(starts_with("V")), 30)
# match_clusters() is given an id/clust table in `$clust`, so build one from the
# kmeans() assignment vector. seq_len() is used instead of 1:nrow(data) so the
# sequence is also correct (empty) for a zero-row input.
km_standard$clust <- tibble(
  id = seq_len(nrow(data)),
  clust = km_standard$cluster
)
d <- tglkmeans:::match_clusters(data, km_standard, 30)
# Fraction of points with a non-missing `new_clust` that agree with `true_clust`.
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.75
We can see that kmeans++ clusters significantly better than R vanilla kmeans.
We can set the seed for the C++ random number generator to get reproducible results:
# Fit the same data twice with the same seed.
km1 <- TGL_kmeans_tidy(
  select(data, id, starts_with("V")),
  k = 30,
  metric = "euclid",
  verbose = FALSE,
  seed = 60427
)
km2 <- TGL_kmeans_tidy(
  select(data, id, starts_with("V")),
  k = 30,
  metric = "euclid",
  verbose = FALSE,
  seed = 60427
)
# Element-wise comparison of the two center tables (first column dropped);
# TRUE means both runs produced exactly the same centers.
all(km1$centers[, -1] == km2$centers[, -1])
## [1] TRUE