Basic usage of the package.

Basic usage

First, let’s simulate 5 clusters of 100 points each in 2 dimensions, normally distributed around centers 1 to 5 with a standard deviation of 0.3:

data <- simulate_data(n = 100, sd = 0.3, nclust = 5, dims = 2)
data
##      id        V1        V2 true_clust
## 1     1 1.0573519 1.7478028          1
## 2     2 0.8887782 0.7696357          1
## 3     3 0.5269692 1.1493946          1
## 4     4 1.3208460 0.7824137          1
## 5     5 0.8126979 1.5054613          1
## 6     6 1.1336741 1.0945942          1
## 7     7 0.6759592 1.1531551          1
## 8     8 0.9540415 1.0529514          1
## 9     9 1.0006569 0.8315230          1
## 10   10 1.0536077 1.3770325          1
## 11   11 1.2585677 0.9205839          1
## 12   12 0.8650592 1.1372276          1
## 13   13 1.2668903 0.6216743          1
## 14   14 0.9522934 1.1224825          1
## 15   15 1.2696019 0.6984273          1
## 16   16 1.0727619 1.6396694          1
## 17   17 1.3975651 1.2097224          1
## 18   18 1.1757708 0.9048688          1
## 19   19 1.1410109 1.0434194          1
## 20   20 1.4004919 0.7503464          1
## 21   21 0.7653808 1.0777968          1
## 22   22 0.6732523 0.7906839          1
## 23   23 1.3472177 0.3429748          1
## 24   24 0.9413393 0.9503336          1
## 25   25 0.6842599 1.0403824          1
## 26   26 1.2746448 0.8712267          1
## 27   27 0.8354292 1.3091385          1
## 28   28 1.3018118 0.4171895          1
## 29   29 1.0826636 0.9239769          1
## 30   30 0.9336756 0.3525257          1
## 31   31 0.5483580 0.7138900          1
## 32   32 0.7591176 0.5363568          1
## 33   33 1.1538858 1.1079824          1
## 34   34 0.5369660 1.3539198          1
## 35   35 0.5442541 1.1136956          1
## 36   36 1.0531991 0.7798019          1
## 37   37 0.9962604 0.7152946          1
## 38   38 0.9696953 0.5923428          1
## 39   39 0.7437666 1.1423566          1
## 40   40 0.6010118 0.7181032          1
## 41   41 1.1351772 1.0386381          1
## 42   42 0.2437977 0.7528686          1
## 43   43 1.6215292 1.1479554          1
## 44   44 0.9475248 0.6548386          1
## 45   45 0.9597051 1.1258225          1
## 46   46 0.4850797 1.0008841          1
## 47   47 1.3914477 0.9196976          1
## 48   48 0.5641701 1.0907631          1
## 49   49 1.2659405 0.5198630          1
## 50   50 1.4996709 0.7589652          1
## 51   51 1.1188109 0.7218772          1
## 52   52 1.0615364 1.2542256          1
## 53   53 1.3208234 0.8193131          1
## 54   54 0.7073582 0.7141170          1
## 55   55 0.8038330 0.6260761          1
## 56   56 0.8037724 0.9136561          1
## 57   57 0.6688927 0.5611317          1
## 58   58 0.7331415 1.0390434          1
## 59   59 0.9758408 0.7093584          1
## 60   60 1.1862083 0.5044526          1
## 61   61 0.7982409 1.1079171          1
## 62   62 1.1752958 0.7158176          1
## 63   63 1.2827413 0.6460942          1
## 64   64 1.1534405 1.1042519          1
## 65   65 1.1837145 0.8843736          1
## 66   66 1.0995425 0.7952274          1
## 67   67 1.1567166 0.4780196          1
## 68   68 1.4009678 1.2769785          1
## 69   69 1.0116143 1.0279067          1
## 70   70 0.8864627 1.0235342          1
## 71   71 0.5915490 0.6496933          1
## 72   72 0.6570099 0.7774256          1
## 73   73 1.0502020 0.6826228          1
## 74   74 1.2245178 1.3484963          1
## 75   75 1.0590163 1.1499019          1
## 76   76 1.1681089 0.9064248          1
## 77   77 0.9417025 0.9732908          1
## 78   78 1.3080705 1.4753877          1
## 79   79 0.9378239 0.9017197          1
## 80   80 1.5940799 0.8228211          1
## 81   81 0.9799102 1.0903962          1
## 82   82 1.0814642 1.0459977          1
## 83   83 1.1512591 1.1450822          1
## 84   84 1.1563915 0.8992355          1
## 85   85 1.6003158 1.2937698          1
## 86   86 1.2047170 1.3241660          1
## 87   87 0.8906743 1.0305262          1
## 88   88 1.0232895 1.0955173          1
## 89   89 1.2447572 0.6185842          1
## 90   90 0.8438121 0.8225880          1
## 91   91 0.8170360 1.1927344          1
## 92   92 0.8759901 0.9877621          1
## 93   93 1.2447778 1.1029247          1
## 94   94 1.1317417 0.6398270          1
## 95   95 1.0557678 1.2873381          1
## 96   96 0.3132488 0.8014289          1
## 97   97 1.0642112 0.8221881          1
## 98   98 0.6471848 1.3936839          1
## 99   99 1.2265419 0.9881312          1
## 100 100 0.8808854 1.2001093          1
## 101 101 1.8771498 1.7732672          2
## 102 102 1.8656529 1.6102812          2
## 103 103 1.8902129 2.2716912          2
## 104 104 2.2528410 2.2421950          2
## 105 105 2.0146969 2.2037186          2
## 106 106 2.0571673 2.1831962          2
## 107 107 1.5153879 1.4021520          2
## 108 108 1.7644909 2.4067985          2
## 109 109 1.7870424 2.1608651          2
## 110 110 2.0583481 1.2250943          2
## 111 111 2.3458291 1.5937778          2
## 112 112 2.2847881 2.2453937          2
## 113 113 2.2171585 2.1001149          2
## 114 114 2.0621500 1.9473138          2
## 115 115 1.7385439 1.9590821          2
## 116 116 2.0307697 2.1765444          2
## 117 117 2.2137788 1.8862327          2
## 118 118 2.2423609 2.3209563          2
## 119 119 2.0750316 2.0551506          2
## 120 120 1.4492930 2.2375473          2
## 121 121 1.9913989 2.0278606          2
## 122 122 1.8850259 2.1148138          2
## 123 123 1.9707447 2.4592672          2
## 124 124 2.2075753 2.0948015          2
## 125 125 2.0126049 2.6308717          2
## 126 126 2.0283098 1.8761194          2
## 127 127 1.9463263 1.6075172          2
## 128 128 1.9703637 2.1165193          2
## 129 129 1.9737552 2.1636836          2
## 130 130 2.0916448 2.1695019          2
## 131 131 1.6349939 1.8360027          2
## 132 132 1.7901780 1.7874707          2
## 133 133 1.4701850 1.5788929          2
## 134 134 1.7807438 2.0392232          2
## 135 135 1.4754489 1.7173733          2
## 136 136 1.5034552 1.7905576          2
## 137 137 1.8888263 2.2175118          2
## 138 138 2.5301248 2.4090070          2
## 139 139 2.1706763 1.9731797          2
## 140 140 1.7870885 2.0117899          2
## 141 141 2.0399442 2.0871188          2
## 142 142 2.0921740 2.4561481          2
## 143 143 2.0505578 1.2893545          2
## 144 144 1.9369227 2.3211463          2
## 145 145 1.5930952 1.8895866          2
## 146 146 1.5869744 2.1308455          2
## 147 147 1.6693242 1.7933147          2
## 148 148 2.2202213 2.2461861          2
## 149 149 2.3228489 1.7847326          2
## 150 150 1.5944095 2.1800490          2
## 151 151 1.7018048 1.8475247          2
## 152 152 2.3509971 1.8496367          2
## 153 153 2.0795178 2.0882344          2
## 154 154 2.1188523 2.0773122          2
## 155 155 2.3126464 1.5343615          2
## 156 156 1.7105233 2.6269813          2
## 157 157 2.5267670 1.6151418          2
## 158 158 2.4375357 2.0788427          2
## 159 159 2.3534221 1.9087801          2
## 160 160 2.2168841 2.2102064          2
## 161 161 1.6816889 2.0190378          2
## 162 162 2.1181298 1.8870981          2
## 163 163 1.3220489 2.1816763          2
## 164 164 2.2043243 1.4676456          2
## 165 165 2.0173552 1.9194359          2
## 166 166 1.4915602 1.9062962          2
## 167 167 1.9075814 1.8654798          2
## 168 168 1.9283135 1.8666695          2
## 169 169 1.4946530 2.0449109          2
## 170 170 2.1452323 2.2475670          2
## 171 171 1.8060153 1.9800374          2
## 172 172 2.3146444 1.4553192          2
## 173 173 1.5117372 2.3444982          2
## 174 174 1.6535444 1.7997107          2
## 175 175 1.2499836 2.4468576          2
## 176 176 1.9890634 1.7668571          2
## 177 177 2.0078663 1.9689432          2
## 178 178 2.1826120 1.9883723          2
## 179 179 2.0141587 1.9979885          2
## 180 180 1.5795955 2.0853344          2
## 181 181 2.1758930 1.6282237          2
## 182 182 1.5479711 1.5534837          2
## 183 183 2.4613298 2.0013553          2
## 184 184 1.5060333 2.0199955          2
## 185 185 1.6711205 2.1533425          2
## 186 186 2.3032199 1.9896463          2
## 187 187 1.6791318 1.6290961          2
## 188 188 1.8374137 2.2410427          2
## 189 189 2.2772023 2.3338793          2
## 190 190 1.9595828 2.2527371          2
## 191 191 2.3284564 2.1093576          2
## 192 192 1.7016235 1.6914032          2
## 193 193 1.5762973 2.3082377          2
## 194 194 1.8821912 1.5825925          2
## 195 195 1.8848727 1.8853790          2
## 196 196 2.2597503 2.0622894          2
## 197 197 2.2323333 2.2830704          2
## 198 198 1.6201120 1.7764591          2
## 199 199 1.6523724 2.0193403          2
## 200 200 1.9539210 2.2344252          2
## 201 201 2.5819722 2.6567837          3
## 202 202 3.7832316 3.3028839          3
## 203 203 2.8610307 2.9080235          3
## 204 204 2.9901584 3.0943378          3
## 205 205 2.8070174 3.0703758          3
## 206 206 3.1339659 2.4290466          3
## 207 207 3.0380446 3.1386832          3
## 208 208 3.0281548 2.6853569          3
## 209 209 2.7162959 2.6574430          3
## 210 210 3.5930636 3.5263689          3
## 211 211 2.7343347 3.1306349          3
## 212 212 3.0474078 3.2267974          3
## 213 213 2.6853599 3.2922745          3
## 214 214 3.2395651 3.1359853          3
## 215 215 2.9669598 2.6972206          3
## 216 216 3.2683105 3.5274607          3
## 217 217 3.1273191 3.0142304          3
## 218 218 3.1373893 3.2388580          3
## 219 219 3.3383650 3.2082553          3
## 220 220 2.9975194 2.3522521          3
## 221 221 3.6153644 2.7892620          3
## 222 222 2.8461284 2.9314056          3
## 223 223 3.0818398 2.8281676          3
## 224 224 3.3851424 2.7852211          3
## 225 225 2.2244761 3.5469746          3
## 226 226 2.8095517 2.4626160          3
## 227 227 3.6508666 3.3560274          3
## 228 228 2.7399855 2.9685999          3
## 229 229 2.9550281 3.5272008          3
## 230 230 3.3785572 2.7083776          3
## 231 231 3.1683925 3.3237273          3
## 232 232 2.3411670 2.7699882          3
## 233 233 3.1370998 2.6864259          3
## 234 234 3.0956364 3.4672988          3
## 235 235 3.5666822 3.0121527          3
## 236 236 3.0622586 2.5919551          3
## 237 237 3.4237445 3.1104173          3
## 238 238 3.2756589 3.0391673          3
## 239 239 2.4897821 2.9606401          3
## 240 240 2.9341736 2.9715870          3
## 241 241 2.4684781 2.5386491          3
## 242 242 2.8460596 3.1439480          3
## 243 243 2.8189948 2.7818898          3
## 244 244 3.0199555 3.0133392          3
## 245 245 2.7966737 2.9273637          3
## 246 246 3.5255893 3.2752274          3
## 247 247 3.0797900 2.4478439          3
## 248 248 2.8447704 3.0511925          3
## 249 249 2.9218640 3.1880018          3
## 250 250 3.0945030 2.8852678          3
## 251 251 3.0406291 2.6547352          3
## 252 252 3.0128696 2.9649426          3
## 253 253 3.4363800 3.1199428          3
## 254 254 2.4605726 3.2040897          3
## 255 255 3.2086744 3.1462461          3
## 256 256 2.6813105 3.4488977          3
## 257 257 2.9036231 2.4466426          3
## 258 258 3.1365861 3.1815340          3
## 259 259 3.1253695 2.7634753          3
## 260 260 3.0787704 3.3339114          3
## 261 261 3.2313037 2.9723319          3
## 262 262 2.9445187 3.0200138          3
## 263 263 2.6034877 2.5892338          3
## 264 264 2.8454410 2.7943757          3
## 265 265 2.6499147 3.4176901          3
## 266 266 3.0505713 3.7679258          3
## 267 267 3.0833351 2.7855170          3
## 268 268 3.2226999 3.0969466          3
## 269 269 3.2216109 2.8514640          3
## 270 270 2.6036952 3.0328911          3
## 271 271 3.1782502 2.9253867          3
## 272 272 2.7684246 2.5690026          3
## 273 273 3.2782945 3.2051712          3
## 274 274 2.8761220 2.9241752          3
## 275 275 2.9073361 3.3500443          3
## 276 276 2.7977983 3.0704919          3
## 277 277 2.7571229 3.3002550          3
## 278 278 3.0211307 2.7796439          3
## 279 279 3.2905422 2.7314109          3
## 280 280 2.9442027 3.2482132          3
## 281 281 2.4908940 2.9041798          3
## 282 282 2.5040276 3.0997270          3
## 283 283 2.7244374 3.1601071          3
## 284 284 2.4381125 2.6579235          3
## 285 285 2.6848554 3.0012474          3
## 286 286 2.6023956 2.8879919          3
## 287 287 2.7914558 3.1804200          3
## 288 288 3.0592855 3.2297995          3
## 289 289 3.2365817 3.2208429          3
## 290 290 3.4655675 2.7035687          3
## 291 291 3.0905029 3.0045550          3
## 292 292 3.2903506 3.0102220          3
## 293 293 2.9555249 2.5675525          3
## 294 294 3.5858495 2.9978746          3
## 295 295 3.0235324 3.0744484          3
## 296 296 3.0216215 3.1364387          3
## 297 297 2.8722143 3.1256463          3
## 298 298 2.6795480 2.6906942          3
## 299 299 2.8275441 2.8454521          3
## 300 300 2.8541627 3.2153109          3
## 301 301 3.9319212 4.1047787          4
## 302 302 4.2031011 4.1359434          4
## 303 303 4.0141791 4.0903680          4
## 304 304 4.5563307 4.0732038          4
## 305 305 4.4486916 4.0189316          4
## 306 306 4.0397612 4.0247592          4
## 307 307 3.8532171 4.5728605          4
## 308 308 3.9524919 3.6402595          4
## 309 309 4.0389840 4.1587783          4
## 310 310 4.3185560 3.7215885          4
## 311 311 4.1128665 4.0560297          4
## 312 312 4.4458704 4.0531084          4
## 313 313 4.4317854 3.8649695          4
## 314 314 4.1173581 3.5166461          4
## 315 315 3.6585833 4.2296273          4
## 316 316 3.9598596 3.7078570          4
## 317 317 4.2311779 4.0549252          4
## 318 318 3.8440411 4.3013155          4
## 319 319 4.0072497 3.9959807          4
## 320 320 3.5685618 4.6784372          4
## 321 321 3.8635897 3.9775809          4
## 322 322 3.6831961 3.9346015          4
## 323 323 4.2624566 3.9525968          4
## 324 324 3.9087102 4.2356386          4
## 325 325 3.8966738 3.6831030          4
## 326 326 4.2609210 3.9631854          4
## 327 327 4.0945597 3.6859991          4
## 328 328 3.8364296 3.9555679          4
## 329 329 3.9525787 4.7345219          4
## 330 330 4.4383050 4.0457942          4
## 331 331 3.6406599 4.3905430          4
## 332 332 4.0212268 4.2134911          4
## 333 333 3.6725628 3.7573330          4
## 334 334 3.5565935 4.1645013          4
## 335 335 3.9266785 3.8393305          4
## 336 336 4.3669101 4.1737403          4
## 337 337 4.0541001 4.1037426          4
## 338 338 3.9346532 4.2760638          4
## 339 339 4.1979865 4.4269043          4
## 340 340 4.2375841 3.9333475          4
## 341 341 4.4481982 4.1750107          4
## 342 342 4.1149822 4.1838701          4
## 343 343 4.2563582 3.6389154          4
## 344 344 3.8626997 3.9116499          4
## 345 345 3.9325027 4.1438197          4
## 346 346 4.3739662 3.9824950          4
## 347 347 3.9566056 3.8946457          4
## 348 348 4.0582495 3.8343217          4
## 349 349 4.0228510 3.8863071          4
## 350 350 4.5821048 4.1458209          4
## 351 351 3.8650351 3.9805668          4
## 352 352 4.0403204 3.8558202          4
## 353 353 4.1236375 4.2442608          4
## 354 354 3.9066946 4.0137895          4
## 355 355 3.7516307 4.2002033          4
## 356 356 3.6020295 4.4084621          4
## 357 357 4.1958159 3.7809229          4
## 358 358 4.1252388 4.4175897          4
## 359 359 4.0829571 4.3375948          4
## 360 360 3.9502335 3.9279509          4
## 361 361 4.1764366 4.0724829          4
## 362 362 4.3124123 3.5704137          4
## 363 363 3.3435430 4.1069496          4
## 364 364 4.3407060 4.2315522          4
## 365 365 4.1325530 4.0919474          4
## 366 366 3.9235495 3.2245887          4
## 367 367 4.5689692 4.2002139          4
## 368 368 4.5960803 3.7843448          4
## 369 369 4.0514892 4.0091081          4
## 370 370 3.9323193 3.7281339          4
## 371 371 4.0386516 3.7446616          4
## 372 372 4.1898889 3.8100170          4
## 373 373 3.5825260 4.4314731          4
## 374 374 3.7114216 4.1480784          4
## 375 375 3.9080832 4.0036010          4
## 376 376 3.5074064 3.8485071          4
## 377 377 3.6531409 3.5823952          4
## 378 378 3.9791064 3.9305876          4
## 379 379 3.3747293 3.8854777          4
## 380 380 4.3347121 3.9965572          4
## 381 381 3.9593086 3.7610437          4
## 382 382 4.3769150 4.3976243          4
## 383 383 3.5241727 4.3646940          4
## 384 384 4.0252329 4.5869691          4
## 385 385 3.8200147 3.9123737          4
## 386 386 4.4999342 4.3462749          4
## 387 387 3.9031587 4.0120649          4
## 388 388 4.0884901 4.2716017          4
## 389 389 4.3656236 4.2483480          4
## 390 390 4.0927912 3.9236437          4
## 391 391 4.2461660 3.9907240          4
## 392 392 4.3619378 3.8281770          4
## 393 393 4.0253087 3.8700423          4
## 394 394 3.7252807 4.2826156          4
## 395 395 3.8768540 4.0218492          4
## 396 396 3.9353468 4.0839695          4
## 397 397 3.9845756 4.5274781          4
## 398 398 4.3175399 3.9474645          4
## 399 399 4.1956247 3.6506497          4
## 400 400 3.9629929 4.4813448          4
## 401 401 5.0939315 5.0569303          5
## 402 402 5.4797333 4.9093912          5
## 403 403 5.1810277 4.7126132          5
## 404 404 4.5770969 4.8603005          5
## 405 405 5.5510416 5.1442615          5
## 406 406 4.6207706 4.8672681          5
## 407 407 5.3149470 5.1397374          5
## 408 408 5.0665965 4.6585505          5
## 409 409 5.1835886 5.3322633          5
## 410 410 5.3885771 4.9830286          5
## 411 411 4.9562200 5.5049811          5
## 412 412 5.2718581 5.2405135          5
## 413 413 5.2084373 4.9560566          5
## 414 414 4.9721527 5.3795614          5
## 415 415 5.1814817 4.8598929          5
## 416 416 5.6640558 4.9886294          5
## 417 417 4.8236729 5.2310906          5
## 418 418 5.0248687 5.3233466          5
## 419 419 5.2635765 4.9208782          5
## 420 420 4.7660847 5.5413918          5
## 421 421 4.9247488 4.9033880          5
## 422 422 5.0183630 5.3193518          5
## 423 423 5.2639345 5.3483237          5
## 424 424 4.8960107 5.5002529          5
## 425 425 4.8715654 4.5184043          5
## 426 426 4.7970624 4.9077186          5
## 427 427 4.8582851 5.2208421          5
## 428 428 5.1047219 5.0046923          5
## 429 429 4.9271671 5.4422334          5
## 430 430 5.0680027 4.8948732          5
## 431 431 4.9295152 5.6661267          5
## 432 432 5.2617314 5.4790931          5
## 433 433 5.1537129 4.5694669          5
## 434 434 5.3240808 5.6557459          5
## 435 435 5.4158041 5.1627972          5
## 436 436 5.0162186 5.5942489          5
## 437 437 5.0449211 5.1983505          5
## 438 438 5.6727011 5.2197938          5
## 439 439 4.6527547 5.1416786          5
## 440 440 4.7959970 5.7314609          5
## 441 441 4.6232391 4.5484302          5
## 442 442 4.7028692 4.6354168          5
## 443 443 4.6446092 5.0542124          5
## 444 444 4.8668172 4.4917569          5
## 445 445 5.8587500 5.0461115          5
## 446 446 4.5827101 4.3551495          5
## 447 447 4.7592270 4.6402052          5
## 448 448 4.7851855 4.9353877          5
## 449 449 4.9766000 5.0017814          5
## 450 450 5.0633071 4.7822601          5
## 451 451 4.6576744 5.0956024          5
## 452 452 4.8617920 4.5624429          5
## 453 453 5.3491110 4.8368894          5
## 454 454 5.4456517 5.0278979          5
## 455 455 4.8107401 5.1965530          5
## 456 456 5.0508482 4.6658209          5
## 457 457 4.7841330 4.9769776          5
## 458 458 4.7169797 5.0811367          5
## 459 459 5.5069816 5.6701431          5
## 460 460 5.1915878 4.8684516          5
## 461 461 4.9683996 4.9602568          5
## 462 462 4.8230330 5.6446796          5
## 463 463 5.2747394 5.1737542          5
## 464 464 4.9458842 4.8583164          5
## 465 465 4.7860390 5.0114536          5
## 466 466 5.1625189 5.1314488          5
## 467 467 5.1029722 4.5914207          5
## 468 468 5.1994724 4.8600189          5
## 469 469 5.5347419 5.1175816          5
## 470 470 4.8573577 4.7536672          5
## 471 471 5.0869352 5.1629523          5
## 472 472 4.8054829 5.1358627          5
## 473 473 5.0382698 5.1894259          5
## 474 474 4.8631021 4.9282662          5
## 475 475 4.3164392 4.8911148          5
## 476 476 4.6409209 4.4486824          5
## 477 477 5.4132810 5.2804849          5
## 478 478 4.8010176 4.7578252          5
## 479 479 5.0961231 4.8786333          5
## 480 480 5.0187851 5.0409791          5
## 481 481 4.4393572 4.9250130          5
## 482 482 5.0292903 4.5412575          5
## 483 483 5.3828960 4.8183111          5
## 484 484 4.8450897 5.4215614          5
## 485 485 4.5558001 5.0734896          5
## 486 486 4.9906114 4.7052461          5
## 487 487 5.0706787 5.0065545          5
## 488 488 4.7898177 5.3107710          5
## 489 489 5.2205857 4.9906757          5
## 490 490 5.2708506 5.2940148          5
## 491 491 4.6971965 5.2695113          5
## 492 492 4.5837589 4.6601760          5
## 493 493 5.2856180 4.5972000          5
## 494 494 5.0437964 5.0664296          5
## 495 495 5.1229746 5.1062571          5
## 496 496 5.2805337 5.4254368          5
## 497 497 4.5811619 4.6489740          5
## 498 498 4.7703773 5.1451297          5
## 499 499 5.1040017 5.0568612          5
## 500 500 4.9806582 4.6061834          5

This is what our data looks like:

data %>% ggplot(aes(x = V1, y = V2, color = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "true cluster")

Now we can cluster it using kmeans++:

data_for_clust <- data %>% select(id, starts_with("V"))
km <- TGL_kmeans_tidy(data_for_clust,
    k = 5,
    metric = "euclid",
    verbose = TRUE
)
## id column: id
## KMEans: will generate seeds
## KMeans into generate seeds
## at seed 0
## add new core from 147 to 0
## at seed 1
## done update min distance
## seed range 350 450
## picked up 437 dist was 2.34612
## add new core from 437 to 1
## at seed 2
## done update min distance
## seed range 300 400
## picked up 386 dist was 0.862664
## add new core from 386 to 2
## at seed 3
## done update min distance
## seed range 250 350
## picked up 92 dist was 0.683466
## add new core from 92 to 3
## at seed 4
## done update min distance
## seed range 200 300
## picked up 213 dist was 0.572069
## add new core from 213 to 4
## KMEans: reassign after init
## KMEans: iter 0
## KMEans: iter 1 changed 6
## KMEans: iter 1
## KMEans: iter 2 changed 5
## KMEans: iter 2
## KMEans: iter 3 changed 0

The returned list contains 3 fields:

names(km)
## [1] "centers" "cluster" "size"

km$centers contains a tibble with a clust column and the cluster centers:

km$centers
## # A tibble: 5 × 3
##   clust    V1    V2
##   <int> <dbl> <dbl>
## 1     1  2.98 2.99 
## 2     2  1.94 2.00 
## 3     3  5.02 5.04 
## 4     4  4.04 4.03 
## 5     5  1.01 0.952

Clusters are numbered according to reorder_func (see the ‘Custom cluster ordering’ section).

km$cluster contains a tibble with an id column holding the observation id (1:n if no id column was supplied), and a clust column holding the cluster assigned to each observation:

km$cluster
## # A tibble: 500 × 2
##    id    clust
##    <chr> <int>
##  1 1         5
##  2 2         5
##  3 3         5
##  4 4         5
##  5 5         5
##  6 6         5
##  7 7         5
##  8 8         5
##  9 9         5
## 10 10        5
## # ℹ 490 more rows

km$size contains a tibble with a clust column and an n column holding the number of points in each cluster:

km$size
## # A tibble: 5 × 2
##   clust     n
##   <int> <int>
## 1     1    98
## 2     2    99
## 3     3    99
## 4     4   103
## 5     5   101

We can now check our clustering performance - the fraction of observations that were classified correctly (note that the match_clusters function is internal to the package and is used only in this vignette):

d <- tglkmeans:::match_clusters(data, km, 5)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.992

And plot the results:

d %>% ggplot(aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "cluster") +
    scale_shape_discrete(name = "true cluster") +
    geom_point(data = km$centers, size = 7, color = "black", shape = "X")

Custom cluster ordering

By default, the clusters are ordered using the following function: hclust(dist(cor(t(centers)))) - hclust of the Euclidean distance of the correlation matrix of the centers.

We can supply our own function to order the clusters using the reorder_func argument. The function is applied to each center and the clusters are ordered by the result.

km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
    k = 5,
    metric = "euclid",
    verbose = FALSE,
    reorder_func = median
)
km$centers
## # A tibble: 5 × 3
##   clust    V1    V2
##   <int> <dbl> <dbl>
## 1     1  1.01 0.952
## 2     2  1.94 2.00 
## 3     3  2.97 2.99 
## 4     4  4.03 4.03 
## 5     5  5.02 5.04

Missing data

tglkmeans can deal with missing data, as long as at least one dimension is not missing. For example:

data$V1[sample(1:nrow(data), round(nrow(data) * 0.2))] <- NA
data
##      id        V1        V2 true_clust
## 1     1 1.0573519 1.7478028          1
## 2     2 0.8887782 0.7696357          1
## 3     3 0.5269692 1.1493946          1
## 4     4 1.3208460 0.7824137          1
## 5     5 0.8126979 1.5054613          1
## 6     6 1.1336741 1.0945942          1
## 7     7 0.6759592 1.1531551          1
## 8     8 0.9540415 1.0529514          1
## 9     9 1.0006569 0.8315230          1
## 10   10 1.0536077 1.3770325          1
## 11   11 1.2585677 0.9205839          1
## 12   12 0.8650592 1.1372276          1
## 13   13 1.2668903 0.6216743          1
## 14   14 0.9522934 1.1224825          1
## 15   15        NA 0.6984273          1
## 16   16 1.0727619 1.6396694          1
## 17   17 1.3975651 1.2097224          1
## 18   18 1.1757708 0.9048688          1
## 19   19 1.1410109 1.0434194          1
## 20   20 1.4004919 0.7503464          1
## 21   21 0.7653808 1.0777968          1
## 22   22 0.6732523 0.7906839          1
## 23   23        NA 0.3429748          1
## 24   24 0.9413393 0.9503336          1
## 25   25 0.6842599 1.0403824          1
## 26   26 1.2746448 0.8712267          1
## 27   27 0.8354292 1.3091385          1
## 28   28 1.3018118 0.4171895          1
## 29   29        NA 0.9239769          1
## 30   30 0.9336756 0.3525257          1
## 31   31        NA 0.7138900          1
## 32   32 0.7591176 0.5363568          1
## 33   33 1.1538858 1.1079824          1
## 34   34 0.5369660 1.3539198          1
## 35   35 0.5442541 1.1136956          1
## 36   36 1.0531991 0.7798019          1
## 37   37 0.9962604 0.7152946          1
## 38   38 0.9696953 0.5923428          1
## 39   39 0.7437666 1.1423566          1
## 40   40 0.6010118 0.7181032          1
## 41   41 1.1351772 1.0386381          1
## 42   42        NA 0.7528686          1
## 43   43 1.6215292 1.1479554          1
## 44   44 0.9475248 0.6548386          1
## 45   45        NA 1.1258225          1
## 46   46 0.4850797 1.0008841          1
## 47   47 1.3914477 0.9196976          1
## 48   48        NA 1.0907631          1
## 49   49 1.2659405 0.5198630          1
## 50   50 1.4996709 0.7589652          1
## 51   51        NA 0.7218772          1
## 52   52 1.0615364 1.2542256          1
## 53   53 1.3208234 0.8193131          1
## 54   54 0.7073582 0.7141170          1
## 55   55 0.8038330 0.6260761          1
## 56   56        NA 0.9136561          1
## 57   57 0.6688927 0.5611317          1
## 58   58 0.7331415 1.0390434          1
## 59   59 0.9758408 0.7093584          1
## 60   60 1.1862083 0.5044526          1
## 61   61 0.7982409 1.1079171          1
## 62   62 1.1752958 0.7158176          1
## 63   63 1.2827413 0.6460942          1
## 64   64 1.1534405 1.1042519          1
## 65   65 1.1837145 0.8843736          1
## 66   66 1.0995425 0.7952274          1
## 67   67 1.1567166 0.4780196          1
## 68   68 1.4009678 1.2769785          1
## 69   69 1.0116143 1.0279067          1
## 70   70 0.8864627 1.0235342          1
## 71   71 0.5915490 0.6496933          1
## 72   72        NA 0.7774256          1
## 73   73 1.0502020 0.6826228          1
## 74   74 1.2245178 1.3484963          1
## 75   75 1.0590163 1.1499019          1
## 76   76        NA 0.9064248          1
## 77   77 0.9417025 0.9732908          1
## 78   78 1.3080705 1.4753877          1
## 79   79 0.9378239 0.9017197          1
## 80   80        NA 0.8228211          1
## 81   81        NA 1.0903962          1
## 82   82        NA 1.0459977          1
## 83   83        NA 1.1450822          1
## 84   84 1.1563915 0.8992355          1
## 85   85 1.6003158 1.2937698          1
## 86   86 1.2047170 1.3241660          1
## 87   87 0.8906743 1.0305262          1
## 88   88        NA 1.0955173          1
## 89   89 1.2447572 0.6185842          1
## 90   90 0.8438121 0.8225880          1
## 91   91 0.8170360 1.1927344          1
## 92   92 0.8759901 0.9877621          1
## 93   93 1.2447778 1.1029247          1
## 94   94        NA 0.6398270          1
## 95   95 1.0557678 1.2873381          1
## 96   96        NA 0.8014289          1
## 97   97 1.0642112 0.8221881          1
## 98   98 0.6471848 1.3936839          1
## 99   99 1.2265419 0.9881312          1
## 100 100 0.8808854 1.2001093          1
## 101 101 1.8771498 1.7732672          2
## 102 102        NA 1.6102812          2
## 103 103        NA 2.2716912          2
## 104 104 2.2528410 2.2421950          2
## 105 105 2.0146969 2.2037186          2
## 106 106 2.0571673 2.1831962          2
## 107 107 1.5153879 1.4021520          2
## 108 108 1.7644909 2.4067985          2
## 109 109 1.7870424 2.1608651          2
## 110 110 2.0583481 1.2250943          2
## 111 111 2.3458291 1.5937778          2
## 112 112        NA 2.2453937          2
## 113 113 2.2171585 2.1001149          2
## 114 114 2.0621500 1.9473138          2
## 115 115 1.7385439 1.9590821          2
## 116 116 2.0307697 2.1765444          2
## 117 117        NA 1.8862327          2
## 118 118 2.2423609 2.3209563          2
## 119 119        NA 2.0551506          2
## 120 120 1.4492930 2.2375473          2
## 121 121 1.9913989 2.0278606          2
## 122 122        NA 2.1148138          2
## 123 123 1.9707447 2.4592672          2
## 124 124        NA 2.0948015          2
## 125 125 2.0126049 2.6308717          2
## 126 126 2.0283098 1.8761194          2
## 127 127 1.9463263 1.6075172          2
## 128 128        NA 2.1165193          2
## 129 129        NA 2.1636836          2
## 130 130 2.0916448 2.1695019          2
## 131 131        NA 1.8360027          2
## 132 132 1.7901780 1.7874707          2
## 133 133        NA 1.5788929          2
## 134 134 1.7807438 2.0392232          2
## 135 135 1.4754489 1.7173733          2
## 136 136 1.5034552 1.7905576          2
## 137 137 1.8888263 2.2175118          2
## 138 138 2.5301248 2.4090070          2
## 139 139 2.1706763 1.9731797          2
## 140 140 1.7870885 2.0117899          2
## 141 141 2.0399442 2.0871188          2
## 142 142 2.0921740 2.4561481          2
## 143 143 2.0505578 1.2893545          2
## 144 144 1.9369227 2.3211463          2
## 145 145 1.5930952 1.8895866          2
## 146 146 1.5869744 2.1308455          2
## 147 147 1.6693242 1.7933147          2
## 148 148 2.2202213 2.2461861          2
## 149 149 2.3228489 1.7847326          2
## 150 150        NA 2.1800490          2
## 151 151 1.7018048 1.8475247          2
## 152 152        NA 1.8496367          2
## 153 153        NA 2.0882344          2
## 154 154 2.1188523 2.0773122          2
## 155 155 2.3126464 1.5343615          2
## 156 156 1.7105233 2.6269813          2
## 157 157        NA 1.6151418          2
## 158 158 2.4375357 2.0788427          2
## 159 159 2.3534221 1.9087801          2
## 160 160 2.2168841 2.2102064          2
## 161 161 1.6816889 2.0190378          2
## 162 162 2.1181298 1.8870981          2
## 163 163        NA 2.1816763          2
## 164 164 2.2043243 1.4676456          2
## 165 165        NA 1.9194359          2
## 166 166 1.4915602 1.9062962          2
## 167 167 1.9075814 1.8654798          2
## 168 168 1.9283135 1.8666695          2
## 169 169 1.4946530 2.0449109          2
## 170 170 2.1452323 2.2475670          2
## 171 171 1.8060153 1.9800374          2
## 172 172 2.3146444 1.4553192          2
## 173 173 1.5117372 2.3444982          2
## 174 174 1.6535444 1.7997107          2
## 175 175        NA 2.4468576          2
## 176 176 1.9890634 1.7668571          2
## 177 177        NA 1.9689432          2
## 178 178        NA 1.9883723          2
## 179 179        NA 1.9979885          2
## 180 180 1.5795955 2.0853344          2
## 181 181 2.1758930 1.6282237          2
## 182 182 1.5479711 1.5534837          2
## 183 183 2.4613298 2.0013553          2
## 184 184 1.5060333 2.0199955          2
## 185 185        NA 2.1533425          2
## 186 186        NA 1.9896463          2
## 187 187        NA 1.6290961          2
## 188 188 1.8374137 2.2410427          2
## 189 189 2.2772023 2.3338793          2
## 190 190 1.9595828 2.2527371          2
## 191 191        NA 2.1093576          2
## 192 192 1.7016235 1.6914032          2
## 193 193 1.5762973 2.3082377          2
## 194 194 1.8821912 1.5825925          2
## 195 195 1.8848727 1.8853790          2
## 196 196 2.2597503 2.0622894          2
## 197 197 2.2323333 2.2830704          2
## 198 198 1.6201120 1.7764591          2
## 199 199 1.6523724 2.0193403          2
## 200 200 1.9539210 2.2344252          2
## 201 201 2.5819722 2.6567837          3
## 202 202 3.7832316 3.3028839          3
## 203 203 2.8610307 2.9080235          3
## 204 204 2.9901584 3.0943378          3
## 205 205 2.8070174 3.0703758          3
## 206 206 3.1339659 2.4290466          3
## 207 207 3.0380446 3.1386832          3
## 208 208 3.0281548 2.6853569          3
## 209 209        NA 2.6574430          3
## 210 210        NA 3.5263689          3
## 211 211        NA 3.1306349          3
## 212 212 3.0474078 3.2267974          3
## 213 213 2.6853599 3.2922745          3
## 214 214 3.2395651 3.1359853          3
## 215 215 2.9669598 2.6972206          3
## 216 216        NA 3.5274607          3
## 217 217 3.1273191 3.0142304          3
## 218 218 3.1373893 3.2388580          3
## 219 219 3.3383650 3.2082553          3
## 220 220 2.9975194 2.3522521          3
## 221 221 3.6153644 2.7892620          3
## 222 222 2.8461284 2.9314056          3
## 223 223 3.0818398 2.8281676          3
## 224 224        NA 2.7852211          3
## 225 225 2.2244761 3.5469746          3
## 226 226 2.8095517 2.4626160          3
## 227 227 3.6508666 3.3560274          3
## 228 228 2.7399855 2.9685999          3
## 229 229 2.9550281 3.5272008          3
## 230 230        NA 2.7083776          3
## 231 231        NA 3.3237273          3
## 232 232 2.3411670 2.7699882          3
## 233 233 3.1370998 2.6864259          3
## 234 234 3.0956364 3.4672988          3
## 235 235 3.5666822 3.0121527          3
## 236 236        NA 2.5919551          3
## 237 237 3.4237445 3.1104173          3
## 238 238 3.2756589 3.0391673          3
## 239 239 2.4897821 2.9606401          3
## 240 240 2.9341736 2.9715870          3
## 241 241 2.4684781 2.5386491          3
## 242 242 2.8460596 3.1439480          3
## 243 243 2.8189948 2.7818898          3
## 244 244 3.0199555 3.0133392          3
## 245 245 2.7966737 2.9273637          3
## 246 246 3.5255893 3.2752274          3
## 247 247 3.0797900 2.4478439          3
## 248 248 2.8447704 3.0511925          3
## 249 249 2.9218640 3.1880018          3
## 250 250 3.0945030 2.8852678          3
## 251 251 3.0406291 2.6547352          3
## 252 252 3.0128696 2.9649426          3
## 253 253 3.4363800 3.1199428          3
## 254 254 2.4605726 3.2040897          3
## 255 255 3.2086744 3.1462461          3
## 256 256 2.6813105 3.4488977          3
## 257 257        NA 2.4466426          3
## 258 258 3.1365861 3.1815340          3
## 259 259        NA 2.7634753          3
## 260 260 3.0787704 3.3339114          3
## 261 261 3.2313037 2.9723319          3
## 262 262 2.9445187 3.0200138          3
## 263 263 2.6034877 2.5892338          3
## 264 264 2.8454410 2.7943757          3
## 265 265 2.6499147 3.4176901          3
## 266 266 3.0505713 3.7679258          3
## 267 267 3.0833351 2.7855170          3
## 268 268 3.2226999 3.0969466          3
## 269 269        NA 2.8514640          3
## 270 270 2.6036952 3.0328911          3
## 271 271 3.1782502 2.9253867          3
## 272 272 2.7684246 2.5690026          3
## 273 273 3.2782945 3.2051712          3
## 274 274 2.8761220 2.9241752          3
## 275 275 2.9073361 3.3500443          3
## 276 276 2.7977983 3.0704919          3
## 277 277 2.7571229 3.3002550          3
## 278 278 3.0211307 2.7796439          3
## 279 279 3.2905422 2.7314109          3
## 280 280 2.9442027 3.2482132          3
## 281 281 2.4908940 2.9041798          3
## 282 282 2.5040276 3.0997270          3
## 283 283 2.7244374 3.1601071          3
## 284 284 2.4381125 2.6579235          3
## 285 285 2.6848554 3.0012474          3
## 286 286 2.6023956 2.8879919          3
## 287 287 2.7914558 3.1804200          3
## 288 288 3.0592855 3.2297995          3
## 289 289 3.2365817 3.2208429          3
## 290 290 3.4655675 2.7035687          3
## 291 291        NA 3.0045550          3
## 292 292        NA 3.0102220          3
## 293 293 2.9555249 2.5675525          3
## 294 294        NA 2.9978746          3
## 295 295 3.0235324 3.0744484          3
## 296 296 3.0216215 3.1364387          3
## 297 297 2.8722143 3.1256463          3
## 298 298 2.6795480 2.6906942          3
## 299 299        NA 2.8454521          3
## 300 300 2.8541627 3.2153109          3
## 301 301 3.9319212 4.1047787          4
## 302 302 4.2031011 4.1359434          4
## 303 303 4.0141791 4.0903680          4
## 304 304        NA 4.0732038          4
## 305 305 4.4486916 4.0189316          4
## 306 306 4.0397612 4.0247592          4
## 307 307        NA 4.5728605          4
## 308 308 3.9524919 3.6402595          4
## 309 309        NA 4.1587783          4
## 310 310 4.3185560 3.7215885          4
## 311 311        NA 4.0560297          4
## 312 312 4.4458704 4.0531084          4
## 313 313 4.4317854 3.8649695          4
## 314 314 4.1173581 3.5166461          4
## 315 315        NA 4.2296273          4
## 316 316 3.9598596 3.7078570          4
## 317 317 4.2311779 4.0549252          4
## 318 318 3.8440411 4.3013155          4
## 319 319 4.0072497 3.9959807          4
## 320 320 3.5685618 4.6784372          4
## 321 321        NA 3.9775809          4
## 322 322 3.6831961 3.9346015          4
## 323 323 4.2624566 3.9525968          4
## 324 324 3.9087102 4.2356386          4
## 325 325 3.8966738 3.6831030          4
## 326 326 4.2609210 3.9631854          4
## 327 327 4.0945597 3.6859991          4
## 328 328 3.8364296 3.9555679          4
## 329 329 3.9525787 4.7345219          4
## 330 330 4.4383050 4.0457942          4
## 331 331 3.6406599 4.3905430          4
## 332 332        NA 4.2134911          4
## 333 333        NA 3.7573330          4
## 334 334 3.5565935 4.1645013          4
## 335 335 3.9266785 3.8393305          4
## 336 336 4.3669101 4.1737403          4
## 337 337 4.0541001 4.1037426          4
## 338 338        NA 4.2760638          4
## 339 339 4.1979865 4.4269043          4
## 340 340        NA 3.9333475          4
## 341 341 4.4481982 4.1750107          4
## 342 342 4.1149822 4.1838701          4
## 343 343        NA 3.6389154          4
## 344 344        NA 3.9116499          4
## 345 345 3.9325027 4.1438197          4
## 346 346 4.3739662 3.9824950          4
## 347 347 3.9566056 3.8946457          4
## 348 348 4.0582495 3.8343217          4
## 349 349 4.0228510 3.8863071          4
## 350 350 4.5821048 4.1458209          4
## 351 351 3.8650351 3.9805668          4
## 352 352 4.0403204 3.8558202          4
## 353 353 4.1236375 4.2442608          4
## 354 354 3.9066946 4.0137895          4
## 355 355        NA 4.2002033          4
## 356 356        NA 4.4084621          4
## 357 357 4.1958159 3.7809229          4
## 358 358 4.1252388 4.4175897          4
## 359 359 4.0829571 4.3375948          4
## 360 360        NA 3.9279509          4
## 361 361 4.1764366 4.0724829          4
## 362 362 4.3124123 3.5704137          4
## 363 363 3.3435430 4.1069496          4
## 364 364 4.3407060 4.2315522          4
## 365 365 4.1325530 4.0919474          4
## 366 366 3.9235495 3.2245887          4
## 367 367 4.5689692 4.2002139          4
## 368 368 4.5960803 3.7843448          4
## 369 369        NA 4.0091081          4
## 370 370        NA 3.7281339          4
## 371 371 4.0386516 3.7446616          4
## 372 372 4.1898889 3.8100170          4
## 373 373        NA 4.4314731          4
## 374 374 3.7114216 4.1480784          4
## 375 375 3.9080832 4.0036010          4
## 376 376 3.5074064 3.8485071          4
## 377 377 3.6531409 3.5823952          4
## 378 378 3.9791064 3.9305876          4
## 379 379        NA 3.8854777          4
## 380 380 4.3347121 3.9965572          4
## 381 381 3.9593086 3.7610437          4
## 382 382 4.3769150 4.3976243          4
## 383 383        NA 4.3646940          4
## 384 384 4.0252329 4.5869691          4
## 385 385        NA 3.9123737          4
## 386 386 4.4999342 4.3462749          4
## 387 387 3.9031587 4.0120649          4
## 388 388 4.0884901 4.2716017          4
## 389 389 4.3656236 4.2483480          4
## 390 390        NA 3.9236437          4
## 391 391 4.2461660 3.9907240          4
## 392 392 4.3619378 3.8281770          4
## 393 393 4.0253087 3.8700423          4
## 394 394 3.7252807 4.2826156          4
## 395 395 3.8768540 4.0218492          4
## 396 396        NA 4.0839695          4
## 397 397 3.9845756 4.5274781          4
## 398 398 4.3175399 3.9474645          4
## 399 399 4.1956247 3.6506497          4
## 400 400 3.9629929 4.4813448          4
## 401 401        NA 5.0569303          5
## 402 402        NA 4.9093912          5
## 403 403 5.1810277 4.7126132          5
## 404 404 4.5770969 4.8603005          5
## 405 405 5.5510416 5.1442615          5
## 406 406 4.6207706 4.8672681          5
## 407 407        NA 5.1397374          5
## 408 408 5.0665965 4.6585505          5
## 409 409 5.1835886 5.3322633          5
## 410 410        NA 4.9830286          5
## 411 411 4.9562200 5.5049811          5
## 412 412 5.2718581 5.2405135          5
## 413 413 5.2084373 4.9560566          5
## 414 414 4.9721527 5.3795614          5
## 415 415 5.1814817 4.8598929          5
## 416 416 5.6640558 4.9886294          5
## 417 417 4.8236729 5.2310906          5
## 418 418 5.0248687 5.3233466          5
## 419 419 5.2635765 4.9208782          5
## 420 420 4.7660847 5.5413918          5
## 421 421 4.9247488 4.9033880          5
## 422 422 5.0183630 5.3193518          5
## 423 423 5.2639345 5.3483237          5
## 424 424 4.8960107 5.5002529          5
## 425 425 4.8715654 4.5184043          5
## 426 426 4.7970624 4.9077186          5
## 427 427 4.8582851 5.2208421          5
## 428 428 5.1047219 5.0046923          5
## 429 429 4.9271671 5.4422334          5
## 430 430 5.0680027 4.8948732          5
## 431 431 4.9295152 5.6661267          5
## 432 432 5.2617314 5.4790931          5
## 433 433        NA 4.5694669          5
## 434 434        NA 5.6557459          5
## 435 435 5.4158041 5.1627972          5
## 436 436 5.0162186 5.5942489          5
## 437 437        NA 5.1983505          5
## 438 438 5.6727011 5.2197938          5
## 439 439        NA 5.1416786          5
## 440 440        NA 5.7314609          5
## 441 441 4.6232391 4.5484302          5
## 442 442 4.7028692 4.6354168          5
## 443 443 4.6446092 5.0542124          5
## 444 444 4.8668172 4.4917569          5
## 445 445 5.8587500 5.0461115          5
## 446 446 4.5827101 4.3551495          5
## 447 447 4.7592270 4.6402052          5
## 448 448 4.7851855 4.9353877          5
## 449 449 4.9766000 5.0017814          5
## 450 450 5.0633071 4.7822601          5
## 451 451 4.6576744 5.0956024          5
## 452 452 4.8617920 4.5624429          5
## 453 453 5.3491110 4.8368894          5
## 454 454 5.4456517 5.0278979          5
## 455 455 4.8107401 5.1965530          5
## 456 456 5.0508482 4.6658209          5
## 457 457 4.7841330 4.9769776          5
## 458 458 4.7169797 5.0811367          5
## 459 459 5.5069816 5.6701431          5
## 460 460 5.1915878 4.8684516          5
## 461 461 4.9683996 4.9602568          5
## 462 462 4.8230330 5.6446796          5
## 463 463 5.2747394 5.1737542          5
## 464 464 4.9458842 4.8583164          5
## 465 465 4.7860390 5.0114536          5
## 466 466        NA 5.1314488          5
## 467 467 5.1029722 4.5914207          5
## 468 468 5.1994724 4.8600189          5
## 469 469        NA 5.1175816          5
## 470 470        NA 4.7536672          5
## 471 471 5.0869352 5.1629523          5
## 472 472 4.8054829 5.1358627          5
## 473 473 5.0382698 5.1894259          5
## 474 474 4.8631021 4.9282662          5
## 475 475 4.3164392 4.8911148          5
## 476 476 4.6409209 4.4486824          5
## 477 477 5.4132810 5.2804849          5
## 478 478 4.8010176 4.7578252          5
## 479 479        NA 4.8786333          5
## 480 480 5.0187851 5.0409791          5
## 481 481 4.4393572 4.9250130          5
## 482 482 5.0292903 4.5412575          5
## 483 483 5.3828960 4.8183111          5
## 484 484 4.8450897 5.4215614          5
## 485 485 4.5558001 5.0734896          5
## 486 486        NA 4.7052461          5
## 487 487        NA 5.0065545          5
## 488 488 4.7898177 5.3107710          5
## 489 489 5.2205857 4.9906757          5
## 490 490 5.2708506 5.2940148          5
## 491 491 4.6971965 5.2695113          5
## 492 492        NA 4.6601760          5
## 493 493        NA 4.5972000          5
## 494 494        NA 5.0664296          5
## 495 495 5.1229746 5.1062571          5
## 496 496 5.2805337 5.4254368          5
## 497 497 4.5811619 4.6489740          5
## 498 498        NA 5.1451297          5
## 499 499 5.1040017 5.0568612          5
## 500 500 4.9806582 4.6061834          5
# Cluster on the id and feature (V*) columns only; the true labels are held out.
km <- data %>%
    select(id, starts_with("V")) %>%
    TGL_kmeans_tidy(k = 5, metric = "euclid", verbose = FALSE)

# Internal helper; presumably maps the fitted labels onto the simulated ones.
# The result carries both `true_clust` and `new_clust`.
d <- tglkmeans:::match_clusters(data, km, 5)

# Fraction of matching labels among the points that have a `new_clust` value.
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 0.984

Plotting the results (points with missing values are dropped), we get:

# Colour encodes the assigned cluster, shape encodes the simulated cluster.
# The fitted centers are drawn on top as large black "X" marks.
ggplot(
    d,
    aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))
) +
    geom_point() +
    geom_point(data = km$centers, size = 7, color = "black", shape = "X") +
    scale_color_discrete(name = "cluster") +
    scale_shape_discrete(name = "true cluster")
## Warning: Removed 100 rows containing missing values (`geom_point()`).

High dimensions

Let’s move to higher dimensions (and many more clusters):

# Simulate 30 clusters in 300 dimensions.
data <- simulate_data(n = 100, sd = 0.3, nclust = 30, dims = 300)

# Cluster on the id and feature (V*) columns only.
km <- data %>%
    select(id, starts_with("V")) %>%
    TGL_kmeans_tidy(k = 30, metric = "euclid", verbose = FALSE)

# Fraction of matching labels among the points that have a `new_clust` value.
d <- tglkmeans:::match_clusters(data, km, 30)
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 1

Comparison with R vanilla kmeans

Let’s compare it to R vanilla kmeans:

# Run base R kmeans() on the feature (V*) columns only, asking for 30 clusters.
km_standard <- kmeans(data %>% select(starts_with("V")), 30)

# Attach an id/cluster table under `clust`, presumably the layout that
# match_clusters() expects (TODO confirm against the helper).
# seq_len() is used instead of 1:nrow() so an empty input yields an empty id
# vector rather than c(1, 0).
km_standard$clust <- tibble(id = seq_len(nrow(data)), clust = km_standard$cluster)

# Fraction of matching labels among the points that have a `new_clust` value.
d <- tglkmeans:::match_clusters(data, km_standard, 30)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.75

In this run, kmeans++ recovers the true clusters far more accurately than R vanilla kmeans (an accuracy of 1 versus 0.75).

Random seed

We can set the seed for the C++ random number generator to get reproducible results:

# Fit the same model twice, passing the same seed both times.
km1 <- data %>%
    select(id, starts_with("V")) %>%
    TGL_kmeans_tidy(k = 30, metric = "euclid", verbose = FALSE, seed = 60427)
km2 <- data %>%
    select(id, starts_with("V")) %>%
    TGL_kmeans_tidy(k = 30, metric = "euclid", verbose = FALSE, seed = 60427)

# Compare the centers element-wise, leaving out the first column of each table
# (presumably the cluster label). Exact equality is intended here.
all(km1$centers[, -1] == km2$centers[, -1])
## [1] TRUE