R/fast_anticlustering.R
generate_exchange_partners.Rd
Get exchange partners for fast_anticlustering()
generate_exchange_partners(
n_exchange_partners,
N = NULL,
features = NULL,
method = "random",
categories = NULL
)
The number of exchange partners per element
The number of elements for which exchange partners are generated; can be
NULL
if features
is passed (it is ignored if
features
is passed).
The features for which nearest neighbours are
sought if method = "RANN"
. May be NULL if random
exchange partners are generated.
Currently supports "random" (default), "RANN" and "restricted_random". See details.
A vector, data.frame or matrix representing one or several categorical constraints.
A list of length N
. It is usually used as input to the
argument exchange_partners
in
fast_anticlustering
. Then, the i'th element of
the list contains the indices of the exchange partners that are
used for the i'th element.
The method = "RANN"
generates exchange partners using a
nearest neighbour search via nn2
from the
RANN
package; method = "restricted_random"
generates
random exchange partners but ensures that for each element, no
duplicates are generated and that the element itself does not occur
as exchange partner (this is the slowest method, and I would not
recommend it for large N); method = "random"
(default) does
not impose these restrictions and generates unrestricted random
partners (it may therefore generate duplicates and the element
itself as exchange partner).
When setting the categories
argument and using method
= "RANN"
, exchange partners (i.e., nearest neighbours) will be
generated from the same category; method =
"restricted_random"
will also adhere to categorical constraints
induced via categories
(i.e. each element only receives
exchange partners from the same category as itself); method
= "random"
cannot incorporate categorical restrictions.
# Restricted random method generates no duplicates per element and cannot return
# the element itself as exchange partner
generate_exchange_partners(5, N = 10, method = "restricted_random")
#> [[1]]
#> [1] 9 6 3 4 2
#>
#> [[2]]
#> [1] 5 3 7 1 9
#>
#> [[3]]
#> [1] 7 1 5 4 9
#>
#> [[4]]
#> [1] 2 7 5 6 9
#>
#> [[5]]
#> [1] 3 2 8 6 4
#>
#> [[6]]
#> [1] 3 2 7 5 1
#>
#> [[7]]
#> [1] 8 5 1 10 3
#>
#> [[8]]
#> [1] 3 7 6 5 2
#>
#> [[9]]
#> [1] 3 8 4 6 1
#>
#> [[10]]
#> [1] 1 3 7 6 9
#>
# "random" simply randomizes with replacement and without restrictions
# (categorical restrictions are also not possible; it is much faster for large data sets)
generate_exchange_partners(5, N = 10, method = "random")
#> [[1]]
#> [1] 5 6 5 8 6
#>
#> [[2]]
#> [1] 10 8 3 1 8
#>
#> [[3]]
#> [1] 4 4 6 1 2
#>
#> [[4]]
#> [1] 4 5 4 6 5
#>
#> [[5]]
#> [1] 4 6 9 4 8
#>
#> [[6]]
#> [1] 3 9 10 7 6
#>
#> [[7]]
#> [1] 8 2 7 6 3
#>
#> [[8]]
#> [1] 4 4 5 6 3
#>
#> [[9]]
#> [1] 6 9 5 10 8
#>
#> [[10]]
#> [1] 2 10 1 10 9
#>
# May return fewer than 5 exchange partners if there are not enough members
# of the same category:
generate_exchange_partners(
5, N = 10,
method = "restricted_random",
categories = cbind(schaper2019$room, schaper2019$frequency)
)
#> [[1]]
#> [1] 66 41 37 6 71
#>
#> [[2]]
#> [1] 72 36 69 67 77
#>
#> [[3]]
#> [1] 13 34 44 14 8
#>
#> [[4]]
#> [1] 34 13 46 44 8
#>
#> [[5]]
#> integer(0)
#>
#> [[6]]
#> [1] 66 1 37 41 71
#>
#> [[7]]
#> [1] 48 76 15 11 38
#>
#> [[8]]
#> [1] 44 13 3 14 4
#>
#> [[9]]
#> integer(0)
#>
#> [[10]]
#> [1] 75 45 39
#>
#> [[11]]
#> [1] 42 73 76 35 38
#>
#> [[12]]
#> [1] 6 71 37 1 41
#>
#> [[13]]
#> [1] 44 34 4 46 8
#>
#> [[14]]
#> [1] 3 8 44 13 46
#>
#> [[15]]
#> [1] 76 80 47 35 7
#>
#> [[16]]
#> [1] 79 40 33
#>
#> [[17]]
#> [1] 28 96 63 85 30
#>
#> [[18]]
#> [1] 96 17 88 85 30
#>
#> [[19]]
#> [1] 60 54 61 84
#>
#> [[20]]
#> [1] 53 22 89 52
#>
#> [[21]]
#> [1] 57 23 90
#>
#> [[22]]
#> [1] 53 20 52 89
#>
#> [[23]]
#> [1] 21 57 90
#>
#> [[24]]
#> integer(0)
#>
#> [[25]]
#> [1] 59 29 81 82 58
#>
#> [[26]]
#> [1] 51 92 62 87 49
#>
#> [[27]]
#> [1] 82 58 25 29 81
#>
#> [[28]]
#> [1] 96 18 63 30 88
#>
#> [[29]]
#> [1] 25 27 81 58 59
#>
#> [[30]]
#> [1] 96 18 88 63 85
#>
#> [[31]]
#> [1] 87 26 92 49 83
#>
#> [[32]]
#> [1] 86
#>
#> [[33]]
#> [1] 79 40 16
#>
#> [[34]]
#> [1] 4 44 13 46 3
#>
#> [[35]]
#> [1] 38 47 42 7 80
#>
#> [[36]]
#> [1] 67 65 70 77 69
#>
#> [[37]]
#> [1] 71 66 1 41 12
#>
#> [[38]]
#> [1] 42 48 35 47 76
#>
#> [[39]]
#> [1] 10 75 45
#>
#> [[40]]
#> [1] 33 79 16
#>
#> [[41]]
#> [1] 71 1 12 6 66
#>
#> [[42]]
#> [1] 11 38 15 48 73
#>
#> [[43]]
#> [1] 74 68
#>
#> [[44]]
#> [1] 4 8 46 13 3
#>
#> [[45]]
#> [1] 39 10 75
#>
#> [[46]]
#> [1] 4 44 14 3 13
#>
#> [[47]]
#> [1] 48 11 38 76 80
#>
#> [[48]]
#> [1] 7 42 73 35 80
#>
#> [[49]]
#> [1] 55 62 51 92 83
#>
#> [[50]]
#> [1] 95 94 91
#>
#> [[51]]
#> [1] 26 87 55 31 83
#>
#> [[52]]
#> [1] 20 22 53 89
#>
#> [[53]]
#> [1] 20 89 52 22
#>
#> [[54]]
#> [1] 19 61 84 60
#>
#> [[55]]
#> [1] 49 26 83 62 51
#>
#> [[56]]
#> integer(0)
#>
#> [[57]]
#> [1] 90 23 21
#>
#> [[58]]
#> [1] 27 82 25 29 81
#>
#> [[59]]
#> [1] 25 58 81 82 29
#>
#> [[60]]
#> [1] 84 61 54 19
#>
#> [[61]]
#> [1] 54 84 19 60
#>
#> [[62]]
#> [1] 92 49 26 55 31
#>
#> [[63]]
#> [1] 17 18 88 96 30
#>
#> [[64]]
#> [1] 93
#>
#> [[65]]
#> [1] 70 36 69 2 77
#>
#> [[66]]
#> [1] 37 41 6 12 1
#>
#> [[67]]
#> [1] 36 77 72 69 65
#>
#> [[68]]
#> [1] 74 43
#>
#> [[69]]
#> [1] 70 65 36 77 2
#>
#> [[70]]
#> [1] 65 2 67 77 69
#>
#> [[71]]
#> [1] 41 37 12 6 66
#>
#> [[72]]
#> [1] 36 70 65 67 69
#>
#> [[73]]
#> [1] 35 38 7 42 80
#>
#> [[74]]
#> [1] 68 43
#>
#> [[75]]
#> [1] 39 10 45
#>
#> [[76]]
#> [1] 47 80 11 42 73
#>
#> [[77]]
#> [1] 67 70 65 2 72
#>
#> [[78]]
#> integer(0)
#>
#> [[79]]
#> [1] 33 16 40
#>
#> [[80]]
#> [1] 76 15 11 48 73
#>
#> [[81]]
#> [1] 58 25 82 27 29
#>
#> [[82]]
#> [1] 29 59 25 27 58
#>
#> [[83]]
#> [1] 87 55 62 49 92
#>
#> [[84]]
#> [1] 19 61 60 54
#>
#> [[85]]
#> [1] 17 18 96 28 63
#>
#> [[86]]
#> [1] 32
#>
#> [[87]]
#> [1] 31 49 51 83 26
#>
#> [[88]]
#> [1] 30 17 18 28 63
#>
#> [[89]]
#> [1] 22 53 52 20
#>
#> [[90]]
#> [1] 57 21 23
#>
#> [[91]]
#> [1] 95 94 50
#>
#> [[92]]
#> [1] 83 55 31 49 26
#>
#> [[93]]
#> [1] 64
#>
#> [[94]]
#> [1] 91 95 50
#>
#> [[95]]
#> [1] 50 91 94
#>
#> [[96]]
#> [1] 18 30 17 85 88
#>
# using nearest neighbour search (unlike RANN::nn2, this does not
# return the ID of the element itself as neighbour)
generate_exchange_partners(5, features = schaper2019[, 3:5], method = "RANN")[1:3]
#> [[1]]
#> [1] 65 74 43 83 21
#>
#> [[2]]
#> [1] 18 10 90 82 42
#>
#> [[3]]
#> [1] 11 35 4 92 93
#>
# compare with RANN directly:
RANN::nn2(schaper2019[, 3:5], k = 6)$nn.idx[1:3, ] # note k = 6
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 1 65 74 43 83 21
#> [2,] 2 18 10 90 82 42
#> [3,] 3 11 35 4 92 93