Get exchange partners for fast_anticlustering()

generate_exchange_partners(
  n_exchange_partners,
  N = NULL,
  features = NULL,
  method = "random",
  categories = NULL
)

Arguments

n_exchange_partners

The number of exchange partners per element

N

The number of elements for which exchange partners are generated; can be NULL if features is passed (N is ignored if features is passed).

features

The features for which nearest neighbours are sought if method = "RANN". May be NULL if random exchange partners are generated.

method

Currently supports "random" (default), "RANN" and "restricted_random". See details.

categories

A vector, data.frame or matrix representing one or several categorical constraints.

Value

A list of length N. It is usually used as input to the argument exchange_partners in fast_anticlustering; in that case, the i'th element of the list contains the indices of the exchange partners that are used for the i'th element.

Details

The method = "RANN" generates exchange partners using a nearest neighbour search via nn2 from the RANN package; method = "restricted_random" generates random exchange partners but ensures that for each element, no duplicates are generated and that the element itself does not occur as exchange partner (this is the slowest method, and I would not recommend it for large N); method = "random" (default) does not impose these restrictions and generates unrestricted random partners (it may therefore generate duplicates and the element itself as exchange partner).

When setting the categories argument and using method = "RANN", exchange partners (i.e., nearest neighbours) will be generated from the same category; method = "restricted_random" will also adhere to categorical constraints induced via categories (i.e., each element only receives exchange partners from the same category as itself); method = "random" cannot incorporate categorical restrictions.

Examples


# Restricted random method generates no duplicates per element and cannot return 
# the element itself as exchange partner
generate_exchange_partners(5, N = 10, method = "restricted_random")
#> [[1]]
#> [1] 9 4 3 8 2
#> 
#> [[2]]
#> [1] 8 7 4 1 5
#> 
#> [[3]]
#> [1] 7 9 1 6 5
#> 
#> [[4]]
#> [1] 9 5 1 7 3
#> 
#> [[5]]
#> [1]  6  3  2  4 10
#> 
#> [[6]]
#> [1] 10  9  1  3  4
#> 
#> [[7]]
#> [1]  6  5  8  4 10
#> 
#> [[8]]
#> [1]  1  3 10  4  6
#> 
#> [[9]]
#> [1] 6 1 5 8 4
#> 
#> [[10]]
#> [1] 8 1 4 2 6
#> 
# "random" simply randomizes with replacement and without restrictions
# (categorical restrictions are also not possible; this method is much faster for large data sets)
generate_exchange_partners(5, N = 10, method = "random")
#> [[1]]
#> [1]  2  9 10  5  3
#> 
#> [[2]]
#> [1] 3 4 6 4 3
#> 
#> [[3]]
#> [1]  2  7  1  5 10
#> 
#> [[4]]
#> [1] 6 6 4 7 1
#> 
#> [[5]]
#> [1] 6 3 2 1 3
#> 
#> [[6]]
#> [1] 8 3 5 9 4
#> 
#> [[7]]
#> [1] 3 5 4 4 9
#> 
#> [[8]]
#> [1] 3 3 7 5 5
#> 
#> [[9]]
#> [1]  9 10  7  9  1
#> 
#> [[10]]
#> [1] 1 3 7 4 6
#> 
# May return fewer than 5 exchange partners if there are not enough members 
# of the same category: 
generate_exchange_partners(
  5, N = 10, 
  method = "restricted_random", 
  categories = cbind(schaper2019$room, schaper2019$frequency)
)
#> [[1]]
#> [1] 37 71 41 12 66
#> 
#> [[2]]
#> [1] 70 36 69 67 77
#> 
#> [[3]]
#> [1] 34 44 13 46  4
#> 
#> [[4]]
#> [1] 46 34  3 14 13
#> 
#> [[5]]
#> integer(0)
#> 
#> [[6]]
#> [1]  1 41 12 71 37
#> 
#> [[7]]
#> [1] 47 48 80 11 35
#> 
#> [[8]]
#> [1] 34 44  3 13  4
#> 
#> [[9]]
#> integer(0)
#> 
#> [[10]]
#> [1] 75 39 45
#> 
#> [[11]]
#> [1] 80  7 35 47 48
#> 
#> [[12]]
#> [1] 41 66  6 71 37
#> 
#> [[13]]
#> [1]  8 46 44 14  3
#> 
#> [[14]]
#> [1] 13 44 34  4  8
#> 
#> [[15]]
#> [1] 76 80 73 35 11
#> 
#> [[16]]
#> [1] 79 33 40
#> 
#> [[17]]
#> [1] 96 28 30 85 63
#> 
#> [[18]]
#> [1] 85 28 96 88 30
#> 
#> [[19]]
#> [1] 54 61 60 84
#> 
#> [[20]]
#> [1] 52 22 89 53
#> 
#> [[21]]
#> [1] 23 57 90
#> 
#> [[22]]
#> [1] 89 53 52 20
#> 
#> [[23]]
#> [1] 90 57 21
#> 
#> [[24]]
#> integer(0)
#> 
#> [[25]]
#> [1] 27 81 58 82 59
#> 
#> [[26]]
#> [1] 87 55 92 62 31
#> 
#> [[27]]
#> [1] 29 25 82 58 81
#> 
#> [[28]]
#> [1] 88 96 63 17 30
#> 
#> [[29]]
#> [1] 27 82 25 81 59
#> 
#> [[30]]
#> [1] 85 28 17 18 63
#> 
#> [[31]]
#> [1] 62 92 83 55 26
#> 
#> [[32]]
#> [1] 86
#> 
#> [[33]]
#> [1] 40 79 16
#> 
#> [[34]]
#> [1]  3  4 46  8 44
#> 
#> [[35]]
#> [1] 80 15 42 76 47
#> 
#> [[36]]
#> [1] 65 77 70 69  2
#> 
#> [[37]]
#> [1]  6 12 71 66  1
#> 
#> [[38]]
#> [1] 11 73 42 35 47
#> 
#> [[39]]
#> [1] 45 75 10
#> 
#> [[40]]
#> [1] 33 16 79
#> 
#> [[41]]
#> [1] 66 12  1 71 37
#> 
#> [[42]]
#> [1] 38  7 15 80 35
#> 
#> [[43]]
#> [1] 74 68
#> 
#> [[44]]
#> [1] 13  8  4 34 14
#> 
#> [[45]]
#> [1] 39 10 75
#> 
#> [[46]]
#> [1]  3 44  8  4 13
#> 
#> [[47]]
#> [1] 73 11 15 48 76
#> 
#> [[48]]
#> [1] 80 11 47 73 76
#> 
#> [[49]]
#> [1] 87 83 26 55 62
#> 
#> [[50]]
#> [1] 94 95 91
#> 
#> [[51]]
#> [1] 83 87 62 55 49
#> 
#> [[52]]
#> [1] 20 53 89 22
#> 
#> [[53]]
#> [1] 22 52 20 89
#> 
#> [[54]]
#> [1] 61 60 84 19
#> 
#> [[55]]
#> [1] 26 62 92 83 49
#> 
#> [[56]]
#> integer(0)
#> 
#> [[57]]
#> [1] 21 90 23
#> 
#> [[58]]
#> [1] 27 25 59 82 29
#> 
#> [[59]]
#> [1] 25 29 58 82 81
#> 
#> [[60]]
#> [1] 84 19 54 61
#> 
#> [[61]]
#> [1] 60 19 84 54
#> 
#> [[62]]
#> [1] 49 26 51 87 31
#> 
#> [[63]]
#> [1] 17 18 88 85 30
#> 
#> [[64]]
#> [1] 93
#> 
#> [[65]]
#> [1] 69 67  2 36 77
#> 
#> [[66]]
#> [1] 37 71 12 41  1
#> 
#> [[67]]
#> [1] 69 36 70  2 72
#> 
#> [[68]]
#> [1] 43 74
#> 
#> [[69]]
#> [1] 67 72 70  2 77
#> 
#> [[70]]
#> [1] 36 67 77 69  2
#> 
#> [[71]]
#> [1] 66 12  1 37  6
#> 
#> [[72]]
#> [1] 69 67 77  2 70
#> 
#> [[73]]
#> [1] 80 47 42 11 15
#> 
#> [[74]]
#> [1] 43 68
#> 
#> [[75]]
#> [1] 39 45 10
#> 
#> [[76]]
#> [1]  7 15 11 42 80
#> 
#> [[77]]
#> [1] 69 70 36 72 65
#> 
#> [[78]]
#> integer(0)
#> 
#> [[79]]
#> [1] 33 16 40
#> 
#> [[80]]
#> [1] 15 47 11 38  7
#> 
#> [[81]]
#> [1] 29 58 27 82 25
#> 
#> [[82]]
#> [1] 81 27 59 29 58
#> 
#> [[83]]
#> [1] 31 62 51 55 87
#> 
#> [[84]]
#> [1] 54 19 61 60
#> 
#> [[85]]
#> [1] 17 63 18 30 28
#> 
#> [[86]]
#> [1] 32
#> 
#> [[87]]
#> [1] 55 26 92 83 49
#> 
#> [[88]]
#> [1] 85 18 30 17 96
#> 
#> [[89]]
#> [1] 53 22 52 20
#> 
#> [[90]]
#> [1] 21 23 57
#> 
#> [[91]]
#> [1] 94 95 50
#> 
#> [[92]]
#> [1] 62 51 87 49 26
#> 
#> [[93]]
#> [1] 64
#> 
#> [[94]]
#> [1] 91 50 95
#> 
#> [[95]]
#> [1] 91 50 94
#> 
#> [[96]]
#> [1] 88 18 85 28 17
#> 
# using nearest neighbour search (unlike RANN::nn2, this does not 
# return the ID of the element itself as neighbour)
generate_exchange_partners(5, features = schaper2019[, 3:5], method = "RANN")[1:3]
#> [[1]]
#> [1] 65 74 43 83 21
#> 
#> [[2]]
#> [1] 18 10 90 82 42
#> 
#> [[3]]
#> [1] 11 35  4 92 93
#> 
# compare with RANN directly:
RANN::nn2(schaper2019[, 3:5], k = 6)$nn.idx[1:3, ] # note k = 6
#>      [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,]    1   65   74   43   83   21
#> [2,]    2   18   10   90   82   42
#> [3,]    3   11   35    4   92   93