# 根据R中的条件添加元素rep的列和nb

``````   Groups Names COL1  COL2  COL3        COL4
1      G1   SP1    1 0.400 0.500   Sequence1
2      G1   SP1    1 0.004 0.005   Sequence2
3      G1   SP1    0 0.004 0.005   Sequence3
4      G1   SP2    0 0.400 0.005 Sequence123
5      G1   SP2    0 0.004 0.500  Sequence14
6      G1   SP3    0 0.005 0.006  Sequence15
7      G1   SP5    1 0.400 0.006  Sequence16
8      G1   SP6    1 0.008 0.002  Sequence20
10     G2   Sp1    0 0.004 0.005  Sequence17
11     G2   SP1    0 0.050 0.600  Sequence18
12     G2   SP1    0 0.400 0.600   Sequence3
13     G2   SP2    0 0.004 0.005  Sequence22
14     G2   SP2    0 0.004 0.005  Sequence23
15     G2   SP5    0 0.004 0.005  Sequence16
16     G2   SP6    0 0.003 0.002  Sequence21
17     G2   SP7    0 0.560 0.760  Sequence67
18     G3   SP5    0 0.87  0.767  Sequence16
``````

I have the data frame above, and I would like to add a new column `COL5` that marks, for each `Names` within `Groups`, the sequences (`COL4`) that are shared between groups. For instance, let's look at `G1`.

`SP1` has `Sequence3`, which is present in both `G1` and `G2`, so I put the number of repetitions (here 2) in rows 3 and 12. The same goes for `SP5`, which has `Sequence16` in `G1`, `G2` and `G3` (here the number of repetitions is 3). Sequences that are not shared get 0.

``````   Groups Names COL1  COL2  COL3        COL4 COL5
1      G1   SP1    1 0.400 0.500   Sequence1 0
2      G1   SP1    1 0.004 0.005   Sequence2 0
3      G1   SP1    0 0.004 0.005   Sequence3 2
4      G1   SP2    0 0.400 0.005 Sequence123 0
5      G1   SP2    0 0.004 0.500  Sequence14 0
6      G1   SP3    0 0.005 0.006  Sequence15 0
7      G1   SP5    1 0.400 0.006  Sequence16 3
8      G1   SP6    1 0.008 0.002  Sequence20 0
10     G2   Sp1    0 0.004 0.005  Sequence17 0
11     G2   SP1    0 0.050 0.600  Sequence18 0
12     G2   SP1    0 0.400 0.600   Sequence3 2
13     G2   SP2    0 0.004 0.005  Sequence22 0
14     G2   SP2    0 0.004 0.005  Sequence23 0
15     G2   SP5    0 0.004 0.005  Sequence16 3
16     G2   SP6    0 0.003 0.002  Sequence21 0
17     G2   SP7    0 0.560 0.760  Sequence67 0
18     G3   SP5    0 0.87  0.767  Sequence16 3
``````

Here is the `dput`:

``````dput(test_df)
# Example data: 17 rows (row name "9" is absent), six columns.
# Every column has exactly 17 entries and every factor has unique levels,
# so the structure can be pasted straight into R to rebuild the data frame.
structure(list(Groups = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("G1", "G2", "G3"), class = "factor"),
Names = structure(c(2L, 2L, 2L, 3L, 3L, 4L, 5L, 6L, 1L, 2L,
2L, 3L, 3L, 5L, 6L, 7L, 5L), .Label = c("Sp1", "SP1", "SP2",
"SP3", "SP5", "SP6", "SP7"), class = "factor"), COL1 = c(1L,
1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), COL2 = c(0.4, 0.004, 0.004, 0.4, 0.004, 0.005, 0.4, 0.008,
0.004, 0.05, 0.4, 0.004, 0.004, 0.004, 0.003, 0.56, 0.87), COL3 = c(0.5,
0.005, 0.005, 0.005, 0.5, 0.006, 0.006, 0.002, 0.005, 0.6,
0.6, 0.005, 0.005, 0.005, 0.002, 0.76, 0.767), COL4 = structure(c(1L,
8L, 13L, 2L, 3L, 4L, 5L, 9L, 6L, 7L, 13L, 11L, 12L, 5L, 10L,
14L, 5L), .Label = c("Sequence1", "Sequence123", "Sequence14",
"Sequence15", "Sequence16", "Sequence17", "Sequence18", "Sequence2",
"Sequence20", "Sequence21", "Sequence22", "Sequence23", "Sequence3",
"Sequence67"), class = "factor")), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "10", "11", "12", "13", "14",
"15", "16", "17", "18"))
``````

id_cum

We can count the number of unique `Groups` for each `COL4` value and assign 1 if there is more than one, and 0 otherwise.

``````library(dplyr)
# For every COL4 value, count the distinct Groups it occurs in and set COL5 to
# 1L when it occurs in more than one group, 0L otherwise.
# ungroup() drops the COL4 grouping so that later verbs applied to the result
# are not silently evaluated per group.
test_df %>%
  group_by(COL4) %>%
  mutate(COL5 = as.integer(n_distinct(Groups) > 1)) %>%
  ungroup()

#   Groups Names  COL1  COL2  COL3 COL4         COL5
#   <fct>  <fct> <int> <dbl> <dbl> <fct>       <int>
# 1 G1     SP1       1 0.4   0.5   Sequence1       0
# 2 G1     SP1       1 0.004 0.005 Sequence2       0
# 3 G1     SP1       0 0.004 0.005 Sequence3       1
# 4 G1     SP2       0 0.4   0.005 Sequence123     0
# 5 G1     SP2       0 0.004 0.5   Sequence21      0
# 6 G1     SP3       0 0.005 0.006 Sequence15      1
# 7 G1     SP5       1 0.4   0.006 Sequence16      1
# 8 G1     SP6       1 0.008 0.002 Sequence20      0
# 9 G2     Sp1       0 0.004 0.005 Sequence17      0
#10 G2     SP1       0 0.05  0.6   Sequence18      0
#11 G2     SP1       0 0.4   0.6   Sequence3       1
#12 G2     SP2       0 0.004 0.005 Sequence22      0
#13 G2     SP2       0 0.004 0.005 Sequence23      0
#14 G2     SP5       0 0.004 0.005 Sequence16      1
#15 G2     SP6       0 0.003 0.002 Sequence21      0
#16 G2     SP7       0 0.56  0.76  Sequence67      0
#17 G3     SP5       0 0.87  0.767 Sequence15      1
``````

Or in `data.table` :

``````library(data.table)
# Per COL4 value, set COL5 to 1L when more than one distinct Groups value
# occurs and to 0L otherwise. setDT() converts test_df by reference and `:=`
# adds the column in place, so no reassignment is needed.
setDT(test_df)[, COL5 := as.integer(uniqueN(Groups) > 1), by = COL4]
``````

``````test_df$COL5 <- with(
  test_df,
  # Base R equivalent: ave() applies FUN within each COL4 value and returns a
  # vector aligned with the rows. Counting on integer group codes (rather than
  # on a character vector) keeps the result numeric, so the `> 1L` test is a
  # numeric comparison instead of a string comparison.
  as.integer(
    ave(as.integer(factor(Groups)), COL4,
        FUN = function(x) length(unique(x))) > 1L
  )
)
``````