10  Simulation & Parentage

This chapter covers the forward-time population simulation framework and parentage analysis tools in gstudio.

library(gstudio)
library(dplyr, warn.conflicts = FALSE)

10.1 Building Populations from Frequencies

10.1.1 Allele Frequency Specification

The simulation tools start with specifying allele frequencies per population and locus. The make_populations() function requires a data.frame with Population, Locus, Allele, and Frequency columns:

f1 <- data.frame(
  Population = "A",
  Locus = rep(c("Loc1", "Loc2", "Loc3"), each = 2),
  Allele = rep(c("01", "02"), times = 3),
  Frequency = c(0.2, 0.8, 0.3, 0.7, 0.4, 0.6)
)
f2 <- data.frame(
  Population = "B",
  Locus = rep(c("Loc1", "Loc2", "Loc3"), each = 2),
  Allele = rep(c("01", "02"), times = 3),
  Frequency = c(0.7, 0.3, 0.5, 0.5, 0.6, 0.4)
)
f3 <- data.frame(
  Population = "C",
  Locus = rep(c("Loc1", "Loc2", "Loc3"), each = 2),
  Allele = rep(c("01", "02"), times = 3),
  Frequency = c(0.4, 0.6, 0.3, 0.7, 0.2, 0.8)
)
freqs <- rbind(f1, f2, f3)
freqs$Population <- factor(freqs$Population, ordered = TRUE)

10.1.2 Creating Populations

pop <- make_populations(freqs, N = 30)
head(pop)
  Population ID  Loc1  Loc2  Loc3
1          A  1 02:02 01:02 01:02
2          A  2 01:02 01:02 02:02
3          A  3 02:02 02:02 02:02
4          A  4 02:02 02:02 01:02
5          A  5 02:02 01:02 02:02
6          A  6 02:02 02:02 01:02
table(pop$Population)

 A  B  C 
30 30 30 

You can specify an inbreeding level with F:

pop_inbred <- make_populations(freqs, N = 30, F = 0.3)

10.2 Migration Models

The migration_matrix() function creates a K x K migration matrix where rows sum to 1. Element [i,j] is the proportion of population i that originates from population j.

10.2.1 Island Model

Equal migration among all populations:

migration_matrix(3, model = "island", m = 0.05)
      Pop1  Pop2  Pop3
Pop1 0.950 0.025 0.025
Pop2 0.025 0.950 0.025
Pop3 0.025 0.025 0.950

10.2.2 1D Stepping Stone

Migration only between adjacent populations:

migration_matrix(c("A", "B", "C", "D"),
                 model = "stepping_stone_1d", m = 0.1)
     A    B    C    D
A 0.90 0.10 0.00 0.00
B 0.05 0.90 0.05 0.00
C 0.00 0.05 0.90 0.05
D 0.00 0.00 0.10 0.90

10.2.3 2D Stepping Stone

Grid-based adjacency:

migration_matrix(4, model = "stepping_stone_2d",
                 m = 0.1, nr = 2, nc = 2)
     Pop1 Pop2 Pop3 Pop4
Pop1 0.90 0.05 0.05 0.00
Pop2 0.05 0.90 0.00 0.05
Pop3 0.05 0.00 0.90 0.05
Pop4 0.00 0.05 0.05 0.90

10.2.4 Distance-Based

Migration rate decays with distance:

coords <- matrix(c(0, 0, 1, 0, 0.5, 1), ncol = 2, byrow = TRUE)
migration_matrix(3, model = "distance", m = 0.1, coords = coords)
          Pop1      Pop2      Pop3
Pop1 0.9000000 0.0527864 0.0472136
Pop2 0.0527864 0.9000000 0.0472136
Pop3 0.0500000 0.0500000 0.9000000

10.3 Migration Events

A migration_event pairs a migration matrix with a generation interval, allowing temporal regime changes:

m1 <- migration_matrix(3, model = "island", m = 0.05)
m2 <- migration_matrix(3, model = "island", m = 0.20)

# Low migration for generations 1-50
evt1 <- migration_event(m1, start = 1, end = 50)

# High migration from generation 51 onward
evt2 <- migration_event(m2, start = 51)

10.4 Mutation Models

The mutation_model() function defines how alleles mutate during simulation:

# Infinite Alleles Model
mm_iam <- mutation_model(rate = 0.001, model = "iam")

# K-Allele Model (10 possible states)
mm_kam <- mutation_model(rate = 0.001, model = "kam", k = 10)

# Stepwise Mutation Model
mm_smm <- mutation_model(rate = 0.001, model = "smm")

10.5 Forward-Time Simulation

The simulate_pop() function runs a multi-generation forward-time simulation. Each generation cycle: census, migrate, mate, mutate.

10.5.1 Basic Simulation

pop <- make_populations(freqs, N = 30)
result <- simulate_pop(pop, ngen = 20, verbose = FALSE)
head(result)
  ID Population  Loc1  Loc2  Loc3
1  1          A 01:02 01:01 01:02
2  2          A 02:02 01:01 01:01
3  3          A 02:02 01:02 02:02
4  4          A 02:02 01:02 01:02
5  5          A 02:02 01:02 02:02
6  6          A 01:02 01:01 01:01

10.5.2 With Migration

m <- migration_matrix(3, model = "island", m = 0.05)
evt <- migration_event(m, start = 1)

result <- simulate_pop(pop, ngen = 20,
                       migration = evt,
                       verbose = FALSE)

10.5.3 With Mutation

mm <- mutation_model(rate = 0.01, model = "iam")
result <- simulate_pop(pop, ngen = 20,
                       mutation = mm,
                       verbose = FALSE)

10.5.4 With Mixed Mating (Selfing)

result <- simulate_pop(pop, ngen = 20,
                       selfing_rate = 0.3,
                       verbose = FALSE)

10.5.5 Census Snapshots

Save population snapshots at regular intervals:

result <- simulate_pop(pop, ngen = 100,
                       census_interval = 25,
                       census_dir = tempdir(),
                       verbose = FALSE)
# Snapshots saved as gen_0025.rds, gen_0050.rds, etc.

10.5.6 Complete Worked Example

Simulate drift and migration in a 3-population system, then analyze the outcome:

# Create starting populations
pop <- make_populations(freqs, N = 50)

# Set up island model migration
m <- migration_matrix(3, model = "island", m = 0.02)
evt <- migration_event(m, start = 1)

# Run simulation
set.seed(42)
final <- simulate_pop(pop, ngen = 50,
                      migration = evt,
                      verbose = FALSE)

# Analyze structure in the final generation
genetic_structure(final, stratum = "Population", mode = "Gst")
       Locus       Gst        Hs        Ht        P
1       Loc1 0.2240740 0.3267340 0.4210891 0.224074
2       Loc2 0.2950070 0.3156229 0.4476965 0.295007
3       Loc3 0.1449410 0.4195286 0.4906429 0.144941
4 Multilocus 0.2188736 1.0618855 1.3594285       NA
# Compare diversity before and after
genetic_diversity(pop, stratum = "Population", mode = "He")
   Stratum Locus     He
2        A  Loc1 0.3200
3        A  Loc2 0.4118
4        A  Loc3 0.4800
5        B  Loc1 0.4118
6        B  Loc2 0.5000
7        B  Loc3 0.4800
8        C  Loc1 0.4800
9        C  Loc2 0.4118
10       C  Loc3 0.3200
genetic_diversity(final, stratum = "Population", mode = "He")
   Stratum Locus     He
2        A  Loc1 0.0000
3        A  Loc2 0.4712
4        A  Loc3 0.4422
5        B  Loc1 0.4712
6        B  Loc2 0.0000
7        B  Loc3 0.4838
8        C  Loc1 0.4992
9        C  Loc2 0.4662
10       C  Loc3 0.3200

10.6 Parentage Analysis

10.6.1 Mating and Offspring

The mate() function creates offspring from two parents:

f <- data.frame(
  Locus = rep(paste0("Loc", 1:4), each = 3),
  Allele = rep(LETTERS[1:3], times = 4),
  Frequency = rep(c(1/3, 1/3, 1/3), times = 4)
)
adults <- make_population(f, N = 20)
offs <- mate(adults[1, ], adults[2, ], N = 5)
offs
  ID Loc1 Loc2 Loc3 Loc4
1  1  A:C  C:C  B:C  B:C
2  1  A:A  A:C  B:C  B:C
3  1  B:C  C:C  B:C  B:C
4  1  A:C  A:C  B:B  B:C
5  1  A:B  A:C  B:C  B:C

10.6.2 Maternal Subtraction

The minus_mom() function removes the maternal contribution from offspring genotypes. It takes a single data.frame where mothers have OffID = 0 and offspring have OffID != 0:

# Combine mother and offspring into one data.frame
mom_row <- adults[1, ]
mom_row$OffID <- 0
offs$OffID <- 1:nrow(offs)
family <- rbind(mom_row, offs)
paternal <- minus_mom(family)
paternal
  ID Loc1 Loc2 Loc3 Loc4 OffID
2  1  A:C    C  B:C    C     1
3  1    A  A:C  B:C    C     2
4  1    B    C  B:C    C     3
5  1  A:C  A:C    B    C     4
6  1    B  A:C  B:C    C     5

10.6.3 Fractional Paternity

The paternity() function estimates the probability of each potential father siring each offspring:

offs$OffID <- 1:nrow(offs)
offs$MomID <- adults$ID[1]
pat <- paternity(offs, adults[1, ], adults)
head(pat)
  MomID OffID DadID        Fij
1     1     1     2 0.29629630
2     1     1     7 0.14814815
3     1     1    12 0.14814815
4     1     1    15 0.14814815
5     1     1    18 0.14814815
6     1     1    16 0.07407407

The Fij column gives the fractional paternity probability. Use strict = TRUE to retain only unambiguous assignments:

pat_strict <- paternity(offs, adults[1, ], adults, strict = TRUE)
pat_strict
[1] MomID OffID DadID Fij  
<0 rows> (or 0-length row.names)

10.6.4 Single-Parent Exclusion

The parent_finder() function identifies compatible parents for each offspring using exclusion. It requires a data.frame with ID and OffID columns, where adults have OffID = 0:

# Prepare a combined data.frame
locus_cols <- column_class(adults, "locus")
adults2 <- adults[, c("ID", locus_cols)]
adults2$OffID <- 0
offs2 <- offs[, c("ID", locus_cols)]
offs2$OffID <- 1:nrow(offs2)
combined <- rbind(adults2, offs2)
result <- parent_finder(combined)
head(result)
  ID OffID ParentID       T
1  1     1        1 0.06250
2  1     1        2 0.06250
4  1     1        7 0.06250
6  1     1       15 0.06250
3  1     1        5 0.03125
5  1     1       12 0.03125

10.6.5 Identifying Bad Parents

The bad_parents() function flags parent-offspring pairs that are genetically incompatible:

bad_parents(offs, adults)

10.7 Bundled Parentage Datasets

The cornus and cornus_florida datasets contain Cornus florida genotype data for parentage analysis:

data(cornus)
head(cornus)
  Population SampleID X.Coordinate Y.Coordinate   Cf.G8  Cf.H18   Cf.N5  Cf.N10
1          2      203         3040         6146 157:165 117:119 170:170 189:191
2          2      204         3070         6114 159:163 101:103 166:170 193:201
3          2      205         3096         6148 151:157 113:119         193:195
4          2      206         3124         6136 153:161 117:119         197:201
5          2      207         3082         6126 159:163 105:119 158:170 193:193
6          2      208         3148         5180 181:181 119:119 170:170 199:201
    Cf.O5
1 198:198
2 178:182
3 178:182
4 178:198
5 178:196
6 176:182
data(cornus_florida)
head(cornus_florida)
   ID OffID    X    Y      G8     H18      N5     N10      O5
1 226     0 1392 3534 162:180 114:114 124:126 192:192 185:195
2 232     0 1656 3414 158:180 112:112 124:126 184:192 185:185
3 234     0 1718 3330 158:180  112:96 128:128 184:192 185:185
4 300     0 1175 3114 180:188 112:116 126:126 198:200 191:195
5 305     0 1529 3237 154:170 122:124 124:126 188:192 181:195
6 432     0 1336 2748 164:180 114:116 124:126 198:202 185:193