Skip to contents

Create a reference panel from the 1000 Genomes Project. Requires bcftools, tabix, and parallel.

# Input: directory holding the 1000 Genomes phase 3 VCFs.
# The files can be obtained from
# https://hgdownload.cse.ucsc.edu/gbdb/hg19/1000Genomes/phase3/
DIR="/sc/arion/projects/data-ark/Public_Unrestricted/1000G/phase3/VCF/"

# Output: directory that receives the processed data
OUT="/sc/arion/projects/roussp01a/gabriel/ref_panels/1kg"

# For each autosome (chr1..chr22):
#   1. keep only records that pass the expression EUR_AF > 0.001
#   2. use bcftools norm to split multi-allelic sites
#      into multiple biallelic records
#   3. write the result as compressed BCF to
#      $OUT/1kg_chr<CHR>_norm_eur.bcf
# Reads $DIR (input VCFs) and $OUT (output directory).
#
# NOTE(review): EUR_AF is presumably the alternate-allele frequency in
# European samples, so this is a one-sided threshold rather than a true
# MAF filter (sites with EUR_AF close to 1 are kept) -- confirm intended.
# NOTE(review): the filter runs before the split, so it is evaluated on the
# unsplit multi-allelic records -- confirm this ordering is intended.

# Make sure the destination exists before bcftools tries to write into it
mkdir -p "$OUT"

for CHR in {1..22}
do
  FILE="$DIR/ALL.chr${CHR}.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz"

  # pipefail is enabled only inside the subshell, so a failure in any stage
  # of the pipeline is detected without changing the options of the calling
  # shell. -Ou streams uncompressed BCF between stages, which avoids
  # converting to and from text VCF at every pipe.
  if ! (
    set -o pipefail
    bcftools view -i "EUR_AF > 0.001" -Ou "$FILE" |
      bcftools norm -m - -Ou |
      bcftools view -O b -o "$OUT/1kg_chr${CHR}_norm_eur.bcf"
  )
  then
    # Report on stderr and carry on with the remaining chromosomes
    echo "WARNING: processing failed for chr${CHR} ($FILE)" >&2
  fi
done

# Create an index for each BCF in $OUT, one `bcftools index` job per file.
# The shell glob supplies the file names directly (one per line on stdin of
# parallel) instead of parsing the output of ls, and "$OUT" is quoted so the
# path is passed through unchanged.
printf '%s\n' "$OUT"/*_eur.bcf | parallel bcftools index

# Write MAP file of variant locations
# For each autosome, writes $OUT/1kg_chr<CHR>_norm_eur.map: a tab-delimited
# file with the header CHROM, POS, ID, A1, A2 followed by one line per record
# of the corresponding BCF (A1 and A2 hold the REF and ALT fields).

# Move to the output directory; use $OUT rather than repeating the path so
# the two cannot drift apart, and report if the directory is not reachable.
cd "$OUT" || echo "WARNING: cannot change directory to $OUT" >&2

for CHR in {1..22}
do
  MAP="$OUT/1kg_chr${CHR}_norm_eur.map"

  # printf interprets \t itself, unlike echo whose -e flag is not portable
  printf 'CHROM\tPOS\tID\tA1\tA2\n' > "$MAP"

  # bcftools query emits exactly the requested fields and never the header,
  # so there is no need to subset to a single sample to shrink the stream or
  # to strip header lines with grep (grep -v "#" would also discard any
  # record that contains a '#' anywhere in the line).
  bcftools query -f '%CHROM\t%POS\t%ID\t%REF\t%ALT\n' \
    "$OUT/1kg_chr${CHR}_norm_eur.bcf" >> "$MAP"
done

Session Info

## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin23.6.0
## Running under: macOS Sonoma 14.7.1
## 
## Matrix products: default
## BLAS:   /Users/gabrielhoffman/prog/R-4.4.2/lib/libRblas.dylib 
## LAPACK: /opt/homebrew/Cellar/r/4.4.3/lib/R/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] BiocStyle_2.34.0
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.37       desc_1.4.3          R6_2.6.1            bookdown_0.42      
##  [5] fastmap_1.2.0       xfun_0.51           cachem_1.1.0        knitr_1.49         
##  [9] htmltools_0.5.8.1   rmarkdown_2.29      lifecycle_1.0.4     cli_3.6.4          
## [13] sass_0.4.9          pkgdown_2.1.1       textshaping_1.0.0   jquerylib_0.1.4    
## [17] systemfonts_1.2.1   compiler_4.4.2      tools_4.4.2         ragg_1.3.3         
## [21] bslib_0.9.0         evaluate_1.0.3      yaml_2.3.10         BiocManager_1.30.25
## [25] jsonlite_1.9.1      rlang_1.1.5         fs_1.6.5            htmlwidgets_1.6.4