GenomicDataStream
A scalable interface between data and analysis
Loading...
Searching...
No Matches
gds::Param Struct Reference

#include <GenomicDataStream_virtual.h>

Public Member Functions

 Param ()
 Param (const string &file, string regionString="", const string &samples="-", const double MAF=0, const double minVariance=0, const int &chunkSize=10000, const bool &missingToMean=true, const bool &permuteFeatureOrder=false, const int &rndSeed=12345)
void setField (const string &field_)
void setRegions (const string &regionString)
vector< string > getRegions () const
void setSamplesFile (const string &file)
void print (ostream &out) const

Public Attributes

string file
string fileSamples = ""
string field = ""
vector< string > regions
string samples
double MAF
double minVariance
int chunkSize
bool missingToMean
FileType fileType

Detailed Description

Store parameters to pass to GenomicDataStream

Constructor & Destructor Documentation

◆ Param() [1/2]

gds::Param::Param ( )
inline

◆ Param() [2/2]

gds::Param::Param ( const string & file,
string regionString = "",
const string & samples = "-",
const double MAF = 0,
const double minVariance = 0,
const int & chunkSize = 10000,
const bool & missingToMean = true,
const bool & permuteFeatureOrder = false,
const int & rndSeed = 12345 )
inline

constructor

Parameters
file: .vcf, .vcf.gz or .bcf file with tabix index
regionString: target in the format "chr2:1-12345". Multiple regions can be separated by any of the delimiters ",\n\t" (comma, newline or tab), for example "chr2:1-12345, chr3:1000-8000". Setting region to "" includes all variants
samples: string of comma-separated sample IDs to extract: "ID1,ID2,ID3"
MAF: minor allele frequency filter applied to variants with max value <= 2
minVariance: minimum variance filter applied to variants with max value < 2
chunkSize: number of variants to return per chunk
missingToMean: if true, set missing values to the mean dosage value. If false, set to NaN
permuteFeatureOrder: default is false. If true, permute regions in regionString to avoid linkage disequilibrium between nearby regions
rndSeed: random seed for permutation

Note that field must be set separately for VCF/BCF. field: "GT" for genotype strings, "DS" for dosage, or any other field stored as an integer or float. "GT" is the only string type supported.

Member Function Documentation

◆ getRegions()

vector< string > gds::Param::getRegions ( ) const
inline

◆ print()

void gds::Param::print ( ostream & out) const
inline

◆ setField()

void gds::Param::setField ( const string & field_)
inline

◆ setRegions()

void gds::Param::setRegions ( const string & regionString)
inline

◆ setSamplesFile()

void gds::Param::setSamplesFile ( const string & file)
inline

Custom path to PSAM/FAM file

Member Data Documentation

◆ chunkSize

int gds::Param::chunkSize

◆ field

string gds::Param::field = ""

◆ file

string gds::Param::file

◆ fileSamples

string gds::Param::fileSamples = ""

◆ fileType

FileType gds::Param::fileType

◆ MAF

double gds::Param::MAF

◆ minVariance

double gds::Param::minVariance

◆ missingToMean

bool gds::Param::missingToMean

◆ regions

vector<string> gds::Param::regions

◆ samples

string gds::Param::samples

The documentation for this struct was generated from the following file: