13#include "genfile/bgen/View.hpp"
14#include "genfile/bgen/IndexQuery.hpp"
20 template<
typename T >
21 string atoi(
const T &value ) {
22 std::ostringstream stream ;
30 vector<int>
const& ploidy_dimension,
31 vector<double> * data,
32 vector<int>
const& data_dimension,
35 map<size_t, size_t>
const& requested_samples
38 m_ploidy_dimension( ploidy_dimension ),
40 m_data_dimension( data_dimension ),
42 m_variant_i( variant_i ),
43 m_requested_samples( requested_samples ),
44 m_requested_sample_i( m_requested_samples.begin() ),
46 m_order_type( genfile::eUnknownOrderType )
48 assert( m_data_dimension[0] == m_ploidy_dimension[0] ) ;
49 assert( m_data_dimension[1] == m_ploidy_dimension[1] ) ;
50 assert( m_data_dimension[1] == m_requested_samples.size() ) ;
51 assert( m_variant_i < m_data_dimension[0] ) ;
52 assert( m_data_dimension[2] >= 3 ) ;
53 assert( m_phased->size() == m_data_dimension[0] ) ;
57 void initialise(
size_t number_of_samples,
size_t number_of_alleles ) {
63 void set_min_max_ploidy(
64 genfile::bgen::uint32_t min_ploidy, genfile::bgen::uint32_t max_ploidy,
65 genfile::bgen::uint32_t min_entries, genfile::bgen::uint32_t max_entries
67 if( max_entries > m_data_dimension[2] ) {
68 throw std::invalid_argument(
"max_entries=" + atoi( max_entries )
69 +
" (expected at most " + atoi( m_data_dimension[2] ) +
")" ) ;
74 bool set_sample(
size_t i ) {
75 if( m_requested_sample_i != m_requested_samples.end() && m_requested_sample_i->first == i ) {
76 m_storage_i = m_requested_sample_i->second ;
77 ++m_requested_sample_i ;
89 void set_number_of_entries(
91 size_t number_of_entries,
92 genfile::OrderType order_type,
93 genfile::ValueType value_type
95 if( value_type != genfile::eProbability ) {
96 throw std::invalid_argument(
98 + atoi( value_type ) +
", expected "
99 + atoi( genfile::eProbability ) +
"=genfile::eProbability)"
102 if( m_order_type == genfile::eUnknownOrderType ) {
103 m_order_type = order_type ;
105 (*m_phased)[ m_variant_i ] = ( m_order_type == genfile::ePerPhasedHaplotypePerAllele ) ;
107 assert( order_type == m_order_type ) ;
109 int const index = m_variant_i + m_storage_i * m_ploidy_dimension[0] ;
110 (*m_ploidy)[ index ] = ploidy ;
113 void set_value( genfile::bgen::uint32_t entry_i,
double value ) {
115 int const index = m_variant_i + m_storage_i * m_data_dimension[0] + entry_i * m_data_dimension[0] * m_data_dimension[1] ;
119 (*m_data)[ index ] = value ;
122 void set_value( genfile::bgen::uint32_t entry_i, genfile::MissingValue value ) {
123 int const index = m_variant_i + m_storage_i * m_data_dimension[0] + entry_i * m_data_dimension[0] * m_data_dimension[1] ;
127 (*m_data)[ index ] = numeric_limits<double>::quiet_NaN() ;
135 vector<int>* m_ploidy ;
136 vector<int>
const m_ploidy_dimension ;
137 vector<double> * m_data ;
138 vector<int>
const m_data_dimension ;
139 vector<bool> * m_phased ;
141 size_t const m_variant_i ;
143 map<size_t, size_t>
const& m_requested_samples ;
144 map<size_t, size_t>::const_iterator m_requested_sample_i ;
147 genfile::OrderType m_order_type ;
150 struct set_sample_names {
151 typedef map< size_t, size_t > SampleIndexMap ;
153 set_sample_names( vector<string>* result, SampleIndexMap* sample_indices ):
155 m_sample_indices( sample_indices ),
158 assert( result != 0 ) ;
159 assert( sample_indices != 0 ) ;
163 void operator()(
string const& value ) {
164 m_sample_indices->insert( std::make_pair( m_index, m_index ) ) ;
165 (*m_result)[m_index++] = value ;
168 vector<string>* m_result ;
169 SampleIndexMap* m_sample_indices ;
173 struct set_requested_sample_names {
174 typedef map<string, size_t> RequestedSamples ;
175 typedef map<size_t, size_t> SampleIndexMap ;
177 set_requested_sample_names(
178 vector<string>* result,
179 SampleIndexMap* sample_indices,
180 RequestedSamples
const& requested_samples
183 m_sample_indices( sample_indices ),
184 m_requested_samples( requested_samples ),
188 assert( result != 0 ) ;
189 assert( sample_indices != 0 ) ;
190 assert( sample_indices->size() == 0 ) ;
191 assert( result->size() == requested_samples.size() ) ;
194 void operator()(
string const& value ) {
195 RequestedSamples::const_iterator where = m_requested_samples.find( value ) ;
196 if( where != m_requested_samples.end() ) {
197 (*m_result)[ where->second ] = value ;
198 m_sample_indices->insert( make_pair( m_value_index, where->second ) ) ;
203 vector<string>* m_result ;
204 SampleIndexMap* m_sample_indices ;
205 RequestedSamples
const& m_requested_samples ;
207 size_t m_value_index ;
211static void get_all_samples(
212 genfile::bgen::View
const& view,
213 size_t* number_of_samples,
214 vector<string>* sampleNames,
215 map<size_t, size_t>* requestedSamplesByIndexInDataIndex
217 *number_of_samples = view.number_of_samples() ;
218 sampleNames->resize( *number_of_samples ) ;
219 view.get_sample_ids( set_sample_names( sampleNames, requestedSamplesByIndexInDataIndex ) ) ;
222static void get_requested_samples(
223 genfile::bgen::View
const& view,
224 vector<string>
const& requestedSamples,
225 size_t* number_of_samples,
226 vector<string>* sampleNames,
227 map<size_t, size_t>* requestedSamplesByIndexInDataIndex
230 map<string, size_t> requestedSamplesByName ;
231 for(
size_t i = 0; i < requestedSamples.size(); ++i ) {
232 requestedSamplesByName.insert( std::map< string, size_t >::value_type(
string( requestedSamples[i] ), i )) ;
234 if( requestedSamplesByName.size() != requestedSamples.size() ) {
235 throw std::invalid_argument(
236 "load_unsafe(): requiredSamples: expected a list of unique samples with no repeats."
240 *number_of_samples = requestedSamples.size() ;
241 sampleNames->resize( requestedSamples.size() ) ;
242 view.get_sample_ids( set_requested_sample_names( sampleNames, requestedSamplesByIndexInDataIndex, requestedSamplesByName ) ) ;
248 set<size_t> checkSamples ;
249 size_t minIndex = std::numeric_limits< size_t >::max() ;
250 size_t maxIndex = 0 ;
253 map<size_t, size_t>::const_iterator p = requestedSamplesByIndexInDataIndex->begin();
254 p != requestedSamplesByIndexInDataIndex->end();
257 checkSamples.insert( p->second ) ;
258 minIndex = std::min( minIndex, p->second ) ;
259 maxIndex = std::max( maxIndex, p->second ) ;
261 if( checkSamples.size() != requestedSamples.size() || minIndex != 0 || maxIndex != (requestedSamples.size()-1) ) {
265 map<size_t, size_t>::const_iterator p = requestedSamplesByIndexInDataIndex->begin();
266 p != requestedSamplesByIndexInDataIndex->end();
272 throw std::invalid_argument(
274 "requested sample not found in file"