Minimac4
Loading...
Searching...
No Matches
hidden_markov_model.hpp
Go to the documentation of this file.
1#ifndef MINIMAC4_HIDDEN_MARKOV_MODEL_HPP
2#define MINIMAC4_HIDDEN_MARKOV_MODEL_HPP
3
5#include "variant.hpp"
6
7#include <array>
8#include <cassert>
9#include <numeric>
10
20{
21public:
28 std::vector<std::vector<float>> dosages_;
29
36 std::vector<std::vector<float>> loo_dosages_;
37public:
57 void resize(std::size_t n_rows, std::size_t n_loo_rows, std::size_t n_columns)
58 {
59 dosages_.resize(n_rows, std::vector<float>(n_columns, savvy::typed_value::end_of_vector_value<float>()));
60 loo_dosages_.resize(n_loo_rows, std::vector<float>(n_columns, savvy::typed_value::end_of_vector_value<float>()));
61 }
62
78 void fill_eov()
79 {
80 for (auto& v : dosages_)
81 std::fill(v.begin(), v.end(),savvy::typed_value::end_of_vector_value<float>());
82
83 for (auto& v : loo_dosages_)
84 std::fill(v.begin(), v.end(),savvy::typed_value::end_of_vector_value<float>());
85 }
86
94 std::array<std::size_t, 2> dimensions() const { return {dosages_.size(), dosages_.empty() ? 0 : dosages_[0].size()}; }
95
96
104 std::array<std::size_t, 2> dimensions_loo() const { return {dosages_.size(), dosages_.empty() ? 0 : dosages_[0].size()}; }
105
106
114 float& dosage(std::size_t i, std::size_t j) { return dosages_[i][j]; }
115
123 const float& dosage(std::size_t i, std::size_t j) const { return dosages_[i][j]; }
124
132 float& loo_dosage(std::size_t i, std::size_t j) { return loo_dosages_[i][j]; }
133
141 const float& loo_dosage(std::size_t i, std::size_t j) const { return loo_dosages_[i][j]; }
142};
143
160{
161private:
163 std::deque<std::vector<std::vector<float>>> forward_probs_;
164
166 std::deque<std::vector<std::vector<float>>> forward_norecom_probs_;
167
169 std::vector<std::vector<float>> junction_prob_proportions_;
170
172 std::vector<bool> precision_jumps_;
173
175 float prob_threshold_ = 0.01f;
176
178 float s1_prob_threshold_ = -1.f;
179
181 float diff_threshold_ = 0.01f;
182
184 float background_error_ = 1e-5f;
185
187 double decay_ = 0.;
188
190 static constexpr float jump_fix = 1e15f;
191
193 static constexpr float jump_threshold = 1e-10f;
194
196 const std::int16_t bin_scalar_ = 1000;
197
199 std::vector<std::uint32_t> best_s1_haps_;
200
202 std::vector<std::uint32_t> best_s2_haps_;
203
205 std::vector<std::uint32_t> best_s3_haps_;
206
208 std::vector<float> best_s1_probs_;
209
211 std::vector<float> best_s2_probs_;
212
214 std::vector<float> s2_probs_;
215
217 std::vector<std::size_t> s2_cardinalities_;
218
220 std::vector<float> best_s3_probs_;
221
222public:
238 hidden_markov_model(float s3_prob_threshold, float s1_prob_threshold, float diff_threshold, float background_error, float decay);
239
273 void traverse_forward(const std::deque<unique_haplotype_block>& ref_haps,
274 const std::vector<target_variant>& tar_variant,
275 std::size_t hap_idx);
276
314 void traverse_backward(const std::deque<unique_haplotype_block>& ref_haps,
315 const std::vector<target_variant>& tar_variant,
316 std::size_t hap_idx,
317 std::size_t out_idx,
318 const std::vector<std::vector<std::vector<std::size_t>>>& reverse_maps,
319 full_dosages_results& output,
320 const reduced_haplotypes& full_reference_data);
321private:
348 void condition(std::vector<float>& probs, std::vector<float>& probs_norecom, const std::vector<std::int8_t>& template_haps, std::int8_t observed, float err, float freq);
349
380 bool transpose(const std::vector<float>& from, std::vector<float>& to, const std::vector<float>& from_norecom, std::vector<float>& to_norecom, const std::vector<std::size_t>& uniq_cardinalities, double recom, std::size_t n_templates);
381
423 void impute_typed_site(double& prob_sum, std::size_t& prev_best_hap,
424 const std::vector<float>& left_probs,
425 const std::vector<float>& right_probs,
426 const std::vector<float>& left_probs_norecom,
427 const std::vector<float>& right_probs_norecom,
428 const std::vector<float>& left_junction_proportions,
429 const std::vector<float>& right_junction_proportions,
430 const std::vector<float>& constants,
431 const std::vector<std::vector<std::size_t>>& reverse_map,
432 const std::vector<std::int8_t>& template_haps,
433 std::int8_t observed, float err, float af,
434 std::vector<std::uint32_t>& best_uniq_haps, std::vector<float>& best_uniq_probs, float& dose, float& loo_dose);
435
482 void impute(double& prob_sum, std::size_t& prev_best_expanded_hap,
483 const std::vector<float>& left_probs,
484 const std::vector<float>& right_probs,
485 const std::vector<float>& left_probs_norecom,
486 const std::vector<float>& right_probs_norecom,
487 const std::vector<float>& left_junction_proportions,
488 const std::vector<float>& right_junction_proportions,
489 const std::vector<float>& constants,
490 const std::vector<std::int64_t>& uniq_map,
491 const std::vector<std::vector<std::size_t>>& reverse_map,
492 const std::vector<std::int8_t>& template_haps,
493 const std::vector<target_variant>& tar_variants,
494 std::size_t row, std::size_t column, std::size_t out_column,
495 full_dosages_results& output,
496 reduced_haplotypes::iterator& full_ref_ritr, const reduced_haplotypes::iterator& full_ref_rend,
497 std::size_t& prev_block_idx);
498
499 void initialize_likelihoods(std::vector<float>& probs, std::vector<float>& probs_norecom, std::vector<float>& proportions, const unique_haplotype_block& ref_block);
500
501 void s3_to_s1_probs(
502 const std::vector<float>& left_probs, const std::vector<float>& right_probs,
503 const std::vector<float>& left_probs_norecom, const std::vector<float>& right_probs_norecom,
504 const std::vector<float>& left_junction_proportions, const std::vector<float>& right_junction_proportions,
505 const std::vector<std::vector<std::size_t>>& s3_reverse_map, double prob_sum);
506 void s1_to_s2_probs(std::vector<std::size_t>& cardinalities, const std::vector<std::int64_t>& uniq_map, std::size_t s2_size);
507};
508
509#endif // MINIMAC4_HIDDEN_MARKOV_MODEL_HPP
Stores full and leave-one-out (LOO) dosages for imputed variants.
Definition hidden_markov_model.hpp:20
void fill_eov()
Fills all dosage matrices with the end-of-vector sentinel value.
Definition hidden_markov_model.hpp:78
float & loo_dosage(std::size_t i, std::size_t j)
Accesses an element of the leave-one-out dosages matrix (modifiable).
Definition hidden_markov_model.hpp:132
std::vector< std::vector< float > > loo_dosages_
Matrix of leave-one-out (LOO) dosages.
Definition hidden_markov_model.hpp:36
std::array< std::size_t, 2 > dimensions() const
Returns the dimensions of the main dosages matrix.
Definition hidden_markov_model.hpp:94
std::array< std::size_t, 2 > dimensions_loo() const
Returns the dimensions of the leave-one-out dosages matrix.
Definition hidden_markov_model.hpp:104
const float & loo_dosage(std::size_t i, std::size_t j) const
Accesses an element of the leave-one-out dosages matrix (const version).
Definition hidden_markov_model.hpp:141
const float & dosage(std::size_t i, std::size_t j) const
Accesses an element of the main dosages matrix (const version).
Definition hidden_markov_model.hpp:123
std::vector< std::vector< float > > dosages_
Matrix of imputed dosages.
Definition hidden_markov_model.hpp:28
void resize(std::size_t n_rows, std::size_t n_loo_rows, std::size_t n_columns)
Resizes the dosage matrices to the specified dimensions.
Definition hidden_markov_model.hpp:57
float & dosage(std::size_t i, std::size_t j)
Accesses an element of the main dosages matrix (modifiable).
Definition hidden_markov_model.hpp:114
void traverse_backward(const std::deque< unique_haplotype_block > &ref_haps, const std::vector< target_variant > &tar_variant, std::size_t hap_idx, std::size_t out_idx, const std::vector< std::vector< std::vector< std::size_t > > > &reverse_maps, full_dosages_results &output, const reduced_haplotypes &full_reference_data)
Performs a backward traversal over reference haplotypes to compute posterior probabilities.
Definition hidden_markov_model.cpp:121
void traverse_forward(const std::deque< unique_haplotype_block > &ref_haps, const std::vector< target_variant > &tar_variant, std::size_t hap_idx)
Performs a forward traversal over reference haplotypes for a given target haplotype.
Definition hidden_markov_model.cpp:34
hidden_markov_model(float s3_prob_threshold, float s1_prob_threshold, float diff_threshold, float background_error, float decay)
Constructs a Hidden Markov Model with specified parameters.
Definition hidden_markov_model.cpp:11
Iterator for traversing variants within reduced_haplotypes.
Definition unique_haplotype.hpp:359
Represents a collection of haplotype blocks with reduced storage.
Definition unique_haplotype.hpp:339
Represents a block of unique haplotypes and their variants.
Definition unique_haplotype.hpp:25