1#ifndef MINIMAC4_UNIQUE_HAPLOTYPE_HPP
2#define MINIMAC4_UNIQUE_HAPLOTYPE_HPP
7#include <savvy/reader.hpp>
8#include <savvy/writer.hpp>
34 std::vector<std::int64_t> unique_map_;
42 std::vector<std::size_t> cardinalities_;
51 std::vector<reference_variant> variants_;
110 const std::vector<reference_variant>&
variants()
const {
return variants_; }
120 const std::vector<std::int64_t>&
unique_map()
const {
return unique_map_; }
155 const std::vector<std::size_t>&
cardinalities()
const {
return cardinalities_; }
191 void trim(std::size_t min_pos, std::size_t max_pos);
273 bool deserialize(std::istream& is,
int m3vcf_version, std::size_t n_haplotypes);
294 int deserialize(savvy::reader& input_file, savvy::variant& var);
312 bool serialize(savvy::writer& output_file);
341 std::vector<std::size_t> block_offsets_;
342 std::deque<unique_haplotype_block> blocks_;
343 std::size_t variant_count_ = 0;
344 std::size_t min_block_size_ = 1;
345 std::size_t max_block_size_ = std::numeric_limits<std::size_t>::max();
346 bool flush_block_ =
true;
362 std::size_t block_idx_;
363 std::size_t variant_idx_;
368 variant_idx_(variant_idx)
375 assert(block_idx_ < parent_->blocks_.size());
376 if (variant_idx_ >= parent_->blocks_[block_idx_].variant_size())
393 if (variant_idx_ == 0)
396 if (block_idx_ < parent_->blocks_.size())
398 (*this) =
iterator(*parent_, block_idx_, parent_->blocks_[block_idx_].variant_size());
419 return parent_->blocks_[block_idx_].variants()[variant_idx_];
424 return &(parent_->blocks_[block_idx_].variants()[variant_idx_]);
429 std::size_t
global_idx()
const {
return parent_->block_offsets_[block_idx_] + variant_idx_; }
430 const std::vector<std::int64_t>&
unique_map()
const {
return parent_->blocks_[block_idx_].unique_map(); }
431 const std::vector<std::size_t>&
cardinalities()
const {
return parent_->blocks_[block_idx_].cardinalities(); }
498 const std::deque<unique_haplotype_block>&
blocks()
const {
return blocks_; }
534 return !(lhs == rhs);
A reader and interpolator for genetic map files.
Definition recombination.hpp:211
Iterator for traversing variants within reduced_haplotypes.
Definition unique_haplotype.hpp:359
iterator operator++(int)
Definition unique_haplotype.hpp:384
iterator & operator--()
Definition unique_haplotype.hpp:391
const std::vector< std::size_t > & cardinalities() const
Definition unique_haplotype.hpp:431
iterator & operator++()
Definition unique_haplotype.hpp:372
iterator operator--(int)
Definition unique_haplotype.hpp:410
const reference_variant * operator->() const
Definition unique_haplotype.hpp:422
std::size_t global_idx() const
Definition unique_haplotype.hpp:429
iterator(const reduced_haplotypes &parent, std::size_t block_idx, std::size_t variant_idx)
Definition unique_haplotype.hpp:365
const reference_variant & operator*() const
Definition unique_haplotype.hpp:417
const std::vector< std::int64_t > & unique_map() const
Definition unique_haplotype.hpp:430
std::size_t block_idx() const
Definition unique_haplotype.hpp:427
std::size_t block_local_idx() const
Definition unique_haplotype.hpp:428
reduced_haplotypes()
Definition unique_haplotype.hpp:437
float compression_ratio() const
Calculates the overall compression ratio of all haplotype blocks.
Definition unique_haplotype.cpp:546
void fill_cm(genetic_map_file &map_file)
Fills the centimorgan (cM) values for all variants in all blocks using the provided genetic map.
Definition unique_haplotype.cpp:540
bool compress_variant(const reference_site_info &site_info, const std::vector< std::int8_t > &alleles, bool flush_block=false)
Compresses a variant into the current reduced haplotype block.
Definition unique_haplotype.cpp:482
std::size_t variant_size() const
Returns the total number of variants across all blocks.
Definition unique_haplotype.hpp:505
iterator begin() const
Definition unique_haplotype.hpp:434
iterator end() const
Definition unique_haplotype.hpp:435
void append_block(const unique_haplotype_block &block)
Appends a new haplotype block to the collection of reduced haplotype blocks.
Definition unique_haplotype.cpp:515
const std::deque< unique_haplotype_block > & blocks() const
Accesses the stored haplotype blocks.
Definition unique_haplotype.hpp:498
Represents a block of unique haplotypes and their variants.
Definition unique_haplotype.hpp:25
const std::vector< std::int64_t > & unique_map() const
Get the mapping from haplotypes to unique columns.
Definition unique_haplotype.hpp:120
std::size_t expanded_haplotype_size() const
Get the number of original haplotypes after expansion.
Definition unique_haplotype.hpp:129
void fill_cm(genetic_map_file &map_file)
Fills the centimorgan (cM) values for all variants in the haplotype block.
Definition unique_haplotype.cpp:162
void pop_variant()
Removes the last variant from the haplotype block.
Definition unique_haplotype.cpp:157
const std::vector< std::size_t > & cardinalities() const
Get the haplotype cardinalities across unique columns.
Definition unique_haplotype.hpp:155
void trim(std::size_t min_pos, std::size_t max_pos)
Trims variants outside a specified genomic range.
Definition unique_haplotype.cpp:130
bool compress_variant(const reference_site_info &site_info, const std::vector< std::int8_t > &alleles)
Compress and map haplotype alleles for a new variant into the block.
Definition unique_haplotype.cpp:10
std::size_t unique_haplotype_size() const
Get the number of unique haplotypes represented in the block.
Definition unique_haplotype.hpp:139
std::size_t variant_size() const
Get the number of variants compressed in this block.
Definition unique_haplotype.hpp:145
const std::vector< reference_variant > & variants() const
Get the list of compressed reference variants in this block.
Definition unique_haplotype.hpp:110
void remove_eov()
Removes "end-of-vector" markers from the unique haplotype map.
Definition unique_haplotype.cpp:283
void clear()
Clears all data stored in the haplotype block.
Definition unique_haplotype.cpp:123
void fill_cm_from_recom(double &start_cm)
Fills missing centimorgan (cM) values for variants using recombination probabilities.
Definition unique_haplotype.cpp:168
bool deserialize(std::istream &is, int m3vcf_version, std::size_t n_haplotypes)
Deserialize a unique haplotype block from an input stream (m3vcf format).
Definition unique_haplotype.cpp:300
bool serialize(savvy::writer &output_file)
Serializes the unique haplotype block to a SAVVY output file.
Definition unique_haplotype.cpp:181
Stores information about a site in the reference dataset.
Definition variant.hpp:40
Extends reference_site_info to include genotype data and allele counts.
Definition variant.hpp:76
bool operator==(const reduced_haplotypes::iterator &lhs, const reduced_haplotypes::iterator &rhs)
Compares two iterators for equality.
Definition unique_haplotype.hpp:518
bool operator!=(const reduced_haplotypes::iterator &lhs, const reduced_haplotypes::iterator &rhs)
Compares two iterators for inequality.
Definition unique_haplotype.hpp:532