Minimac4
Loading...
Searching...
No Matches
input_prep.hpp
Go to the documentation of this file.
1#ifndef MINIMAC4_INPUT_PREP_HPP
2#define MINIMAC4_INPUT_PREP_HPP
3
5
6#include <savvy/reader.hpp>
7
/// @brief Extract sample IDs from a target panel file.
/// @param tar_file_path Path of the target panel file to inspect.
/// @param sample_ids    Output parameter (non-const reference); presumably filled with the
///                      sample IDs found in the file -- confirm against input_prep.cpp.
/// @return Status flag; presumably true on success -- TODO confirm against the definition.
26bool stat_tar_panel(const std::string& tar_file_path, std::vector<std::string>& sample_ids);
27
/// @brief Inspect a reference panel file to determine chromosome and end position.
/// @param ref_file_path Path of the reference panel file to inspect.
/// @param chrom         Output parameter (non-const reference); presumably receives the
///                      chromosome name -- confirm against input_prep.cpp.
/// @param end_pos       Output parameter (non-const reference); presumably receives the end
///                      position -- coordinate convention not visible here, TODO confirm.
/// @return Status flag; presumably true on success -- TODO confirm against the definition.
106bool stat_ref_panel(const std::string& ref_file_path, std::string& chrom, std::uint64_t& end_pos);
107
/// @brief Load haplotypes from a target file for a given genomic region.
/// @param file_path    Path of the target file to read.
/// @param reg          Genomic region to load.
/// @param target_sites Non-const reference; presumably populated with the variants read from
///                     the region -- confirm against input_prep.cpp.
/// @param sample_ids   Non-const reference; presumably populated with the file's sample IDs
///                     -- TODO confirm.
/// @return Status flag; presumably true on success -- TODO confirm against the definition.
153bool load_target_haplotypes(const std::string& file_path, const savvy::genomic_region& reg, std::vector<target_variant>& target_sites, std::vector<std::string>& sample_ids);
154
/// @brief Load and process reference haplotypes from an MVCF file.
/// @param file_path                 Path of the reference (MVCF) file to read.
/// @param extended_reg              Genomic region; presumably the imputation region plus
///                                  flanking overlap -- TODO confirm against the definition.
/// @param impute_reg                Genomic region; presumably the region actually being
///                                  imputed -- TODO confirm.
/// @param subset_ids                Set of ID strings; presumably restricts which reference
///                                  samples are loaded -- TODO confirm.
/// @param target_sites              Target variants, passed by non-const reference, so the
///                                  callee may modify them.
/// @param typed_only_reference_data Reduced-storage haplotype block collection passed by
///                                  non-const reference; presumably filled with reference
///                                  data at sites typed in the target -- TODO confirm.
/// @param full_reference_data       Reduced-storage haplotype block collection passed by
///                                  non-const reference; presumably filled with all loaded
///                                  reference sites -- TODO confirm.
/// @param map_file                  Pointer to a genetic map reader/interpolator; whether
///                                  nullptr is accepted is not visible here -- TODO confirm.
/// @param min_recom                 Presumably a lower bound applied to recombination
///                                  values -- TODO confirm.
/// @param default_match_error       Presumably the match-error value used when none is
///                                  available for a site -- TODO confirm.
/// @return Status flag; presumably true on success -- TODO confirm against the definition.
190bool load_reference_haplotypes(const std::string& file_path,
191 const savvy::genomic_region& extended_reg,
192 const savvy::genomic_region& impute_reg,
193 const std::unordered_set<std::string>& subset_ids,
194 std::vector<target_variant>& target_sites,
195 reduced_haplotypes& typed_only_reference_data,
196 reduced_haplotypes& full_reference_data,
197 genetic_map_file* map_file,
198 float min_recom,
199 float default_match_error);
200
/// @brief Loads reference haplotypes using an older recombination-based approach.
///
/// Takes the same leading parameters as load_reference_haplotypes() but has no
/// min_recom / default_match_error arguments.
/// @param file_path                 Path of the reference file to read.
/// @param extended_reg              Genomic region; presumably the imputation region plus
///                                  flanking overlap -- TODO confirm against the definition.
/// @param impute_reg                Genomic region; presumably the region actually being
///                                  imputed -- TODO confirm.
/// @param subset_ids                Set of ID strings; presumably restricts which reference
///                                  samples are loaded -- TODO confirm.
/// @param target_sites              Target variants, passed by non-const reference, so the
///                                  callee may modify them.
/// @param typed_only_reference_data Reduced-storage haplotype block collection passed by
///                                  non-const reference (presumably an output).
/// @param full_reference_data       Reduced-storage haplotype block collection passed by
///                                  non-const reference (presumably an output).
/// @param map_file                  Pointer to a genetic map reader/interpolator; whether
///                                  nullptr is accepted is not visible here -- TODO confirm.
/// @return Status flag; presumably true on success -- TODO confirm against the definition.
241bool load_reference_haplotypes_old_recom_approach(const std::string& file_path,
242 const savvy::genomic_region& extended_reg,
243 const savvy::genomic_region& impute_reg,
244 const std::unordered_set<std::string>& subset_ids,
245 std::vector<target_variant>& target_sites,
246 reduced_haplotypes& typed_only_reference_data,
247 reduced_haplotypes& full_reference_data,
248 genetic_map_file* map_file);
249
/// @brief Separates target-only variants from those found in the reference panel.
/// @param target_sites Target variants, passed by non-const reference; presumably the
///                     target-only entries are removed from it -- TODO confirm against
///                     input_prep.cpp.
/// @return A vector of target_variant; presumably the target-only variants that were
///         separated out -- TODO confirm.
273std::vector<target_variant> separate_target_only_variants(std::vector<target_variant>& target_sites);
274
/// @brief Loads Hidden Markov Model (HMM) parameters for target variants.
/// @param tar_variants              Target variants, passed by non-const reference, so the
///                                  callee may update them.
/// @param typed_only_reference_data Reduced-storage haplotype block collection, passed by
///                                  non-const reference, so the callee may update it.
/// @param default_error_param       Presumably the error value used when a variant has none
///                                  of its own -- TODO confirm against the definition.
/// @param recom_min                 Presumably a lower bound applied to recombination
///                                  values -- TODO confirm.
/// @param map_file_path             Path of a genetic map file; handling of an empty path is
///                                  not visible here -- TODO confirm.
/// @return Status flag; presumably true on success -- TODO confirm against the definition.
312bool load_variant_hmm_params(std::vector<target_variant>& tar_variants, reduced_haplotypes& typed_only_reference_data, float default_error_param, float recom_min, const std::string& map_file_path);
313
/// @brief Generates reverse mapping tables for reduced haplotype blocks.
/// @param typed_only_reference_data Reduced-storage haplotype block collection to build the
///                                  tables from (read-only).
/// @return Three-level nested vector of std::size_t. NOTE(review): the meaning of each
///         nesting level (presumably block / unique haplotype / original haplotype index)
///         is not visible in this header -- confirm against input_prep.cpp.
345std::vector<std::vector<std::vector<std::size_t>>> generate_reverse_maps(const reduced_haplotypes& typed_only_reference_data);
346
/// @brief Converts an old M3VCF file (v1/v2) to a newer VCF-like format (MVCFv3.0).
/// @param input_path    Path of the old-format M3VCF file to read.
/// @param output_path   Path the converted output is written to.
/// @param map_file_path Path of a genetic map file; defaults to the empty string, which
///                      presumably means "no map file" -- TODO confirm against the definition.
/// @return Status flag; presumably true on success -- TODO confirm against the definition.
383bool convert_old_m3vcf(const std::string& input_path, const std::string& output_path, const std::string& map_file_path = "");
384
/// @brief Compress a haplotype reference panel into blocks and write to an output file.
/// @param input_path     Path of the reference panel to read.
/// @param output_path    Path the compressed panel is written to.
/// @param min_block_size Defaults to 10; presumably the smallest block size the compressor
///                       will produce -- units not visible here, TODO confirm.
/// @param max_block_size Defaults to 0xFFFF (see the inline note on the parameter);
///                       presumably the largest allowed block size -- TODO confirm.
/// @param slope_unit     Defaults to 10; its role in choosing block boundaries is not
///                       visible in this header -- TODO confirm against input_prep.cpp.
/// @param map_file_path  Path of a genetic map file; defaults to the empty string, which
///                       presumably means "no map file" -- TODO confirm.
/// @return Status flag; presumably true on success -- TODO confirm against the definition.
414bool compress_reference_panel(const std::string& input_path, const std::string& output_path,
415 std::size_t min_block_size = 10,
416 std::size_t max_block_size = 0xFFFF, // max s1r block size minus 1 partition record
417 std::size_t slope_unit = 10,
418 const std::string& map_file_path = "");
419
420
421#endif // MINIMAC4_INPUT_PREP_HPP
A reader and interpolator for genetic map files.
Definition recombination.hpp:211
Represents a collection of haplotype blocks with reduced storage.
Definition unique_haplotype.hpp:339
bool load_reference_haplotypes(const std::string &file_path, const savvy::genomic_region &extended_reg, const savvy::genomic_region &impute_reg, const std::unordered_set< std::string > &subset_ids, std::vector< target_variant > &target_sites, reduced_haplotypes &typed_only_reference_data, reduced_haplotypes &full_reference_data, genetic_map_file *map_file, float min_recom, float default_match_error)
Load and process reference haplotypes from an MVCF file.
Definition input_prep.cpp:169
std::vector< target_variant > separate_target_only_variants(std::vector< target_variant > &target_sites)
Separates target-only variants from those found in the reference panel.
Definition input_prep.cpp:485
std::vector< std::vector< std::vector< std::size_t > > > generate_reverse_maps(const reduced_haplotypes &typed_only_reference_data)
Generates reverse mapping tables for reduced haplotype blocks.
Definition input_prep.cpp:555
bool stat_ref_panel(const std::string &ref_file_path, std::string &chrom, std::uint64_t &end_pos)
Inspect a reference panel file to determine chromosome and end position.
Definition input_prep.cpp:18
bool convert_old_m3vcf(const std::string &input_path, const std::string &output_path, const std::string &map_file_path="")
Converts an old M3VCF file (v1/v2) to a newer VCF-like format (MVCFv3.0).
Definition input_prep.cpp:579
bool compress_reference_panel(const std::string &input_path, const std::string &output_path, std::size_t min_block_size=10, std::size_t max_block_size=0xFFFF, std::size_t slope_unit=10, const std::string &map_file_path="")
Compress a haplotype reference panel into blocks and write to an output file.
Definition input_prep.cpp:708
bool load_target_haplotypes(const std::string &file_path, const savvy::genomic_region &reg, std::vector< target_variant > &target_sites, std::vector< std::string > &sample_ids)
Load haplotypes from a target file for a given genomic region.
Definition input_prep.cpp:117
bool stat_tar_panel(const std::string &tar_file_path, std::vector< std::string > &sample_ids)
Extract sample IDs from a target panel file.
Definition input_prep.cpp:7
bool load_variant_hmm_params(std::vector< target_variant > &tar_variants, reduced_haplotypes &typed_only_reference_data, float default_error_param, float recom_min, const std::string &map_file_path)
Loads Hidden Markov Model (HMM) parameters for target variants.
Definition input_prep.cpp:510
bool load_reference_haplotypes_old_recom_approach(const std::string &file_path, const savvy::genomic_region &extended_reg, const savvy::genomic_region &impute_reg, const std::unordered_set< std::string > &subset_ids, std::vector< target_variant > &target_sites, reduced_haplotypes &typed_only_reference_data, reduced_haplotypes &full_reference_data, genetic_map_file *map_file)
Loads reference haplotypes using an older recombination-based approach.
Definition input_prep.cpp:293