Variant search

You can search for amino acid or DNA variants in a BIGSI index by converting them into sequence probes, which can then be queried and genotyped.

1. Setup

## Imports
import copy
import sys 
import json
from bigsi.graph.probabilistic import BIGSI
from bigsi.variants.search import BIGSIVariantSearch
from bigsi.variants.search import BIGSIAminoAcidMutationSearch

# Build the BIGSI handle used by every search below.
# NOTE(review): the argument is presumably the on-disk location of the index;
# adjust it for your own setup.
bigsi = BIGSI("/ssd0/bigsi/")

Amino acid mutation search

# Search the index for a single amino acid substitution (pncA W68G).
# The class name must match the one imported at the top of the file
# (BIGSIAminoAcidMutationSearch); the old CBG* name is not defined here.
var_search = BIGSIAminoAcidMutationSearch(
    bigsi, reference="mtub.fasta", genbank="mtub.gb"
)
r = var_search.search_for_amino_acid_variant(gene="pncA", ref="W", pos=68, alt="G")

# Header row for the table printed below.
print("var", "sample", "genotype")
# Only report samples whose genotype is something other than "0/0".
for sample, d in r["pncA_W68G"].items():
    if d.get("genotype") != "0/0":
        print("pncA_W68G", sample, d["genotype"])

var sample genotype
pncA_W68G ERR551464 0/1
pncA_W68G ERR133940 0/1
pncA_W68G SRR1165646 0/1
pncA_W68G SRR1167382 0/1   
...

DNA Variant Search

EXAMPLE_VAR = "C371T"  # We'll restrict to 1 variant to test

# Same workflow as the amino acid search above, but for a DNA variant given
# directly in reference coordinates. Uses the imported BIGSIVariantSearch
# class and the `bigsi` index created during setup (the old CBGVariantSearch /
# cbg names are not defined in this file).
var_search = BIGSIVariantSearch(bigsi, reference="ref.fasta")
r = var_search.search_for_variant(ref_base="C", pos=371, alt_base="T")

# Header row for the table printed below.
print("var", "sample", "genotype")
# Only report samples whose genotype is something other than "0/0".
for sample, d in r[EXAMPLE_VAR].items():
    if d.get("genotype") != "0/0":
        print(EXAMPLE_VAR, sample, d["genotype"])
        
var sample genotype
C371T SRR1173278 1/1
C371T SRR024051 1/1
C371T SRR1049716 1/1
...