---
title: Runner module
keywords: fastai
sidebar: home_sidebar
summary: "API details."
description: "API details."
nb_path: "nbs/01_Runner.ipynb"
---
{% raw %}
{% endraw %} {% raw %}

format[source]

format(tpeds, tfam, prev=None, wild_pen=None, muta_pen=None, out_format='MERLIN', inherit_mode=None, theta_max=None, theta_inc=None)

{% endraw %} {% raw %}

format_plink(tped, tfam)

{% endraw %} {% raw %}

format_mega2[source]

format_mega2(tped, tfam)

{% endraw %} {% raw %}

format_merlin[source]

format_merlin(tped, tfam)

{% endraw %} {% raw %}

format_linkage[source]

format_linkage(tped, tfam, prev, wild_pen, muta_pen, inherit_mode, theta_max, theta_inc)

{% endraw %} {% raw %}

parse_tfam[source]

parse_tfam(fh)

{% endraw %} {% raw %}

class Pedigree[source]

Pedigree()

{% endraw %} {% raw %}

run_linkage[source]

run_linkage(blueprint, theta_inc, theta_max, to_plot=True)

{% endraw %} {% raw %}

linkage_worker[source]

linkage_worker(blueprint, workdir, theta_inc, theta_max, errfile, to_plot=True)

{% endraw %} {% raw %}

hinton[source]

hinton(filename, max_weight=None, ax=None)

{% endraw %} {% raw %}

heatmap[source]

heatmap(file, theta_inc, theta_max)

{% endraw %} {% raw %}

hlod_fun[source]

hlod_fun(Li, sign=1)

{% endraw %} {% raw %}

html[source]

html(theta_inc, theta_max, limit)

{% endraw %} {% raw %}

html_img[source]

html_img(ltype)

{% endraw %} {% raw %}

html_table[source]

html_table(type, theta_inc, theta_max, limit)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
tped = '../seqtest/CACHE/seqtest.chr16.tped'
tfam = '../seqtest/CACHE/seqtest.tfam'
with open(tped) as tped_fh, open(tfam) as tfam_fh:
    fams = parse_tfam(tfam_fh)

    for line in tped_fh:
        s = line.strip().split()
        gene, gno = re.search(r'^(\S+?)(?:\[(\d+)\])?$', s[1]).groups()
        print(gene,gno)
MC1R 1
MC1R 2
{% endraw %}

Testing runner

1. Parameters

{% raw %}
from SEQLinkage.Main import *
from __future__ import print_function
from SEQLinkage.Utils import *
from multiprocessing import Process, Queue
from collections import OrderedDict
import itertools
from copy import deepcopy
import sys, faulthandler, platform
import numpy as np
import os
if sys.version_info.major == 2:
    from cstatgen import cstatgen_py2 as cstatgen
    from cstatgen.egglib import Align
else:
    from cstatgen import cstatgen_py3 as cstatgen
    import egglib
    from egglib import Align
{% endraw %}

args = Args().parser.parse_args('--fam data/mwe_normal_fam.csv --vcf data/first1000snp_full_samples.vcf.gz -f MERLIN --blueprint data/genemap.hg38.txt --freq AF --run-linkage -K 0.001 --moi AD -W 0 -M 1 --theta-max 0.5 --theta-inc 0.05'.split())

{% raw %}
args = Args().parser.parse_args('--fam ../seqlinkage-example/seqlinkage-example.fam --vcf ../seqlinkage-example/seqlinkage-example.vcf.gz -f MERLIN --blueprint ../data/genemap.txt --freq EVSEAAF -o seqtest --run-linkage -K 0.001 --moi AD -W 0 -M 1 --theta-max 0.5 --theta-inc 0.05'.split())
{% endraw %} {% raw %}
args
Namespace(bin=0.8, blueprint='../data/genemap.txt', single_markers=False, tfam='../seqlinkage-example/seqlinkage-example.fam', vcf='../seqlinkage-example/seqlinkage-example.vcf.gz', build='hg19', prephased=False, freq='EVSEAAF', freq_by_fam=None, mle=False, rvhaplo=False, recomb_max=1, recomb_cross_fam=False, rsq=0.0, include_vars=None, maf_cutoff=1.0, chr_prefix=None, output='seqtest', format=['MERLIN'], prevalence=0.001, inherit_mode='AD', wild_pen=0.0, muta_pen=1.0, theta_max=0.5, theta_inc=0.05, run_linkage=True, output_limit=10, jobs=16, tempdir=None, vanilla=True, quiet=False, debug=False, no_save=False, func=<function main at 0x2aaf40c79b80>)
{% endraw %} {% raw %}
checkParams(args)
MESSAGE: Binary trait detected in [/mnt/mfs/statgen/yin/Github/linkage/SEQpy3/seqlinkage-example/seqlinkage-example.fam]
True
{% endraw %} {% raw %}
if args.no_save:
    cache = NoCache()
else:
    cache = Cache(env.cache_dir, env.output, vars(args))
cache.setID('vcf')
{% endraw %}

2. Testing run_linkage

{% raw %}
args.run_linkage
True
{% endraw %} {% raw %}
cache.setID('analysis')
{% endraw %} {% raw %}
env.output
'seqtest'
{% endraw %} {% raw %}
?cache.load
Signature: cache.load(target_dir=None, names=None)
Docstring: <no docstring>
File:      /mnt/mfs/statgen/yin/Github/linkage/SEQpy3/SEQLinkage/Utils.py
Type:      method
{% endraw %} {% raw %}
cache.cache_name
'cache/seqtest.cache'
{% endraw %} {% raw %}
not args.vanilla and cache.check()
False
{% endraw %} {% raw %}
fmt = args.format[0]
{% endraw %} {% raw %}
args.blueprint
'data/genemap.hg38.txt'
{% endraw %} {% raw %}
args.theta_inc
0.05
{% endraw %} {% raw %}
args.theta_max
0.5
{% endraw %} {% raw %}
args.output_limit
10
{% endraw %} {% raw %}
??run_linkage
Signature: run_linkage(blueprint, theta_inc, theta_max, to_plot=True)
Docstring: <no docstring>
Source:   
def run_linkage(blueprint, theta_inc, theta_max, to_plot = True):
    try:
        remove_tree(os.path.join(env.outdir, 'heatmap'))
    except OSError:
        pass
    with open(os.path.join(env.tmp_dir, 'LinkageRuntimeError.txt'), 'w') as runtime_err:
        workdirs = glob.glob('{}/LINKAGE/{}.chr*'.format(env.tmp_dir, env.output))
        parmap(lambda x: linkage_worker(blueprint, x, theta_inc, theta_max, runtime_err, to_plot) , workdirs, env.jobs)
File:      /tmp/2206534.1.plot.q/ipykernel_30528/2972776299.py
Type:      function
{% endraw %} {% raw %}
LINKAGE/LINKAGE$ head LINKAGE.chr1/DDX11L1/1036/*
{% endraw %} {% raw %}
env.tmp_dir = './LINKAGE'
{% endraw %} {% raw %}
env.jobs=8
{% endraw %} {% raw %}
env.tmp_dir
'./LINKAGE'
{% endraw %} {% raw %}
env.output
'LINKAGE'
{% endraw %} {% raw %}
with open(os.path.join(env.tmp_dir, 'LinkageRuntimeError.txt'), 'w') as runtime_err:
    workdirs = glob.glob('{}/LINKAGE/{}.chr*'.format(env.tmp_dir, env.output))
    print(workdirs)
    linkage_worker(blueprint, workdirs[0], theta_inc, theta_max, runtime_err, to_plot)
['./LINKAGE/LINKAGE/LINKAGE.chr1']
{% endraw %} {% raw %}
tmp = {'a':1,'b':2}
{% endraw %} {% raw %}
tmp.values()
dict_values([1, 2])
{% endraw %}

Testing linkage_worker line by line

{% raw %}
blueprint, theta_inc, theta_max, to_plot = args.blueprint, args.theta_inc, args.theta_max, True
{% endraw %} {% raw %}
import glob
{% endraw %} {% raw %}
# Per-chromosome LINKAGE work directories; the root is env.tmp_dir, matching
# the identical glob used inside run_linkage.
workdirs = glob.glob('{}/LINKAGE/{}.chr*'.format(env.tmp_dir, env.output))
{% endraw %} {% raw %}
workdir = '../seqtest/LINKAGE/seqtest.chr16/'
{% endraw %} {% raw %}
env.tmp_dir
'/tmp/2206534.1.plot.q/SEQLinkage_tmp_78m1_7qv'
{% endraw %} {% raw %}
env.outdir
'seqtest'
{% endraw %} {% raw %}
genemap = {}
if blueprint:
    with open(blueprint) as f:
        for line in f.readlines():
            chrID, start, end, gene = line.strip().split()[:4]
            genemap[gene] = [chrID, int(start), int(end)]
{% endraw %} {% raw %}
with env.lock:
    mkpath('{}/heatmap'.format(env.outdir))
lods_fh = open('{}/heatmap/{}.lods'.format(env.outdir, basename(workdir)), 'w')
{% endraw %} {% raw %}
hlods_fh = open('{}/heatmap/{}.hlods'.format(env.outdir, basename(workdir)), 'w')
famlods_fh = open('{}/heatmap/{}.family-lods'.format(env.outdir, basename(workdir)), 'w')
{% endraw %} {% raw %}
genes = list(filter(lambda g: g in genemap, map(basename, glob.glob(workdir + '/*'))))
{% endraw %} {% raw %}
gene = genes[0]
{% endraw %} {% raw %}
lods = {}
hlods = {}
fams = list(map(basename, filter(isdir, glob.glob('{}/{}/*'.format(workdir, gene)))))
{% endraw %} {% raw %}
fams
['2', '1']
{% endraw %} {% raw %}
# Exploratory replay of linkage_worker's inner loop for the single `gene`
# selected above: every family directory is visited and each *.PRE/*.LOC unit
# is pushed through makeped -> pedcheck -> unknown -> mlink, keeping the best
# LOD per (theta, family) in `lods`.
# NOTE(review): `errfile` is not assigned in any visible notebook cell (it is a
# parameter of linkage_worker), so the error-reporting branches below would
# raise NameError if reached here -- confirm before re-running this cell.
for fam in fams:
    with cd('{}/{}/{}'.format(workdir, gene, fam)):
        # Unit name = the .PRE file name with its extension removed (digits only).
        units = map(lambda x: re.sub(r'^(\d+?)\.PRE$', r'\1', x) ,glob.glob('*.PRE'))
        for unit in units:
            # Stage the unit's files under the fixed names passed to the tools below.
            copy_file('{}.LOC'.format(unit), 'datafile.dat')
            copy_file('{}.PRE'.format(unit), 'pedfile.pre')

            # Step 1: makeped. A truthy step1[1] is treated as failure
            # (presumably captured stderr -- confirm against runCommand).
            step1 = runCommand(['makeped', 'pedfile.pre', 'pedfile.ped', 'n'],
                               show_stderr = False, return_zero = False)
            if step1[1]:
                if env.debug:
                    with env.lock:
                        print("makeped error:", step1[1], file = sys.stderr)
                with env.makeped_counter.get_lock():
                    env.makeped_counter.value += 1
                with env.lock:
                    errfile.write(step1[1])
                continue
            # Step 2: pedcheck. Output lines that are blank or start with '*'
            # are ignored; anything else is logged but does not skip the unit.
            step2 = runCommand(['pedcheck', '-p', 'pedfile.ped', '-d', 'datafile.dat', '-c'],
                               show_stderr = False, return_zero = False)
            if step2[1]:
                lines = [x for x in step2[1].split('\n')
                         if not x.strip().startswith('*') and x.strip()]
                if len(lines) > 0:
                    env.log('{} lines'.format(len(lines)))
                    with env.lock:
                        errfile.write(step2[1])
                    if env.debug:
                        with env.lock:
                            print("pedcheck error:", '\n'.join(lines), file = sys.stderr)
            # A failed copy of zeroout.dat is what gets counted as a pedcheck
            # failure (presumably pedcheck writes that file -- confirm).
            try:
                copy_file('zeroout.dat', 'pedfile.dat')
            except:
                with env.pedcheck_counter.get_lock():
                    env.pedcheck_counter.value += 1
                continue
            # Step 3: unknown. Any reported error skips the unit.
            step3 = runCommand('unknown', show_stderr = False, return_zero = False)
            if step3[1]:
                if env.debug:
                    with env.lock:
                        print("unkn error:", step3[1], file = sys.stderr)
                with env.unknown_counter.get_lock():
                    env.unknown_counter.value += 1
                with env.lock:
                    errfile.write(step3[1])
                continue
            # Step 4: mlink. Errors are recorded, but the unit is only skipped
            # when outfile.dat cannot be copied below.
            step4 = runCommand('mlink', show_stderr = False, return_zero = False)
            if step4[1]:
                if env.debug:
                    with env.lock:
                        print("mlink error:", step4[1], file = sys.stderr)
                with env.lock:
                    errfile.write(step4[1])
            try:
                copy_file('outfile.dat', '{}.out'.format(unit))
            except:
                with env.mlink_counter.get_lock():
                    env.mlink_counter.value += 1
                continue
            # Clean up LINKAGE temporary files before the next unit.
            for f in set(glob.glob('*.dat') + glob.glob('ped*') + ['names.tmp']):
                os.remove(f)
            # Collect the LOD score reported for each theta for this family.
            with open('{}.out'.format(unit)) as out:
                raw = out.read()
                # Each match is a "THETAS <theta>" line followed, seven lines
                # further on, by "LOD SCORE = <value>".
                for i in re.finditer(r'^THETAS\s+(0\.\d+)(?:\n.+?){7}LOD SCORE =\s+(-?\d+\.\d+)', raw, re.MULTILINE):
                    theta, lod = map(float, i.group(1,2))
                    #if float(lod) < 1e-6:
                    #    lod = 0
                    # Keep the largest LOD seen across units for this (theta, family).
                    if theta not in lods:
                        lods[theta] = {fam: lod}
                    elif fam not in lods[theta] or lod > lods[theta][fam]:
                        lods[theta][fam] = lod
{% endraw %} {% raw %}
lods
{0.5: {'2': 0.0, '1': 0.0},
 0.0: {'2': -4.342935563865039e+19, '1': -4.342935563865039e+19},
 0.05: {'2': -0.617441, '1': -0.102085},
 0.1: {'2': -0.359717, '1': 0.06997},
 0.15: {'2': -0.226725, '1': 0.119647},
 0.2: {'2': -0.144602, '1': 0.122476},
 0.25: {'2': -0.090176, '1': 0.103643},
 0.3: {'2': -0.053155, '1': 0.075761},
 0.35: {'2': -0.028121, '1': 0.046732},
 0.4: {'2': -0.011976, '1': 0.02209},
 0.45: {'2': -0.00292, '1': 0.005723}}
{% endraw %} {% raw %}
def run_linkage(blueprint, theta_inc, theta_max, to_plot = True):
    """Run linkage_worker in parallel over every per-chromosome work directory.

    Any existing ``<env.outdir>/heatmap`` tree is removed first. A fresh
    ``LinkageRuntimeError.txt`` is then opened in ``env.tmp_dir`` and handed to
    each worker as its error log. The work directories are the paths matching
    ``<env.tmp_dir>/LINKAGE/<env.output>.chr*``; they are dispatched through
    ``parmap`` using ``env.jobs`` processes.

    Args:
        blueprint, theta_inc, theta_max, to_plot: forwarded unchanged to
            ``linkage_worker``.
    """
    stale_heatmaps = os.path.join(env.outdir, 'heatmap')
    try:
        remove_tree(stale_heatmaps)
    except OSError:
        # Best effort: a failed removal (e.g. nothing to remove) is ignored.
        pass

    error_log_path = os.path.join(env.tmp_dir, 'LinkageRuntimeError.txt')
    with open(error_log_path, 'w') as runtime_err:

        def _process(chr_dir):
            return linkage_worker(blueprint, chr_dir, theta_inc, theta_max, runtime_err, to_plot)

        pattern = '{}/LINKAGE/{}.chr*'.format(env.tmp_dir, env.output)
        parmap(_process, glob.glob(pattern), env.jobs)

def linkage_worker(blueprint, workdir, theta_inc, theta_max, errfile, to_plot = True):
    """Run makeped/pedcheck/unknown/mlink for every gene under one work directory.

    Each gene directory in ``workdir`` that is present in the gene map is
    processed in gene-map order. Within a gene, every family sub-directory is
    entered and each ``*.PRE``/``*.LOC`` unit is pushed through the command
    chain. The largest LOD per (theta, family) is kept. Three files named
    after ``basename(workdir)`` are written to ``<env.outdir>/heatmap``:

    * ``.lods``        -- gene, map entry, theta, LOD summed over families
    * ``.family-lods`` -- family, gene, map entry, theta, family LOD
    * ``.hlods``       -- gene, map entry, alpha, theta, HLOD, where alpha comes
      from a bounded scalar minimisation of ``hlod_fun(..., -1)`` on [0, 1]

    Args:
        blueprint: path to a whitespace-delimited file whose first four
            columns are chromosome, start, end and gene name. When falsy, the
            map is built from ``<env.tmp_cache>/<basename(workdir)>.tped``
            using columns 1, 2 and 4 (position, position + 1).
        workdir: directory holding one sub-directory per gene.
        theta_inc, theta_max: forwarded unchanged to ``heatmap``.
        errfile: writable file object that receives element 1 of
            ``runCommand``'s result whenever a step reports a problem.
        to_plot: when true, ``heatmap`` is called for the ``.lods`` and
            ``.hlods`` files after they have been closed.
    """
    #env.log("Start running LINKAGE for {} ...".format(workdir), flush=True)
    #hash genes into genemap
    genemap = {}
    if blueprint:
        with open(blueprint) as f:
            for line in f:
                chrID, start, end, gene = line.strip().split()[:4]
                genemap[gene] = [chrID, int(start), int(end)]
    else:
        tped = os.path.join(env.tmp_cache, basename(workdir) + '.tped')
        with open(tped) as f:
            for line in f:
                items = line.strip().split()[:4]
                chrID = items[0]
                gene = items[1]
                pos = items[3]
                genemap[gene] = [chrID, int(pos), int(pos)+1]
    with env.lock:
        mkpath('{}/heatmap'.format(env.outdir))
    out_prefix = '{}/heatmap/{}'.format(env.outdir, basename(workdir))
    genes = list(filter(lambda g: g in genemap, map(basename, glob.glob(workdir + '/*'))))
    # The context manager guarantees the three result files are closed even if
    # a unit raises half-way through.
    with open(out_prefix + '.lods', 'w') as lods_fh, \
         open(out_prefix + '.hlods', 'w') as hlods_fh, \
         open(out_prefix + '.family-lods', 'w') as famlods_fh:
        for gene in sorted(genes, key=lambda g: genemap[g]):
            lods = {}
            fams = map(basename, filter(isdir, glob.glob('{}/{}/*'.format(workdir, gene))))
            for fam in fams:
                with cd('{}/{}/{}'.format(workdir, gene, fam)):
                    # Unit name = the .PRE file name with its extension removed.
                    units = map(lambda x: re.sub(r'^(\d+?)\.PRE$', r'\1', x) ,glob.glob('*.PRE'))
                    for unit in units:
                        # Stage the unit's files under the fixed names passed to the tools.
                        copy_file('{}.LOC'.format(unit), 'datafile.dat')
                        copy_file('{}.PRE'.format(unit), 'pedfile.pre')

                        step1 = runCommand(['makeped', 'pedfile.pre', 'pedfile.ped', 'n'],
                                           show_stderr = False, return_zero = False)
                        if step1[1]:
                            if env.debug:
                                with env.lock:
                                    print("makeped error:", step1[1], file = sys.stderr)
                            with env.makeped_counter.get_lock():
                                env.makeped_counter.value += 1
                            with env.lock:
                                errfile.write(step1[1])
                            continue
                        step2 = runCommand(['pedcheck', '-p', 'pedfile.ped', '-d', 'datafile.dat', '-c'],
                                           show_stderr = False, return_zero = False)
                        if step2[1]:
                            # Blank lines and lines starting with '*' are not treated as errors.
                            lines = [x for x in step2[1].split('\n')
                                     if not x.strip().startswith('*') and x.strip()]
                            if len(lines) > 0:
                                env.log('{} lines'.format(len(lines)))
                                with env.lock:
                                    errfile.write(step2[1])
                                if env.debug:
                                    with env.lock:
                                        print("pedcheck error:", '\n'.join(lines), file = sys.stderr)
                        try:
                            copy_file('zeroout.dat', 'pedfile.dat')
                        except Exception:
                            # A missing zeroout.dat is counted as a pedcheck failure.
                            # KeyboardInterrupt/SystemExit are no longer swallowed.
                            with env.pedcheck_counter.get_lock():
                                env.pedcheck_counter.value += 1
                            continue
                        step3 = runCommand('unknown', show_stderr = False, return_zero = False)
                        if step3[1]:
                            if env.debug:
                                with env.lock:
                                    print("unkn error:", step3[1], file = sys.stderr)
                            with env.unknown_counter.get_lock():
                                env.unknown_counter.value += 1
                            with env.lock:
                                errfile.write(step3[1])
                            continue
                        step4 = runCommand('mlink', show_stderr = False, return_zero = False)
                        if step4[1]:
                            if env.debug:
                                with env.lock:
                                    print("mlink error:", step4[1], file = sys.stderr)
                            with env.lock:
                                errfile.write(step4[1])
                        try:
                            copy_file('outfile.dat', '{}.out'.format(unit))
                        except Exception:
                            # A missing outfile.dat is counted as an mlink failure.
                            with env.mlink_counter.get_lock():
                                env.mlink_counter.value += 1
                            continue
                        #clean linkage tmp files
                        for f in set(glob.glob('*.dat') + glob.glob('ped*') + ['names.tmp']):
                            os.remove(f)
                        #collect lod scores of different theta for the fam
                        with open('{}.out'.format(unit)) as out:
                            raw = out.read()
                        for i in re.finditer(r'^THETAS\s+(0\.\d+)(?:\n.+?){7}LOD SCORE =\s+(-?\d+\.\d+)', raw, re.MULTILINE):
                            theta, lod = map(float, i.group(1,2))
                            #if float(lod) < 1e-6:
                            #    lod = 0
                            # Keep the largest LOD seen across units for this (theta, family).
                            if theta not in lods:
                                lods[theta] = {fam: lod}
                            elif fam not in lods[theta] or lod > lods[theta][fam]:
                                lods[theta][fam] = lod
            for theta in sorted(lods.keys()):
                locus = ' '.join(map(str, genemap[gene]))
                fam_lods = list(lods[theta].values())
                lods_fh.write('{} {} {} {}\n'.format(gene, locus, theta, sum(fam_lods)))
                for fam in lods[theta]:
                    famlods_fh.write('{} {} {} {} {}\n'.format(fam, gene, locus, theta, lods[theta][fam]))
                # Maximise the HLOD over alpha by minimising its negation on [0, 1].
                res = minimize_scalar(hlod_fun(fam_lods, -1), bounds=(0,1), method='bounded', options={'xatol':1e-8})
                a = res.x
                hlods_fh.write('{} {} {} {} {}\n'.format(gene, locus, a, theta, hlod_fun(fam_lods)(a)))
            with env.run_counter.get_lock():
                env.run_counter.value += 1
                # Snapshot under the lock so the progress test and the logged
                # number refer to the same value.
                done = env.run_counter.value
            if done % (env.batch * env.jobs) == 0:
                env.log('Linkage analysis for {:,d} units completed {{{:.2%}}} ...'.format(done, float(done)/env.success_counter.value), flush=True)
    if to_plot:
        heatmap(out_prefix + '.lods', theta_inc, theta_max)
        heatmap(out_prefix + '.hlods', theta_inc, theta_max)
    #env.log("Finished running LINKAGE for {}.".format(workdir), flush=True)
{% endraw %} {% raw %}
hlod_fun(lods[theta].values())(a)
0.0
{% endraw %} {% raw %}
res = minimize_scalar(hlod_fun(lods[theta].values(), -1), bounds=(0,1), method='bounded', options={'xatol':1e-8})
a = res.x
{% endraw %} {% raw %}
a
0.9522536041612546
{% endraw %} {% raw %}
??hlod_fun
Signature: hlod_fun(Li, sign=1)
Docstring: <no docstring>
Source:   
def hlod_fun(Li, sign=1):
    def _fun(alpha):
        return sign * sum(np.log10(alpha*np.power(10, list(Li)) + 1 - alpha))
    return _fun
File:      /tmp/2206534.1.plot.q/ipykernel_25555/602110876.py
Type:      function
{% endraw %} {% raw %}
run_linkage(args.blueprint, args.theta_inc, args.theta_max, args.output_limit)
{% endraw %} {% raw %}
import sys, os, subprocess, shutil, glob, shlex, re, hashlib, tempfile
try:
    from cStringIO import StringIO ## for Python 2
    import urlparse
except ImportError:
    from io import StringIO ## for Python 3
    from urllib import parse as urlparse
from contextlib import contextmanager
from multiprocessing import Pool, Process, Queue, Lock, Value, cpu_count
import itertools
from collections import OrderedDict, defaultdict, Counter
from shutil import rmtree as remove_tree
from zipfile import ZipFile
{% endraw %} {% raw %}
cpu_count()
32
{% endraw %} {% raw %}
def parmap(f, X, nprocs = cpu_count()):
    """Apply ``f`` to every item of ``X`` in ``nprocs`` worker processes.

    Items are fed to the workers as ``(index, item)`` pairs through a
    one-slot queue. Each worker stops when it receives a pair whose index is
    ``None``. Results are returned as a list in the order of ``X``.

    Args:
        f: callable taking a single item.
        X: iterable of items.
        nprocs: number of worker processes to start (the default is evaluated
            once, when the function is defined).
    """
    def worker_loop(tasks, results):
        # Serve tasks until the stop marker (index None) is received.
        while True:
            idx, item = tasks.get()
            if idx is None:
                return
            results.put((idx, f(item)))

    tasks = Queue(1)
    results = Queue()

    workers = []
    for _ in range(nprocs):
        workers.append(Process(target=worker_loop, args=(tasks, results)))
    for worker in workers:
        worker.daemon = True
        worker.start()

    # Feed every item, then one stop marker per worker.
    n_sent = 0
    for job in enumerate(X):
        tasks.put(job)
        n_sent += 1
    for _ in range(nprocs):
        tasks.put((None, None))

    collected = []
    for _ in range(n_sent):
        collected.append(results.get())
    for worker in workers:
        worker.join()

    # Restore input order using the index attached to each result.
    collected.sort()
    return [value for _, value in collected]
{% endraw %} {% raw %}
# Driver: run (or restore from cache) the linkage analysis and build the HTML
# report; without --run-linkage, only export the cached per-format data.
if args.run_linkage:
    cache.setID('analysis')
    if not args.vanilla and cache.check():
        # Reuse the archived heatmap directory instead of re-running.
        env.log('Loading linkage analysis result from archive ...')
        cache.load(target_dir = env.output, names = ['heatmap'])
    else:
        env.log('Running linkage analysis ...')
        # run_linkage's fourth parameter is `to_plot`; args.output_limit does
        # not belong there (a limit of 0 would silently disable plotting), so
        # the default is used instead.
        run_linkage(args.blueprint, args.theta_inc, args.theta_max)
        env.log('Linkage analysis succesfully performed for {:,d} units\n'.\
                format(env.run_counter.value), flush = True)
        # Report each external tool that produced at least one runtime error.
        for tool, counter in (('makeped', env.makeped_counter),
                              ('pedcheck', env.pedcheck_counter),
                              ('unknown', env.unknown_counter),
                              ('mlink', env.mlink_counter)):
            if counter.value:
                env.log('{} "{}" runtime errors occurred'.format(counter.value, tool))
        cache.write(arcroot = 'heatmap', source_dir = os.path.join(env.output, 'heatmap'), mode = 'a')
    html(args.theta_inc, args.theta_max, args.output_limit)
else:
    env.log('Saving data to [{}]'.format(os.path.abspath(env.output)))
    # A distinct loop variable keeps the outer `fmt` intact under Python 2,
    # where list-comprehension variables leak into the enclosing scope.
    cache.load(target_dir = env.output, names = [out_fmt.upper() for out_fmt in args.format])
MESSAGE: Running linkage analysis ...
MESSAGE: Linkage analysis succesfully performed for 1 units
MESSAGE: 2 "pedcheck" runtime errors occurred
MESSAGE: Report for [tsq20211129] is generated in HTML format
{% endraw %} {% raw %}
    env.log('Saving data to [{}]'.format(os.path.abspath(env.output)))
    cache.load(target_dir = env.output, names = [fmt.upper() for fmt in args.format])
MESSAGE: Saving data to [/mnt/mfs/statgen/yin/Github/linkage/SEQpy2/testseqlink]
{% endraw %}

3. Testing heatmap

{% raw %}
def hinton(filename, max_weight=None, ax=None):
    """Draw a placeholder Hinton diagram of random weights and save it.

    The diagram shows a 20x20 matrix of uniform random values in
    [-0.5, 0.5): white squares for positive entries, black otherwise, on a
    gray background. The square side is ``sqrt(|w| / max_weight)``.

    Args:
        filename: output image path. It must contain ``.chr<ID>.`` (ID made
            of digits, X or Y), which supplies the plot title; otherwise the
            regex lookup raises ``AttributeError``.
        max_weight: normalisation constant for the square sizes. When falsy,
            the largest absolute entry rounded up to a power of two is used.
        ax: axes to draw on; defaults to the current axes.
    """
    if ax is None:
        ax = plt.gca()
    matrix = np.random.rand(20, 20) - 0.5
    if not max_weight:
        max_weight = 2**np.ceil(np.log2(np.abs(matrix).max()))
    ax.patch.set_facecolor('gray')
    ax.set_aspect('equal', 'box')
    ax.xaxis.set_major_locator(plt.NullLocator())
    ax.yaxis.set_major_locator(plt.NullLocator())
    chrID = re.search(r'\.chr([0-9XY]+)\.', filename).group(1)
    ax.set_title('Chromosome {}'.format(chrID))
    for (x, y), w in np.ndenumerate(matrix):
        color = 'white' if w > 0 else 'black'
        # Normalise by max_weight; the parameter was previously computed but
        # never applied.
        size = np.sqrt(np.abs(w) / max_weight)
        rect = plt.Rectangle([x - size / 2, y - size / 2], size, size,
                             facecolor=color, edgecolor=color)
        ax.add_patch(rect)
    ax.autoscale_view()
    ax.invert_yaxis()
    # Save the figure that owns `ax`, which need not be pyplot's current one.
    ax.figure.savefig(filename)


def heatmap(file, theta_inc, theta_max):
    """Render the scores in a ``.lods``/``.hlods`` file as ``<file>.png``.

    The last two whitespace-separated fields of each line are read as
    ``theta`` and ``score``. Rows with ``theta >= theta_max`` are dropped. The
    remaining scores are reshaped to ``(-1, Num)`` with
    ``Num = round(theta_max / theta_inc)`` and drawn with theta on the y axis.
    If the file is empty, no scores remain, or all scores are equal, a
    placeholder image from ``hinton`` is written instead.

    Args:
        file: path to the score file. It must contain ``.chr<ID>.``, which
            supplies the plot title.
        theta_inc: theta step, used for the y tick labels and for ``Num``.
        theta_max: exclusive upper bound on theta.

    Raises:
        ValueError: if the number of retained scores is not a multiple of
            ``Num`` (raised by the reshape).
    """
    #env.log("Start ploting heatmap for {} ...".format(file), flush=True)
    png = '{}.png'.format(file)
    if os.path.getsize(file) == 0:
        hinton(png)
        return
    lods = []
    with open(file, 'r') as f:
        for line in f:
            fields = line.split()
            if len(fields) < 2:
                # Blank or malformed line: nothing to plot.
                continue
            theta, lod = fields[-2:]
            if float(theta) >= theta_max:
                continue
            # Store numbers, so the max/min test below is numeric rather than
            # lexicographic.
            lods.append(float(lod))
    if not lods or max(lods) == min(lods):
        #env.log('Max equals Min for [{}], No real heatmap will be generated.'.format(file))
        hinton(png)
        return
    Num = int(round(theta_max/theta_inc))
    grid = np.array(lods).reshape((-1, Num))
    chrID = re.search(r'\.chr([0-9XY]+)\.', file).group(1)
    fig, ax = plt.subplots(1)
    try:
        ax.set_title('Chromosome {}'.format(chrID))
        # matplotlib's pcolormesh takes only the data (plus optional X/Y
        # grids); tick labels are set on the axes afterwards.
        ax.pcolormesh(grid.transpose(),
                      cmap=brewer2mpl.get_map('Blues', 'Sequential', 9).mpl_colormap)
        ax.set_xticks([])
        # One labelled tick per theta row, centred on the cell.
        ax.set_yticks(np.arange(Num) + 0.5)
        ax.set_yticklabels(np.round(np.arange(Num) * theta_inc, 2).tolist())
        fig.savefig(png)
    finally:
        # Release the figure; one is created per call.
        plt.close(fig)
{% endraw %} {% raw %}
theta_inc
0.05
{% endraw %} {% raw %}
Num=int(round(theta_max/theta_inc))
{% endraw %} {% raw %}
Num
10
{% endraw %} {% raw %}
lods = ['0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0']
{% endraw %} {% raw %}
lods = np.array(list(map(float, lods))).reshape((-1,Num))
{% endraw %} {% raw %}
lods.shape
(4, 10)
{% endraw %} {% raw %}
len(lods)
4
{% endraw %} {% raw %}
[''] * len(lods)
['', '', '', '']
{% endraw %} {% raw %}
np.round(np.array(range(Num)) * theta_inc,2).tolist()
[0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45]
{% endraw %} {% raw %}
brewer2mpl.get_map('Blues', 'Sequential', 9).mpl_colormap[1]
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/tmp/2206534.1.plot.q/ipykernel_30528/241223039.py in <module>
----> 1 brewer2mpl.get_map('Blues', 'Sequential', 9).mpl_colormap[1]

TypeError: 'LinearSegmentedColormap' object is not subscriptable
{% endraw %} {% raw %}
lods = np.random.rand(4,10)
{% endraw %} {% raw %}
%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
{% endraw %} {% raw %}
plt.pcolormesh(lods)
<matplotlib.collections.QuadMesh at 0x2b7a0b1f2be0>
{% endraw %} {% raw %}
dx, dy = 1, 0.05

# generate 2 2d grids for the x & y bounds
y, x = np.mgrid[slice(0, 0.5 + dy, dy),
                slice(0, 4 + dx, dx)]
{% endraw %} {% raw %}
plt.pcolormesh(x,y,lods.transpose(),cmap=brewer2mpl.get_map('Blues', 'Sequential', 9).mpl_colormap)
<matplotlib.collections.QuadMesh at 0x2b7a0b397370>
{% endraw %} {% raw %}
file = '{}/heatmap/{}.lods'.format(env.outdir, basename(workdirs[0]))
chrID = re.search(r'\.chr([0-9XY]+)\.', file).group(1)
fig, ax = plt.subplots(1)
ax.set_title('Chromosome {}'.format(chrID))
plt.pcolormesh(fig,ax,lods.transpose(),
               xticklabels=[''] * len(lods),
               yticklabels=np.round(np.array(range(Num)) * theta_inc,2).tolist(),
               cmap=brewer2mpl.get_map('Blues', 'Sequential', 9).mpl_colormap)
fig.savefig('{}.png'.format(file))
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
/tmp/2206534.1.plot.q/ipykernel_30528/3434994219.py in <module>
      3 fig, ax = plt.subplots(1)
      4 ax.set_title('Chromosome {}'.format(chrID))
----> 5 plt.pcolormesh(fig,ax,lods.transpose(),
      6                xticklabels=[''] * len(lods),
      7                yticklabels=np.round(np.array(range(Num)) * theta_inc,2).tolist(),

~/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/matplotlib/pyplot.py in pcolormesh(alpha, norm, cmap, vmin, vmax, shading, antialiased, data, *args, **kwargs)
   2716         vmax=None, shading=None, antialiased=False, data=None,
   2717         **kwargs):
-> 2718     __ret = gca().pcolormesh(
   2719         *args, alpha=alpha, norm=norm, cmap=cmap, vmin=vmin,
   2720         vmax=vmax, shading=shading, antialiased=antialiased,

~/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/matplotlib/__init__.py in inner(ax, data, *args, **kwargs)
   1410     def inner(ax, *args, data=None, **kwargs):
   1411         if data is None:
-> 1412             return func(ax, *map(sanitize_sequence, args), **kwargs)
   1413 
   1414         bound = new_sig.bind(ax, *args, **kwargs)

~/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/matplotlib/axes/_axes.py in pcolormesh(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)
   6014         kwargs.setdefault('edgecolors', 'none')
   6015 
-> 6016         X, Y, C, shading = self._pcolorargs('pcolormesh', *args,
   6017                                             shading=shading, kwargs=kwargs)
   6018         coords = np.stack([X, Y], axis=-1)

~/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/matplotlib/axes/_axes.py in _pcolorargs(self, funcname, shading, *args, **kwargs)
   5507                             f'but {len(args)} were given')
   5508 
-> 5509         Nx = X.shape[-1]
   5510         Ny = Y.shape[0]
   5511         if X.ndim != 2 or X.shape[0] == 1:

IndexError: tuple index out of range
{% endraw %} {% raw %}
heatmap('{}/heatmap/{}.lods'.format(env.outdir, basename(workdirs[0])), theta_inc, theta_max)
#heatmap('{}/heatmap/{}.hlods'.format(env.outdir, basename(workdir)), theta_inc, theta_max)
LOC102725121@1 1 11868 14362 0.5 0.0

DDX11L1 1 11873 14409 0.5 0.0

WASH7P 1 14361 29370 0.5 0.0

MIR6859-1@1,MIR6859-2@1,MIR6859-3@1,MIR6859-4@1 1 17368 17436 0.5 0.0

['0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0']
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/2206534.1.plot.q/ipykernel_30528/3244704223.py in <module>
----> 1 heatmap('{}/heatmap/{}.lods'.format(env.outdir, basename(workdirs[0])), theta_inc, theta_max)
      2 #heatmap('{}/heatmap/{}.hlods'.format(env.outdir, basename(workdir)), theta_inc, theta_max)

/tmp/2206534.1.plot.q/ipykernel_30528/1950724175.py in heatmap(file, theta_inc, theta_max)
     41             #return
     42         Num=int(round(theta_max/theta_inc))
---> 43         lods = np.array(map(float,lods)).reshape((-1,Num))
     44         chrID = re.search(r'\.chr([0-9XY]+)\.', file).group(1)
     45         fig, ax = plt.subplots(1)

ValueError: cannot reshape array of size 1 into shape (10)
{% endraw %} {% raw %}
workdirs
['./LINKAGE/LINKAGE/LINKAGE.chr1']
{% endraw %}