A minimal coevo analysis

This example shows how to perform a minimal co-evolutionary analysis using the SCA approach. This includes:

  • generating a SCA co-evolutionary matrix, reduce it, and sort it according

to XCoRs,

  • showing the position composition of XCoRs,

  • plotting the XCoRs using the independent component projections.

# Author: Margaux Jullien <margaux.jullien@univ-grenoble-alpes.fr>
#         Nelle Varoquaux <nelle.varoquaux@univ-grenoble-alpes.fr>
#         Ivan Junier <ivan.junier@univ-grenoble-alpes.fr>
# License: TBD

Necessary imports

import numpy as np
import matplotlib.pyplot as plt


import cocoatree.datasets as c_data
import cocoatree.msa as c_msa
import cocoatree.deconvolution as c_deconv

from cocoatree.statistics.pairwise import compute_sca_matrix

Loading the dataset

Filtering the MSA

Computing the SCA matrix

Plotting the SCA matrix

fig, ax = plt.subplots()
im = ax.imshow(SCA_matrix, vmin=0, vmax=2, cmap='inferno')

ax.set_xlabel('residues', fontsize=10)
ax.set_ylabel(None)
ax.set_title('SCA matrix')
fig.colorbar(im, shrink=0.7)
SCA matrix
<matplotlib.colorbar.Colorbar object at 0x7f5e71d23a10>

Extracting XCoRs

n_xcors = 3
xcors = c_deconv.extract_xcors(SCA_matrix, n_xcors=n_xcors)

print('XCoR positions on (filtered) sequences:')
for ixcor, xcor in enumerate(xcors):
    print('XCoR_%d: %s' % (ixcor+1, xcor))
XCoR positions on (filtered) sequences:
XCoR_1: [64, 48, 63, 195, 182, 168, 197, 199, 108, 61, 144, 146, 198, 196, 216, 49, 192, 194, 222, 234, 145, 208, 211, 35, 235, 26, 62, 50, 155, 200, 97, 23, 73]
XCoR_2: [140, 202, 52, 57, 110, 111, 129, 78, 87, 58, 226, 75, 124, 128, 81, 34, 231, 32, 33, 204]
XCoR_3: [212, 225, 190, 180, 223, 183, 213, 210, 193, 224, 220, 172, 184, 176, 36, 217, 161, 37, 142, 160, 177, 46, 188, 24]

A plotting function

def plot_coevo_according2xcors(coevo_matrix, xcors=[], vmin=0, vmax=1e6):
    """ A plotting function returning a reduced coevo matrix, keeping
    only XCoRs and sorting the residues according to them
    """

    fig, ax = plt.subplots(tight_layout=True)

    xcor_sizes = [len(x) for x in xcors]
    cumul_sizes = sum(xcor_sizes)
    sorted_pos = [p for xcor in xcors for p in xcor]

    im = ax.imshow(coevo_matrix[np.ix_(sorted_pos, sorted_pos)],
                   vmin=vmin, vmax=vmax,
                   interpolation='none', aspect='equal',
                   extent=[0, cumul_sizes, cumul_sizes, 0],
                   cmap='inferno')
    cb = fig.colorbar(im)
    cb.set_label("coevolution level")

    line_index = 0
    for i in range(n_xcors):
        ax.plot([line_index + xcor_sizes[i], line_index + xcor_sizes[i]],
                [0, cumul_sizes], 'w', linewidth=2)
        ax.plot([0, cumul_sizes],
                [line_index + xcor_sizes[i], line_index + xcor_sizes[i]],
                'w', linewidth=2)
        line_index += xcor_sizes[i]

    ticks = []
    for ix in range(len(xcors)):
        shift = np.sum([len(xcors[j]) for j in range(ix)])
        ticks.append(shift+len(xcors[ix])/2)

    ax.set_xticks(ticks)
    ax.set_xticklabels(['XCoR_%d' % ix for ix in range(1, len(xcors)+1)])
    ax.set_yticks(ticks)
    ax.set_yticklabels(['XCoR_%d' % ix for ix in range(1, len(xcors)+1)],
                       rotation=90, va='center')

    return fig, ax

Plotting the SCA matrix according to the XCoRs

fig, ax = plot_coevo_according2xcors(SCA_matrix, xcors, vmin=0, vmax=2)
ax.set_title('SCA matrix, sorted according to XCoRs')
ax.set_xlabel('XCoR\'s positions', fontsize=10)
SCA matrix, sorted according to XCoRs
Text(0.5, 29.000000000000014, "XCoR's positions")

Removing a global mode (ngm = no global mode), i.e., setting largest eigeinvalue to zero

Plotting the SCA matrix without global mode according to the XCoRs

fig, ax = plot_coevo_according2xcors(SCA_matrix_ngm, xcors, vmin=0, vmax=2)
ax.set_title('SCA matrix without global mode\nsorted according to XCoRs')
ax.set_xlabel('XCoR\'s positions', fontsize=10)
SCA matrix without global mode sorted according to XCoRs
Text(0.5, 29.000000000000014, "XCoR's positions")

Adapting the coevolution scale

fig, ax = plot_coevo_according2xcors(SCA_matrix_ngm, xcors, vmin=0, vmax=1)
ax.set_title('SCA matrix without global mode\nsorted according to XCoRs')
ax.set_xlabel('XCoR\'s positions', fontsize=10)
SCA matrix without global mode sorted according to XCoRs
Text(0.5, 29.000000000000014, "XCoR's positions")

Visualizing XCoRs on independent components

# extracting independent components
ICs = c_deconv.extract_independent_components(SCA_matrix,
                                              n_components=n_xcors)

# plotting IC_1 values versus IC_2 values
fig, ax = plt.subplots()
ax.plot(ICs[0], ICs[1], '.k')

# highlighting XCoR-associated values using a color code
# red: XCoR_1, green: XCoR_2, blue: XCoR_3
for ixcor, color in zip([0, 1, 2], ['r', 'g', 'b']):
    ax.plot(ICs[0][xcors[ixcor]], ICs[1][xcors[ixcor]],
            '.', c=color, label='XCoR_%d' % (ixcor+1))

ax.set_xlabel('IC1')
ax.set_ylabel('IC2')

ax.legend()
plot 01 minimal coevo
<matplotlib.legend.Legend object at 0x7f5e71a3b4d0>

Total running time of the script: (0 minutes 2.852 seconds)

Gallery generated by Sphinx-Gallery