Source code for MDAnalysis.analysis.leaflet

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- http://mdanalysis.googlecode.com
# Copyright (c) 2006-2011 Naveen Michaud-Agrawal,
#               Elizabeth J. Denning, Oliver Beckstein,
#               and contributors (see website for details)
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
#     N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and
#     O. Beckstein. MDAnalysis: A Toolkit for the Analysis of
#     Molecular Dynamics Simulations. J. Comput. Chem. 32 (2011), 2319--2327,
#     doi:10.1002/jcc.21787
#

"""
Leaflet identification --- :mod:`MDAnalysis.analysis.leaflet`
==============================================================

:Author: Oliver Beckstein
:Year: 2010
:Copyright: GNU Public License v3

Algorithm:
  1. build a graph of all phosphate distances < cutoff
  2. identify the largest connected subgraphs
  3. analyse first and second largest graph, which correspond to the leaflets

One can use this information to identify

* the upper and lower leaflet of a *planar membrane* by comparing the
  :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.centerOfGeometry` of
  the leaflet groups, or

* the outer and inner leaflet of a *vesicle* by comparing histograms
  of distances from the centre of geometry (or possibly simply the
  :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.radiusOfGyration`).

See example scripts in the ``examples/`` directory on how to use
:class:`LeafletFinder`. The function :func:`optimize_cutoff` implements a
(slow) heuristic method to find the best cut off for the LeafletFinder
algorithm.

.. autoclass:: LeafletFinder
   :members:

.. autofunction:: optimize_cutoff

"""

import numpy
import MDAnalysis
import networkx as NX
import distances
import warnings

class LeafletFinder(object):
    """Identify atoms in the same leaflet of a lipid bilayer.

    The components of the graph are stored in the list
    :attr:`LeafletFinder.components`; the atoms in each component are numbered
    consecutively, starting at 0. To obtain the atoms in the input structure
    use :meth:`LeafletFinder.groups`::

       L = LeafletFinder(PDB, 'name P*')
       leaflet0 = L.groups(0)
       leaflet1 = L.groups(1)

    The residues can be accessed through the standard MDAnalysis mechanism::

       leaflet0.residues

    provides a :class:`~MDAnalysis.core.AtomGroup.ResidueGroup`
    instance. Similarly, all atoms in the first leaflet are then ::

       leaflet0.residues.atoms
    """

    def __init__(self, universe, selectionstring, cutoff=15.0, pbc=False):
        """Initialize from a *universe* or pdb file.

        :Arguments:
             *universe*
                 :class:`MDAnalysis.Universe` or a PDB file name.
             *selectionstring*
                 :meth:`MDAnalysis.Universe.selectAtoms` selection
                 for atoms that define the lipid head groups, e.g.
                 "name PO4" or "name P*"
        :Keywords:
             *cutoff*
                 head group-defining atoms within a distance of *cutoff*
                 Angstroms are deemed to be in the same leaflet [15.0]
             *pbc*
                 take periodic boundary conditions into account (only works
                 for orthorhombic boxes) [``False``]
        """
        universe = MDAnalysis.asUniverse(universe)
        self.universe = universe
        self.selectionstring = selectionstring
        self.selection = universe.selectAtoms(selectionstring)
        self.pbc = pbc
        self._init_graph(cutoff)

    def _init_graph(self, cutoff):
        """Store *cutoff* and (re)build :attr:`graph` and :attr:`components`."""
        self.cutoff = cutoff
        self.graph = self._get_graph()
        self.components = self._get_components()

    # The graph construction at the end of _get_graph() and the component
    # search in _get_components() are all that is needed to make the leaflet
    # detection work.
    def _get_graph(self):
        """Build graph from adjacency matrix at the given cutoff.

        Automatically select between high and low memory usage versions of
        contact_matrix.
        """
        # could use self_distance_array to speed up but then need to deal
        # with the sparse indexing
        if self.pbc:
            box = self.universe.trajectory.ts.dimensions
        else:
            box = None
        coord = self.selection.coordinates()
        try:
            # this works for small systems
            adj = distances.contact_matrix(coord, cutoff=self.cutoff,
                                           returntype="numpy", box=box)
        except ValueError:
            # but use a sparse matrix method for larger systems for memory
            # reasons
            # NOTE(review): assumes contact_matrix signals "too big" with a
            # ValueError -- confirm against distances.contact_matrix.
            warnings.warn('N x N matrix too big - switching to sparse matrix method (works fine, but is currently rather slow)',
                          category=UserWarning, stacklevel=2)
            adj = distances.contact_matrix(coord, cutoff=self.cutoff,
                                           returntype="sparse", box=box)
        return NX.Graph(adj)

    def _get_components(self):
        """Return connected components (as sorted numpy arrays), sorted by size."""
        return self._sort_components(NX.connected_components(self.graph))

    @staticmethod
    def _sort_components(components):
        """Turn an iterable of node collections into size-ordered index arrays.

        Each component (list, set, ...) becomes an ascending numpy array; the
        returned list is ordered from the largest to the smallest component.
        Components of equal size keep their incoming relative order.
        """
        # list() is required because numpy cannot sort a set directly and
        # connected_components may yield sets rather than lists.
        arrays = [numpy.sort(list(component)) for component in components]
        # Sort explicitly instead of relying on the ordering of the graph
        # library; list.sort is stable, so already-ordered input is unchanged.
        arrays.sort(key=len, reverse=True)
        return arrays

    def update(self, cutoff=None):
        """Update components, possibly with a different *cutoff*"""
        if cutoff is None:
            cutoff = self.cutoff
        self._init_graph(cutoff)

    def sizes(self):
        """Dict of component index with size of component."""
        return dict((idx, len(component))
                    for idx, component in enumerate(self.components))

    def groups(self, component_index=None):
        """Return a :class:`MDAnalysis.core.AtomGroup.AtomGroup` for *component_index*.

        If no argument is supplied, then a list of all leaflet groups is returned.

        .. SeeAlso:: :meth:`LeafletFinder.group` and :meth:`LeafletFinder.groups_iter`
        """
        if component_index is None:
            return list(self.groups_iter())
        return self.group(component_index)

    def group(self, component_index):
        """Return a :class:`MDAnalysis.core.AtomGroup.AtomGroup` for *component_index*."""
        # maybe cache this?
        # Component entries are indices into self.selection, not atom numbers
        # of the full universe.
        return MDAnalysis.core.AtomGroup.AtomGroup(
            [self.selection[i] for i in self.components[component_index]])

    def groups_iter(self):
        """Iterator over all leaflet :meth:`groups`"""
        for component_index, _ in enumerate(self.components):
            yield self.group(component_index)

    def write_selection(self, filename, **kwargs):
        """Write selections for the leaflets to *filename*.

        The format is typically determined by the extension of *filename*
        (e.g. "vmd", "pml", or "ndx" for VMD, PyMol, or Gromacs).

        The keywords *format* and *mode* (default ``'wa'``) are consumed here;
        all remaining keywords are passed on to the writer. Leaflets are
        written under the names ``leaflet_1``, ``leaflet_2``, ...

        See :class:`MDAnalysis.selections.base.SelectionWriter` for all
        options.
        """
        import MDAnalysis.selections
        SelectionWriter = MDAnalysis.selections.get_writer(
            filename, kwargs.pop('format', None))
        writer = SelectionWriter(
            filename, mode=kwargs.pop('mode', 'wa'),
            preamble="leaflets based on selection=%(selectionstring)r cutoff=%(cutoff)f\n" % vars(self),
            **kwargs)
        for i, ag in enumerate(self.groups_iter()):
            name = "leaflet_%d" % (i + 1)
            writer.write(ag, name=name)

    def __repr__(self):
        return "<LeafletFinder(%r, cutoff=%.1f A) with %d atoms in %d groups>" % (
            self.selectionstring, self.cutoff,
            self.selection.numberOfAtoms(), len(self.components))
def optimize_cutoff(universe, selection, dmin=10.0, dmax=20.0, step=0.5,
                    max_imbalance=0.2, **kwargs):
    """Find cutoff that minimizes number of disconnected groups.

    Applies heuristics to find best groups:

    1. at least two groups (assumes that there are at least 2 leaflets)
    2. reject any solutions for which:

              `|N0 - N1|/|N0 + N1| > *max_imbalance*`

       Ni = number of lipids in group i. This heuristic picks groups with
       balanced numbers of lipids.

    :Arguments:
      *universe*
          :class:`MDAnalysis.Universe` instance
      *selection*
          selection string as used for :class:`LeafletFinder`
      *dmin*, *dmax*, *step*
          scan cutoffs from *dmin* up to (but not including) *dmax* at
          stepsize *step* (in Angstroms)
      *max_imbalance*
          tuning parameter for the balancing heuristic (2) [0.2]
      *kwargs*
          other arguments for :class:`LeafletFinder`; a *cutoff* keyword is
          silently discarded

    :Returns: ``(cutoff,N)`` optimum cutoff and number of groups found
    :Raises: :exc:`ValueError` if no scanned cutoff yields at least two
             groups that satisfy the balancing heuristic
    """
    kwargs.pop('cutoff', None)  # not used, so we filter it
    _sizes = []
    for cutoff in numpy.arange(dmin, dmax, step):
        LF = LeafletFinder(universe, selection, cutoff=cutoff, **kwargs)
        # heuristic:
        #  1) N > 1
        #  2) no imbalance between large groups:
        sizes = LF.sizes()
        if len(sizes) < 2:
            continue
        # The two biggest groups are assumed to be the leaflets; pick them
        # explicitly instead of trusting the ordering of the components.
        n0, n1 = sorted(sizes.values(), reverse=True)[:2]
        imbalance = abs(n0 - n1) / float(n0 + n1)
        if imbalance > max_imbalance:
            continue
        _sizes.append((cutoff, len(sizes)))
    if not _sizes:
        raise ValueError(
            "No cutoff in [%r, %r) with step %r produced at least two groups "
            "with imbalance <= %r" % (dmin, dmax, step, max_imbalance))
    results = numpy.rec.fromrecords(_sizes, names="cutoff,N")
    del _sizes
    results.sort(order=["N", "cutoff"])  # sort ascending by N, then cutoff
    return results[0]  # (cutoff,N) with N>1 and shortest cutoff