/usr/include/freecontact.h is in libfreecontact-dev 1.0.21-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 | /* FreeContact - program to predict protein residue contacts from a sufficiently large multiple alignment
* Copyright (C) 2012-2013 Laszlo Kajan <lkajan@rostlab.org> Rost Lab, Technical University of Munich, Germany
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef LIBFREECONTACT_H
#define LIBFREECONTACT_H
#include <errno.h>
#include <map>
#include <math.h>
#include <stdexcept>
#include <stdint.h>
#include <string.h>
#include <vector>
#ifdef __SSE2__
#include <x86intrin.h>
#endif // __SSE2__
#define LIBFREEC_API __attribute__ ((visibility ("default")))
#define LIBFREEC_LOCAL __attribute__ ((visibility ("hidden")))
namespace freecontact {
// http://www.fao.org/docrep/004/Y2775E/y2775e0e.htm
// Use 20 for gap.
static const uint8_t aamap_gapidx = 20;
static const uint8_t aamap[] = {
// A B C D E F G H I J K L M N
21, 0, 3, 4, 3, 6, 13, 7, 8, 9, 21, 11, 10, 12, 2,
// O P Q R S T U V W X Y Z
21, 14, 5, 1, 15, 16, 21, 19, 17, 21, 18, 6
};
static const uint8_t mapaa[] = {
//0 1 2 3 4 5 6 7 8 9
'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V',
'-', 'X'
};
/// Alignment length exception.
class LIBFREEC_API alilen_error : public std::runtime_error {
public:
alilen_error(const std::string& __arg) : std::runtime_error(__arg){}
};
/// Inverse covariance matrix estimation timeout exception.
class LIBFREEC_API icme_timeout_error : public std::runtime_error {
public:
icme_timeout_error(const std::string& __arg) : std::runtime_error(__arg){}
};
#ifndef __SSE2__
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
#endif
/// Protein sequence alignment.
class LIBFREEC_API ali_t : public std::vector<__m128i>
{
private:
#pragma GCC visibility push(hidden)
#pragma GCC visibility pop
public:
uint32_t seqcnt;
uint16_t alilen; // aligned sequence length, max 65535, reasonable
uint16_t alilen16; // size in 16-bytes
uint16_t alilenpad; // 16 * alilen16
ali_t( const uint16_t __alilen = 0 ) : std::vector<__m128i>(), seqcnt(0), alilen(__alilen), alilen16(( alilen-1 )/16 + 1), alilenpad( 16 * alilen16 )
{
reserve(1024*alilen16);
}
virtual ~ali_t(){}
inline uint8_t& operator()(uint32_t __k, uint16_t __ai)
{
return reinterpret_cast<uint8_t*>(this->_M_impl._M_start)[ __k*alilenpad + __ai ];
}
inline const uint8_t& operator()(uint32_t __k, uint16_t __ai) const
{
return reinterpret_cast<uint8_t*>(this->_M_impl._M_start)[ __k*alilenpad + __ai ];
}
/// Convert alignment string to amino acid codes with aamap.
static std::vector<uint8_t>
read_a_seq( const std::string& __l )
{
std::vector<uint8_t> ret; ret.reserve(__l.length());
for( std::string::const_iterator l_b = __l.begin(), l_e = __l.end(); l_b != l_e; ++l_b )
{
const std::string::value_type c = *l_b;
if( isalpha(c) )
ret.push_back( aamap[c & 0x1F] );
else if( c == '-' )
ret.push_back( 20 );
else break;
}
return ret;
}
/// Push a sequence to the alignment.
ali_t& push(const std::vector<uint8_t>& __al) throw (alilen_error);
/// Push a sequence to the alignment.
inline ali_t& push(const std::string& __l) throw (alilen_error)
{
return push(read_a_seq(__l));
}
};
/// Contact prediction.
struct LIBFREEC_API contact_t {
/// Residue number, 0-based(!).
uint16_t i, j;
/// Contact score.
float score;
contact_t( uint16_t __i = 0, uint16_t __j = 0, float __score = 0 ) : i(__i), j(__j), score(__score) {}
inline bool operator<( const contact_t& __b ) const { return score < __b.score; } // to facilitate sorting by score
inline bool operator>( const contact_t& __b ) const { return score > __b.score; } // to facilitate sorting by score
};
/// Parameter set structure for predictor::run().
struct parset_t {
double clustpc, density, gapth; uint16_t mincontsep;
double pseudocnt, pscnt_weight; bool estimate_ivcov; double shrink_lambda;
bool cov20, apply_gapth; double rho;
parset_t( double __clustpc = 0, double __density = 0, double __gapth = 0, uint16_t __mincontsep = 0,
double __pseudocnt = 0, double __pscnt_weight = 0, bool __estimate_ivcov = 0, double __shrink_lambda = 0,
bool __cov20 = 0, bool __apply_gapth = 0, double __rho = 0
) :
clustpc(__clustpc), density(__density), gapth(__gapth), mincontsep(__mincontsep),
pseudocnt(__pseudocnt), pscnt_weight(__pscnt_weight), estimate_ivcov(__estimate_ivcov), shrink_lambda(__shrink_lambda),
cov20(__cov20), apply_gapth(__apply_gapth), rho(__rho)
{}
};
const parset_t // clust conts gapt minc pseud pscw estim shrin cov20 agapth rho
//ps_evfold(0.70, 0.0, 1.0, 1, 0.0, 0.5, false, 0.0, true, false, -1 ), // original Matlab that expects gap pre-filtering
ps_evfold( 0.70, 0.0, 0.9, 1, 0.0, 0.5, false, 0.0, true, true, -1 ),
ps_psicov( 0.62, 0.03, 0.9, 5, 1.0, 0.0, true, 0.1, false, false, -1 ), // as published
ps_psicov_sd( 0.62, 0.0, 0.9, 5, 1.0, 0.0, true, 0.1, false, false, 0.001 ); // PSICOV sensible default (sd) for most cases
/// Protein residue contact predictor.
/** A sufficiently large multiple alignment is required for meaningful results.
*/
class LIBFREEC_API predictor {
public:
typedef std::map<std::string, std::vector<contact_t> > cont_res_t;
typedef double freq_t;
typedef float pairfreq_t;
typedef std::vector<freq_t> freq_vec_t;
typedef std::map<std::string, double> time_res_t;
public:
bool dbg;
predictor(bool __dbg = false) : dbg(__dbg) {}
virtual ~predictor(){}
/// Calculate alignment sequence weights.
/** \param [out] __aliw Vector of alignment sequence weights.
* \param [out] __wtot Total alignment weight.
* \param [in] __ali Input alignment.
* \param [in] __clustpc BLOSUM-style clustering similarity threshold [0-1].
* \param [in] __veczw Use vectorized sequence weighting when available.
* \param [in] __num_threads Number of OpenMP threads, effective if non-zero. The default is as many threads as cores in host.
Ineffective if library is not compiled with OMP support.
*/
void get_seq_weights(
freq_vec_t &__aliw, double &__wtot,
const ali_t& __ali, double __clustpc,
bool __veczw = true, int __num_threads = 0
) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error);
/// Predict residue contacts.
/** \param [in] __ali Input alignment.
* \param [in] __aliw Vector of alignment sequence weights.
* Obtain this with get_seq_weights().
* \param [in] __wtot Total weight of alignment.
* Obtain this with get_seq_weights().
* \param [in] __density Target precision matrix density [0-1]. Set to 0 to not control density.
* \param [in] __gapth Threshold of weighted gap frequency for ignoring alignment columns with too many gaps [0-1]. Set to 1.00 to keep all columns.
* This is implemented by using a very high regularization value for gap columns, and by excluding gap columns from the covariance
* matrix, see __apply_gapth.
* \param [in] __mincontsep Minimum sequence-wise contacting residue pair separation given in amino acids as (j-i). 1 for adjacent residues. [1-).
* \param [in] __pseudocnt Number to initialize single and pair amino acid counts with [0-).
* \param [in] __pscnt_weight Pseudocount weight to apply to single and pair amino acid frequencies [0-1].
* \param [in] __estimate_ivcov Estimate inverse covariance matrix instead of fully inverting matrix. This is currently done by GLASSOFAST.
* \param [in] __shrink_lambda Covariance matrix shrinkage parameter, controlling rate of shrinkage [0-1].
* \param [in] __cov20 Leave one amino acid off the covariance matrix, making it non-overdetermined.
* \param [in] __apply_gapth When true, exclude residue columns and rows with a weighted gap frequency > __gapth from the covariance matrix.
* \param [in] __rho Initial value of Glasso regularization parameter. Negative values trigger an automatic choice for rho.
* \param [in] __num_threads Number of OpenMP threads, effective if non-zero. The default is as many threads as cores in host.
Ineffective if library is not compiled with OMP support.
* \param [in] __icme_timeout Inverse covariance matrix estimation timeout in seconds. Default: 1800.
Applied to each inversion call independently.
* \param [out] __timing Pointer to map of timing results for certain components of this method. Useful for debugging. Keys are: [num_threads|seqw|pairfreq|shrink|inv|all].
* \return Map of vectors of predicted contacts.
* Key is contact score calculation method name: 'l1norm', 'MI' (mutual information), 'fro' (Frobenius norm after zero-sum gauge).
* Vectors may not contain all contact pairs. Contacts are ordered by residue index (i,j).
*/
cont_res_t run(const ali_t& __ali, const freq_vec_t &__aliw, const double __wtot,
double __density, double __gapth, uint16_t __mincontsep,
double __pseudocnt, double __pscnt_weight, bool __estimate_ivcov, double __shrink_lambda,
bool __cov20, bool __apply_gapth, double __rho,
int __num_threads = 0, time_t __icme_timeout = 1800, time_res_t *__timing = NULL
) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error);
/// Predict residue contacts.
cont_res_t run(const ali_t& __ali, double __clustpc,
double __density, double __gapth, uint16_t __mincontsep,
double __pseudocnt, double __pscnt_weight, bool __estimate_ivcov, double __shrink_lambda,
bool __cov20, bool __apply_gapth, double __rho,
bool __veczw = true, int __num_threads = 0, time_t __icme_timeout = 1800, time_res_t *__timing = NULL
) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error);
/// Predict residue contacts.
inline
cont_res_t run(const ali_t& __ali,
const parset_t& __parset,
bool __veczw = true, int __num_threads = 0, time_t __icme_timeout = 1800, time_res_t *__timing = NULL
) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error)
{
return run(__ali, __parset.clustpc,
__parset.density, __parset.gapth, __parset.mincontsep,
__parset.pseudocnt, __parset.pscnt_weight, __parset.estimate_ivcov, __parset.shrink_lambda,
__parset.cov20, __parset.apply_gapth, __parset.rho,
__veczw, __num_threads, __icme_timeout, __timing);
}
};
} // namespace freecontact
#endif // LIBFREECONTACT_H
// vim:et:ts=4:ai:
|