/usr/include/kmer/seqStore.H is in libmeryl-dev 0~20150903+r2013-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#ifndef SEQSTORE_H
#define SEQSTORE_H
#include "util++.H"
#include "seqCache.H"
// A binary fasta file.
//
// HEADER
// magic number
// number of sequences
// optional - alphabet size
// optional - alphabet map (0x00 -> 'a', etc)
// position of index start
// position of data start
// DATA
// INDEX
// position of sequence start in DATA
// header length
// sequence length
// MAP
// name to IID mapping
// Summary record for a seqStore file: identification words, element
// counts, and the start positions of the index, block and names sections.
//
// NOTE(review): the per-field notes below are read off the field names and
// the file-layout comment earlier in this header; the code that reads and
// writes this struct is not visible here, so confirm units (bytes vs. bits)
// and exact semantics against the implementation.
struct seqStoreHeader {
uint64 _magic[2]; // Two 64-bit identification words (the "magic number")
uint32 _pad; // Presumably padding so the following 64-bit fields stay aligned -- TODO confirm
uint32 _numberOfSequences; // Number of sequences in the store
uint64 _numberOfACGT; // Presumably the total count of ACGT bases -- TODO confirm
uint32 _numberOfBlocksACGT; // Number of blocks that carry ACGT data
uint32 _numberOfBlocksGAP; // Number of gap blocks (presumably the "N - no data" blocks)
uint32 _numberOfBlocks; // Total number of blocks; presumably ACGT + GAP -- TODO confirm
uint32 _namesLength; // Length of the names data; units not visible here
uint64 _indexStart; // Start position of the seqStoreIndex section
uint64 _blockStart; // Start position of the seqStoreBlock section
uint64 _namesStart; // Start position of the names section
};
// This index allows us to return a complete sequence
//
// Per-sequence index record: where to find the defline and the sequence
// data for one sequence, and which seqStoreBlock it begins in.
// `_names` and `_bpf` below refer to the seqStore members of those names.
// NOTE(review): presumably one entry per sequence, indexed by IID -- confirm.
struct seqStoreIndex {
uint32 _hdrPosition; // Offset into _names for the defline
uint32 _hdrLength; // Length of the defline
uint64 _seqPosition; // Offset into _bpf for the sequence data
uint32 _seqLength; // Length, in bases, of the sequence
uint32 _block; // The seqStoreBlock that starts this sequence
};
// This index allows us to seek to a specific base in the
// file of sequences. Each block is either:
// ACGT - and has data
// N - no data
// It will map a specific ACGT location to the sequence, and the ID
// of that sequence (seq ID and location in that sequence).
//
// One block of a sequence, packed into bit-fields.
//
// The five widths total 128 bits (1 + 32 + 32 + 23 + 40). How bit-fields
// are allocated within their uint64 units is implementation-defined.
// NOTE(review): packed contiguously, _iid would straddle a 64-bit boundary;
// ABIs that do not split a bit-field across units will insert padding, so
// sizeof(seqStoreBlock) may exceed 16 bytes. Verify before relying on the
// in-memory size or writing the struct out raw.
struct seqStoreBlock {
uint64 _isACGT:1; // block is acgt
uint64 _pos:32; // position in sequence
uint64 _iid:32; // iid of the sequence we are in
uint64 _len:23; // length of block
uint64 _bpf:40; // position in the bit file of sequence
};
// Largest values representable in the _pos, _iid and _len bit-fields.
// The arguments mirror the field widths declared above; keep them in sync
// if a width changes. uint64MASK is defined outside this file (presumably
// it yields a value with the low N bits set -- confirm).
#define SEQSTOREBLOCK_MAXPOS uint64MASK(32)
#define SEQSTOREBLOCK_MAXIID uint64MASK(32)
#define SEQSTOREBLOCK_MAXLEN uint64MASK(23)
// seqFile implementation backed by a seqStore (binary fasta) file.
//
// Both constructors are protected and seqFactory is declared a friend, so
// instances are presumably created through seqFactory rather than directly
// by client code -- confirm against seqFactory.
class seqStore : public seqFile {
protected:
seqStore(const char *filename);
seqStore();
public:
~seqStore();
protected:
// Takes a file name and returns a seqFile pointer.
// NOTE(review): the result when 'filename' is not a seqStore is not
// visible here (presumably a null pointer) -- confirm in the implementation.
seqFile *openFile(const char *filename);
public:
// Look up a sequence by name and return a uint32 (presumably its IID;
// the not-found value is not visible here -- confirm).
uint32 find(const char *sequencename);
// Length of sequence 'iid' (presumably in bases, matching
// seqStoreIndex::_seqLength -- confirm).
uint32 getSequenceLength(uint32 iid);
// Fetch the header (h, hLen, hMax) and sequence (s, sLen, sMax) for 'iid'.
// The buffers and sizes are passed by reference, so the callee can replace
// or resize them; NOTE(review): ownership and growth policy are not
// visible here -- confirm. The meaning of the bool result is likewise
// defined by the implementation.
bool getSequence(uint32 iid,
char *&h, uint32 &hLen, uint32 &hMax,
char *&s, uint32 &sLen, uint32 &sMax);
// Fetch part of sequence 'iid', bounded by 'bgn' and 'end', into the
// caller-supplied buffer 's'. NOTE(review): whether 'end' is inclusive
// and whether 's' is NUL-terminated is not visible here -- confirm.
bool getSequence(uint32 iid,
uint32 bgn, uint32 end, char *s);
private:
void clear(void);
void loadIndex(void);
bitPackedFile *_bpf; // Sequence data; seqStoreIndex::_seqPosition is an offset into this
seqStoreHeader _header; // Header record for the file
seqStoreIndex *_index; // Per-sequence index records
seqStoreBlock *_block; // Block records
char *_names; // Deflines; seqStoreIndex::_hdrPosition is an offset into this
bitPackedFile *_indexBPF; // Presumably the file view used to load _index -- TODO confirm
bitPackedFile *_blockBPF; // Presumably the file view used to load _block -- TODO confirm
bitPackedFile *_namesBPF; // Presumably the file view used to load _names -- TODO confirm
uint32 _lastIIDloaded; // IID bookkeeping for loading; exact use not visible here
friend class seqFactory;
};
// Construct a new seqStore 'filename' from input file 'inputseq'.
//
//   filename - name of the seqStore to create
//   inputseq - source of the sequences, supplied as a seqCache
//
// Returns nothing; how errors are reported is not visible in this header.
// NOTE(review): 'filename' is a non-const char *; if the implementation
// never writes through it, const char * would be the better signature,
// but declaration and definition must be changed together.
//
void
constructSeqStore(char *filename,
seqCache *inputseq);
#endif // SEQSTORE_H
|