1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
|
#include <iostream>
#include <fstream>
#include <string>
#include <sys/time.h>
#include <ext/hash_map>
#include "type.h"
#include "StringFactor.h"
extern "C" {
#include <time.h>
}
using namespace std;
namespace std { using namespace __gnu_cxx; }
ullong getChrono() {
struct timeval time;
time_t sec;
suseconds_t usec;
if (gettimeofday(&time,NULL) != -1) {
sec = time.tv_sec;
usec = time.tv_usec;
return (ullong)sec*1000000+usec;
}
return 0;
}
struct HashStringFactor : public hash<const char *>{
};
struct EqStringFactor{
bool operator()(const StringFactor &s1, const StringFactor &s2) const {
return strcmp(s1.c_str(),s2.c_str())==0;
}
};
int main(int argc, char **argv)
{
if (argc < 4){
cerr << "Usage: " << argv[0] <<" read_file read_length threshold" << endl;
exit(1);
}
ullong start = getChrono();
ifstream reads;
reads.open(argv[1], ios::in);
if (! reads.is_open()) {
cerr << "Cannot open reads file: " << argv[1] << endl;
exit(2);
}
reads.seekg (0, ios::end);
ullong length = reads.tellg();
uint read_length = atoi(argv[2]);
char *line = new char[read_length+1];
reads.seekg (0, ios::beg);
ullong nb_reads = length/(read_length+1);
uint threshold= atoi(argv[3]);
//ullong nb_elements = (read_length-threshold)*nb_reads;
hash_map<StringFactor, int, HashStringFactor, EqStringFactor > hash_kmer;
ullong current = 0;
// cout << "nb_reads: " << nb_reads << endl;
//cout << sizeof(ullong) << endl;
ullong state = 1;
while (current < nb_reads) {
if (current >= state*10000){
cout << current << " reads is included in stl_hash in " << (getChrono()-start)/1000000. << " s" << endl;
state++;
}
reads.getline(line, read_length+1);
for (ullong i = 0 ; i < (read_length-threshold) ; i++){
StringFactor factor = StringFactor(line,i,threshold);
hash_kmer[factor] ++ ;
}
current++;
}
cout << "nb_factors indexed in hash_kmer: " << hash_kmer.size() << endl;
cerr << "Creation of stl_hash_map: " << (getChrono()-start)/1000000. << " s" << endl;
return 0;
} |