1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
#include <iostream>
#include <fstream>
#include <string>
#include <sys/time.h>
#include <ext/hash_map>
extern "C" {
#include <time.h>
}
using namespace std;
namespace std { using namespace __gnu_cxx; }
#ifndef ullong
#define ullong unsigned long long
#endif
ullong getChrono() {
struct timeval time;
time_t sec;
suseconds_t usec;
if (gettimeofday(&time,NULL) != -1) {
sec = time.tv_sec;
usec = time.tv_usec;
return (ullong)sec*1000000+usec;
}
return 0;
}
struct eqstr{
bool operator()(const char* s1, const char* s2) const {
return strcmp(s1,s2)==0;
}
};
int main(int argc, char **argv)
{
if (argc < 4){
cerr << "Usage: " << argv[0] <<" read_file read_length threshold" << endl;
exit(1);
}
ullong start = getChrono();
ifstream reads;
reads.open(argv[1], ios::in);
if (! reads.is_open()) {
cerr << "Cannot open reads file: " << argv[1] << endl;
exit(2);
}
reads.seekg (0, ios::end);
ullong length = reads.tellg();
int read_length = atoi(argv[2]);
char *line = new char[read_length+1];
reads.seekg (0, ios::beg);
ullong nb_reads = length/(read_length+1);
int threshold= atoi(argv[3]);
hash_map<const char*, int, hash<const char*>, eqstr> hash_kmer;
string read_string;
const char *factor = new char[threshold+1];
ullong current = 0;
while (current < nb_reads) {
reads.getline(line, read_length+1);
read_string = line;
for (ullong i = 0 ; i < (read_length-threshold) ; i++){
factor = (read_string.substr(i,threshold)).c_str();
hash_kmer[factor] ++ ;
}
current++;
}
cout << "nb_factors indexed in hash_kmer: " << hash_kmer.size() << endl;
cerr << "Creation of stl_hash_map: " << (getChrono()-start)/1000000. << " s" << endl;
return 0;
} |
Partager