| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 
 |  
 
#include <iostream>
#include <fstream>
#include <string>
#include <sys/time.h>
#include <ext/hash_map>
extern "C" {
#include <time.h>
}
 
using namespace std;
namespace std { using namespace __gnu_cxx; }
 
#ifndef ullong
#define ullong unsigned long long
#endif
 
ullong getChrono() {
  struct timeval time;
  time_t sec;
  suseconds_t usec;
 
  if (gettimeofday(&time,NULL) != -1) {
    sec = time.tv_sec;
    usec = time.tv_usec;
    return (ullong)sec*1000000+usec;
  }
  return 0;
}
 
struct eqstr{
  bool operator()(const char* s1, const char* s2) const {
    return strcmp(s1,s2)==0;
  }
};
 
int main(int argc, char **argv)
{
  if (argc < 4){
    cerr << "Usage: " << argv[0] <<" read_file read_length threshold" << endl; 
    exit(1);
  }
  ullong start = getChrono();
 
  ifstream reads;
  reads.open(argv[1], ios::in);
  if (! reads.is_open()) {
    cerr << "Cannot open reads file: " << argv[1] << endl;
    exit(2);
  }
  reads.seekg (0, ios::end);
  ullong length = reads.tellg();
  int read_length = atoi(argv[2]);
  char *line = new char[read_length+1];
  reads.seekg (0, ios::beg);
  ullong nb_reads = length/(read_length+1);
  int threshold= atoi(argv[3]);
 
  hash_map<const char*, int, hash<const char*>, eqstr> hash_kmer;
 
  string read_string;
  const char *factor = new char[threshold+1];
  ullong current = 0;
 
  while (current < nb_reads) {
    reads.getline(line, read_length+1);
    read_string = line;
    for (ullong i = 0 ; i < (read_length-threshold) ; i++){
      factor = (read_string.substr(i,threshold)).c_str();
      hash_kmer[factor] ++ ;
    }
    current++;
  }  
  cout << "nb_factors indexed in hash_kmer: " << hash_kmer.size() << endl;
  cerr << "Creation of stl_hash_map: " << (getChrono()-start)/1000000. << " s" << endl;
  return 0;
} | 
Partager