6 #include "suffix_tree.h"    11   for(
unsigned int i = 0; i < MAX_CHAR;i++)
    12     this->children[i] = 0;
    20 bool suffix_node::is_leaf_node(){
    21   return (nchildren == 0);
    24 bool suffix_node::contains_child(
int ext){
    25   return !(this->children[ext] == 0);
    28 int suffix_node::get_length(){
    31   return *(this->end) - this->begin + 1;
    34 int suffix_node::get_depth(){
    44 int get_hamming_distance(std::string s1, std::string s2, 
int k){
    46   for(
unsigned int i = 0; i < s1.length(); i++){
    56 std::string suffix_node::get_longest_common_substring(std::string s1, std::string s2){
    57   std::string str = s1+
"#"+s2+
"@";
    58   std::string p=
"", tmp, longest_trvsl = 
"";
    59   unsigned int max = 0, i;
    64     tmp = node->get_path(str);
    65     if(s1.find(p+tmp) != std::string::npos && s2.find(p+tmp) != std::string::npos)
    69   for(i = 0; i < MAX_CHAR;i++){
    70     if(node->children[i] == 0)
    72     longest_trvsl = node->children[i]->get_longest_common_substring(s1, s2);
    73     if(longest_trvsl.length() > max){
    74       max = longest_trvsl.length();
    82 std::string suffix_node::get_path(std::string s){
    85   return s.substr(this->begin, *(this->end) - this->begin + 1);
    88 void suffix_node::extend_path(
int *e){
    94   this->children[ext] = n;
    99 void suffix_node::add_child(
suffix_node* c, 
int ext){
   100   this->children[ext] = c;
   106   return this->children[ext];
   109 bool suffix_node::contains_depth(
int depth){
   110   return this->get_depth() <= depth && depth <= (this->get_length() + this->get_depth());
   113 void suffix_node::print(std::string s){
   114   for(
int i = 0; i < this->get_depth(); i++){
   117   std::string t = (this->begin == -1) ? 
"R" : 
" "+s.substr(this->begin, *(this->end) - this->begin + 1);
   119   if(this->suffix_link != 0)
   120     std::cout << 
" --- ( " << this->suffix_link->parent->get_path(s) << 
" " << this->suffix_link->get_path(s) << 
")";
   122     std::cout << 
" - " << this->parent->get_path(s);
   123   std::cout << std::endl;
   124   for(
unsigned int i = 0; i<alphabet.length();i++){
   125     if(this->children[i]!=0)
   126       this->children[i]->print(s);
   130 bool suffix_node::walk_next(
int &beg, 
int &suffix_length){
   132   if(suffix_length >= node->get_length()){
   133     beg += node->get_length();
   134     suffix_length -= node->get_length();
   142   root->suffix_link = root;
   144   suffix_node *cur_node = root, *new_node = 0, *new_cur_node = 0;
   145   int str_ind[MAX_SIZE], suffix_length = 0, *leaf_end=  
new int, suffix_count = 0, beg = -1, *end;
   146   unsigned int i = 0, n_str_ind = 0;
   147   for(i = 0; i < s.length();i++){
   148     str_ind[i] = alphabet.find(s[i]);
   153   for(i = 0; i < n_str_ind;i++){
   157     while(suffix_count > 0){
   158       if(suffix_length == 0)
   160       if(cur_node->children[str_ind[beg]] == 0){         
   161     cur_node->add_child(str_ind[beg], beg, leaf_end, 0); 
   163       new_node->suffix_link = cur_node;
   167     new_cur_node = cur_node->children[str_ind[beg]];
   168     if(new_cur_node->walk_next(beg, suffix_length)){
   169       cur_node = new_cur_node;
   172     if(str_ind[new_cur_node->begin + suffix_length] == str_ind[i]){ 
   173       if(new_node != 0 && cur_node->end != 0){
   174         new_node->suffix_link = cur_node;
   183     *end = new_cur_node->begin + suffix_length - 1;
   185     new_int_node = cur_node->add_child(str_ind[beg], new_cur_node->begin, end, 0);
   186     new_int_node->add_child(str_ind[i], i, leaf_end, 0);
   187     new_cur_node->begin += suffix_length;
   188     new_int_node->add_child(new_cur_node, str_ind[new_cur_node->begin]);
   190       new_node->suffix_link = new_int_node;
   191     new_node = new_int_node;
   194       if(cur_node->end==0 && suffix_length > 0){
   196     beg = i - suffix_count + 1;
   197       } 
else if(cur_node->end!=0){
   198     if(cur_node->suffix_link != 0){
   199       cur_node = cur_node->suffix_link;
   201       cur_node = cur_node->parent->suffix_link;
   203       } 
else if(cur_node->get_length() == 1){
   211 std::string get_reverse_complement(std::string rev_read){
   213   for(
unsigned int i = 0;i< rev_read.length();i++){
   230   std::reverse(rev_read.begin(), rev_read.end());
   234 std::vector<std::string> read_adapters_from_fasta(std::string p){
   235   std::vector<std::string> adp;
   236   std::ifstream fin(p);
   238   while (std::getline(fin, line)){
   246 std::string extend_till_mismatches(std::string cmn, std::string l, std::string adp){
   247   if(cmn.length() < MIN_LENGTH){
   250   int pos = adp.find(cmn);
   251   int r_pos = l.find(cmn);
   257   while(r_pos >= 0 && pos >= 0 && k <= MAX_MISMATCHES){
   258     cmn = l[r_pos] + cmn;
   259     if(l[r_pos]!=adp[pos])
   267 int trim_adapter(std::string f1, std::string f2, std::string adp_path, std::string p){
   268   std::string l1, l2, l1_rc, l2_rc, s, cmn, trmd_read;
   269   int *t = 
new int[2], beg;
   271   std::ifstream ff1(f1), ff2, pf(p);
   272   std::ofstream out(p+
".trimmed.fastq");
   273   std::vector<std::string>::iterator it;
   275   std::vector<std::string> adp = read_adapters_from_fasta(adp_path);
   276   int i = -1, n = 0, adp_pos;
   279     while (std::getline(ff1, l1) && std::getline(ff2, l2)){
   285     while (std::getline(ff1, l1)){
   288     out << l1 << std::endl;
   294       for(it = adp.begin(); it != adp.end(); ++it) {
   295     s = l1 + 
"#" + *it + 
"@";
   296     root = build_suffix_tree(s);
   297     cmn = root->get_longest_common_substring(l1, *it);
   298     cmn = extend_till_mismatches(cmn, l1, *it);
   300       l1_rc = get_reverse_complement(l1);
   301       s = l1_rc + 
"#" + *it + 
"@";
   302       root = build_suffix_tree(s);
   303       cmn = root->get_longest_common_substring(l1_rc, *it);
   304       cmn = extend_till_mismatches(cmn, l1_rc, *it);
   305       pos = l1_rc.find(cmn);
   306       beg = l1_rc.length() - pos;
   307       adp_pos = (*it).find(cmn);
   311       adp_pos = (*it).find(cmn);
   313     if(cmn.length() >= MIN_LENGTH && adp_pos != 0){
   316       t = align_seqs(l1, *it);
   321         std::cout << 
"Reverse!" << std::endl;
   322         l1_rc = get_reverse_complement(l1);
   323         t = align_seqs(l1_rc, *it);
   325           pos = l1.length() - *(t+1);
   330     std::cout << beg << 
" " << pos << std::endl;
   331     if(beg != 0 || pos != l1.length()){
   332       trmd_read = l1.substr(beg, pos);
   337       out << trmd_read << std::endl;
   340   std::cout << 
"Number of Adapter Trimmed: :" << n << std::endl;