
//  ReadMapper.cpp
//  kissreads_g
//
//  Created by Pierre Peterlongo on 03/01/13.
//  Copyright (c) 2013 Pierre Peterlongo. All rights reserved.
//

#include "ReadMapper.h"
extern int size_seeds;


// Scans `distances` for its smallest value that is strictly below
// `distance_upper_bound`.
// Returns the index of that value when it occurs exactly once, and -1 when no
// value is below the bound or when the smallest value is shared by several entries.
static int index_of_unique_best_mapping(const std::vector<int> &distances, int distance_upper_bound){
    int best_distance = distance_upper_bound;
    int best_index = -1;
    bool is_unique = false;
    for(size_t i=0;i<distances.size();i++){
        if(distances[i] < best_distance){
            // strictly better than everything seen so far: new (for now unique) champion
            best_distance = distances[i];
            best_index = static_cast<int>(i);
            is_unique = true;
        }
        else if(distances[i] == best_distance){
            // ties with the current best: the best mapping is ambiguous
            is_unique = false;
        }
    }
    return is_unique ? best_index : -1;
}

// Maps every read of every file of `read_bank` on `graph`.
//
// For each read (upper-cased in place), and for both the read as given and the
// result of revcomp() on it, every seed of length `size_seeds` found in the
// graph seed index is used as an anchor for graph->map_approx_a_fragment().
// All successful mappings are collected; the read is kept only if exactly one
// of them has the smallest distance (and that distance does not exceed
// graph->threshold_substitutions). In that case graph->populate_given_paths()
// is called with the corresponding paths, anchoring node and position.
//
// read_bank: source of the reads (all files 0..nb_files-1 are consumed).
// graph:     graph on which reads are mapped; its tested-position vector (bv)
//            is reinitialised after each read.
// Returns the number of reads for which a unique best mapping was found.
//
// NOTE(review): the read buffer returned by get_next_seq_from_file() is modified
// in place and is not freed here — presumably it is owned by the bank; confirm.
uint64_t map_reads(Bank *read_bank, DBG * graph){
    char * read;
    int size_read;
    uint64_t number_mapped = 0;
    uint64_t offset_seed;
    uint64_t nb_seeds;

    // Seed buffer: size_seeds characters plus a terminating '\0'. The terminator
    // is written once here; only the first size_seeds characters are overwritten later.
    std::vector<char> seed(static_cast<size_t>(size_seeds)+1, '\0');

    for(int file_id=0;file_id<read_bank->nb_files;file_id++){
        while(read_bank->get_next_seq_from_file(&read,&size_read, file_id)){ // each read
            if(number_mapped%1000==0)
                printf("\r %llu reads mapped", static_cast<unsigned long long>(number_mapped));

            // Upper-case the read in place. The length is computed once, and the
            // character goes through unsigned char as toupper() requires.
            const size_t read_length = strlen(read);
            for(size_t i=0;i<read_length;i++)
                read[i] = static_cast<char>(toupper(static_cast<unsigned char>(read[i])));

            // Given that errors are authorized, this read may be mapped on the graph using several
            // distinct initial nodes (where a seed provides an anchoring position).
            // Thus one needs to try all possible anchors leading to a mapping, and to keep only the
            // best one if it is unique. If the best one is not unique, the read is not mapped.
            // The vectors below are parallel: entry k of each describes the k-th successful mapping.
            // They are local to this iteration, so they are released automatically for each read.
            std::vector<std::vector<Edge_dbg *> > left_paths;  // left path of each successful mapping
            std::vector<std::vector<Edge_dbg *> > right_paths; // right path of each successful mapping
            std::vector<Node_dbg *> anchoring_nodes;           // anchoring node of each successful mapping
            std::vector<int> pwis;                             // anchoring position (pwi) of each successful mapping
            std::vector<int> distances;                        // distance reported for each successful mapping
            std::vector<bool> forwards;                        // true if mapped during the first pass (read as given), false if during the revcomp pass

            const int stop = size_read-size_seeds+1; // number of seed start positions on the read (<=0 if the read is shorter than a seed)
            for(int direction=0;direction<2;direction++){
                // read all seeds present on the read:
                for (int i=0;i<stop;i++){
                    for(int j=0;j<size_seeds;j++) seed[j]=read[i+j]; // copy the seed starting at position i
                    // skip seeds that are not indexed in the graph
                    if(!get_seed_info(graph->seeds_count,seed.data(),&offset_seed,&nb_seeds)) continue;

                    // 64-bit index: offset_seed/nb_seeds are uint64_t and must not be truncated to int
                    for (uint64_t ii=offset_seed; ii<offset_seed+nb_seeds; ii++) {
                        const int node_id = graph->seed_table[ii].a;
                        Node_dbg* node = graph->all_nodes[node_id];
                        const int pwi = graph->seed_table[ii].b-i; // starting position of the read on the node.
                        // overview situation:

                        //        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;  node sequence
                        //        <---------> b
                        //                   [--------]                     seed
                        //             ******************************       read
                        //             <----> i
                        //        <---> pwi
                        if(node->fragment->isPalindromic() && direction)
                            continue; // palindromic nodes are only considered during the first (forward) pass

                        if(graph->position_already_tested(node, pwi))
                            continue; // this position was already tested with this read

                        std::vector <Edge_dbg *> right_path;
                        std::vector <Edge_dbg *> left_path;
                        int distance;
                        const bool is_mapped = graph->map_approx_a_fragment(graph->seed_table[ii].b, i, node, read, &distance, &left_path, &right_path);
                        if(is_mapped){ // remember this successful mapping
                            distances.push_back(distance);
                            left_paths.push_back(left_path);
                            right_paths.push_back(right_path);
                            anchoring_nodes.push_back(node);
                            pwis.push_back(pwi);
                            forwards.push_back(direction==0);
                        }
                    } // end each occurrence of the seed on the graph
                } // end each position on the read
                // Now switch to the reverse complement for the second pass. The call made after
                // the second pass is expected to put the read back in its initial orientation.
                revcomp(read,size_read);
            } // end the two directions
            graph->bv->reinit(); // forget the positions tested with this read

            // 1/ check that a unique mapping provides the best alignment (smallest distance,
            //    at most graph->threshold_substitutions)
            const int id_best_path = index_of_unique_best_mapping(distances, graph->threshold_substitutions+1);

            // 2/ populate the associated paths
            if(id_best_path >= 0){
                number_mapped++;
                const bool mapped_forward = forwards[id_best_path];
                if(!mapped_forward)
                    revcomp(read,size_read); // this was the revcomp read that was mapped.

                graph->populate_given_paths(left_paths[id_best_path], right_paths[id_best_path], anchoring_nodes[id_best_path], pwis[id_best_path], file_id, read);

                if(!mapped_forward)
                    revcomp(read,size_read); // this was the revcomp read that was mapped. Put it back.
            }
        } // end each read
    } // end each file
    printf("\n");
    return number_mapped;
}

//void map_reads_deprecated(Bank *read_bank, DBG * graph){
////    char * read;
//    char seed [size_seeds+1];
//    char * rev_comp_read = (char *)malloc(sizeof(char)*16384); 	test_alloc(rev_comp_read);
//    int size_read;
//    
//    uint64_t offset_seed;
//    uint64_t nb_seeds;
//    for(int file_id=0;file_id<read_bank->nb_files;file_id++){
//        while(read_bank->get_next_seq_from_file(&read,&size_read, file_id)){ // each read
//            
//            printf("mapping read %s\n", read);
//            
//            int stop = size_read-size_seeds+1;
//            for(int direction=0;direction<2;direction++){
//                // read all seeds present on the read:
//                for (int i=0;i<stop;i++){
//                    for(int j=0;j<size_seeds;j++) seed[j]=read[i+j]; seed[size_seeds]='\0';// read the seed
////                    printf("trying seed %s\n", seed);
//                    // if the seed is indexed in the fragments:
//                    if(get_seed_info(graph->seeds_count,seed,&offset_seed,&nb_seeds)){
////                        printf("occurring at %d positions\n", nb_seeds);
//                        for (int ii=offset_seed; ii<offset_seed+nb_seeds; ii++) {
//                            int node_id = graph->seed_table[ii].a;
//                            Node_dbg* node = graph->all_nodes[node_id];
//                            int pwi = graph->seed_table[ii].b-i; // starting position of the read on the node.
//                            // overview situation:
//                            
//                            //        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;  node sequence
//                            //        <---------> b
//                            //                   [--------]                     seed
//                            //             ******************************       read
//                            //             <----> i
//                            //        <---> pwi
//                            
//                            // if pwi<0, another node exists on which the mapped fragment starts. In such a case, another call to map_strictly_a_fragment, with pwi>=0 on that other node, is equivalent
//                            if(pwi>=0 && !graph->position_already_tested(node, pwi)){ // test if this position was not already tested with this read
//                                
//                                bool is_mapped = graph->map_strictly_a_fragment(pwi, node, read);
//                                if(is_mapped) graph->populate(pwi,node,read,file_id);
////                                printf("I%s mapped %s on %s, position %d/%d\n", is_mapped?" successfully":" did not", read, node->fragment->fragment_sequence, pwi, graph->seed_table[ii].b); // DEB
//                                
//                                
////                                exit(0); // DEB
//                            } // end this position was not already tested with this read
//                        } // end each occurrence of the seed on the graph
//                    } // end this seed is indexed
//                } // end each position on the read
//                graph->bv->reinit();
//                revcomp(read,size_read); // know switch to the reverse complement for the second run
//            } // end the two directions
//        } // end each read
//    } // end each file
//
// }