// RUN: %compile-cxx
// RUN: %must-run-ddl %mpiexec-numproc-flag 4 %t.exe 2>&1 | %filecheck %s

// CHECK: [MUST-REPORT]{{.*The application issued a set of MPI calls that can cause a deadlock!}}
// CHECK-DAG: MPI_Waitsome@rank 0
// CHECK-DAG: MPI_Recv

/**
 * @file NbrWaitsomeDl.cpp
 * Test case that uses MPI_Waitsome to wait on MPI_Ineighbor_allgather calls
 * on a periodic Cartesian communicator.
 *
 * Description:
 * This test case deadlocks due to rank 0 waiting on MPI_Ineighbor_allgather calls 
 * before issuing its MPI_Send calls, while the other ranks block in MPI_Recv before entering 
 * the collectives. (Error)
 *
 * @author Cornelius Pätzold
 */

#include <mpi.h>
#include <iostream>
#include <vector>

#define N 4

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    int world_rank, world_size;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    int dims[2] = {0, 0};
    MPI_Dims_create(world_size, 2, dims);
    int periods[2] = {1, 1}; // Make cart comm periodic in both dimensions
    MPI_Comm cart_comm;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &cart_comm);

    int cart_rank;
    MPI_Comm_rank(cart_comm, &cart_rank);

    std::vector<int> sendbuf(4, cart_rank);
    std::vector<std::vector<int>> recvbuf(N, std::vector<int>(4, -1));

    std::vector<MPI_Request> requests(N);
    std::vector<int> indices(N);
    int p2pBuffer = 0;

    if (world_rank == 0) {
        for (int i = 0; i < N; i++) {
            MPI_Ineighbor_allgather(
                sendbuf.data(),
                1,
                MPI_INT,
                recvbuf[i].data(),
                1,
                MPI_INT,
                cart_comm,
                &requests[i]);
        }

        int outCount;
        MPI_Waitsome(N, requests.data(), &outCount, indices.data(), MPI_STATUS_IGNORE);
        for (int i = 1; i < world_size; i++) {
            MPI_Send(&p2pBuffer, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
        }
    } else {
        MPI_Recv(&p2pBuffer, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        for (int i = 0; i < N; i++) {
            MPI_Ineighbor_allgather(
                sendbuf.data(),
                1,
                MPI_INT,
                recvbuf[i].data(),
                1,
                MPI_INT,
                cart_comm,
                &requests[i]);
        }

        int outCount;
        MPI_Waitsome(N, requests.data(), &outCount, indices.data(), MPI_STATUS_IGNORE);
    }

    MPI_Comm_free(&cart_comm);
    MPI_Finalize();
    return 0;
}