/* Number of neighbouring cells inspected for every cell; this is the loop
 * bound used when walking the `neighbors` offset table. */
#define NUM_NEIGHS 8
/* Pixel value written for a live cell: first three channels 0x00, fourth
 * channel 0xFF (black with full alpha if the image is RGBA -- the channel
 * layout is chosen by the host; confirm there). */
#define ALIVE ((uint4) ( 0x0, 0x0, 0x0, 0xFF ))
/* Pixel value written for a dead cell: all four channels 0xFF. */
#define DEAD ((uint4) ( 0xFF, 0xFF, 0xFF, 0xFF ))
/* A cell counts as alive when the first channel of its pixel is zero. The
 * argument is parenthesized so that expressions such as `*ptr` or `a + b`
 * can be passed without the member access binding to the wrong operand. */
#define IS_ALIVE(cell) ((cell).x == 0x00)
/*
 * Relative (x, y) offsets of the eight cells that surround a cell, listed by
 * walking once around the ring starting at (-1, -1).
 */
__constant int2 neighbors[] = {
    (int2) (-1, -1),
    (int2) ( 0, -1),
    (int2) ( 1, -1),
    (int2) ( 1,  0),
    (int2) ( 1,  1),
    (int2) ( 0,  1),
    (int2) (-1,  1),
    (int2) (-1,  0)
};
/* Inclusive (min, max) count of live neighbours for which a live cell stays
 * alive in the next generation. */
__constant uint2 live_rule = (uint2) (2, 3);
/* Inclusive (min, max) count of live neighbours for which a dead cell becomes
 * alive in the next generation. */
__constant uint2 dead_rule = (uint2) (3, 3);
/*
 * Sampler used for every image read: integer (unnormalized) pixel
 * coordinates, nearest-pixel filtering, and no addressing mode. With
 * CLK_ADDRESS_NONE the caller must keep coordinates inside the image, which
 * is why the kernel wraps neighbour coordinates before reading.
 */
__constant sampler_t sampler =
    CLK_FILTER_NEAREST | CLK_ADDRESS_NONE | CLK_NORMALIZED_COORDS_FALSE;
/**
 * Advance a two-state, two-dimensional cellular automaton by one generation.
 *
 * Each work-item handles the cell at (get_global_id(0), get_global_id(1)).
 * It counts how many of the NUM_NEIGHS surrounding cells are alive, treating
 * the image as a torus (neighbours past one edge are taken from the opposite
 * edge), and then applies the survival/birth ranges in `live_rule` and
 * `dead_rule` to choose the cell's next state.
 *
 * @param in_img   Current generation; only read. A pixel is alive when its
 *                 first channel is zero (see IS_ALIVE).
 * @param out_img  Next generation; only written. Each in-range work-item
 *                 writes exactly one pixel, either ALIVE or DEAD.
 *                 NOTE(review): assumed to have the same dimensions as
 *                 in_img -- only in_img's size is queried here.
 */
__kernel void ca(__read_only image2d_t in_img, __write_only image2d_t out_img) {

    const int2 imdim = get_image_dim(in_img);
    const int2 coord = (int2) (get_global_id(0), get_global_id(1));

    /* Work-items that fall outside the image have nothing to do. */
    if (!all(coord < imdim)) {
        return;
    }

    /* Count live cells among the surrounding ring. */
    uint neighs_alive = 0;
    for (int i = 0; i < NUM_NEIGHS; ++i) {

        int2 n = coord + neighbors[i];
        /* Offsets are at most one cell, so a single correction per side is
         * enough: one past the far edge wraps to 0, -1 wraps to the last
         * row/column. The sampler has no addressing mode, so this must be
         * done before the read. */
        n = select(n, n - imdim, n >= imdim);
        n = select(n, imdim - 1, n < 0);

        const uint4 neigh_state = read_imageui(in_img, sampler, n);
        if (IS_ALIVE(neigh_state)) {
            neighs_alive++;
        }
    }

    /* Current state of this work-item's own cell. */
    const uint4 state = read_imageui(in_img, sampler, coord);
    const uint alive = IS_ALIVE(state);

    /* Dead unless the neighbour count falls inside the inclusive range that
     * applies to the cell's current state. */
    uint4 new_state = DEAD;
    if ((alive && (neighs_alive >= live_rule.s0)
            && (neighs_alive <= live_rule.s1))
        || (!alive && (neighs_alive >= dead_rule.s0)
            && (neighs_alive <= dead_rule.s1))) {
        /* Use the shared ALIVE constant (standard parenthesized vector
         * literal) rather than a separately spelled-out pixel value. */
        new_state = ALIVE;
    }

    write_imageui(out_img, coord, new_state);
}