Add a DETECTION cost type (CPU path plus an OpenCL "mask" kernel), select it
through the [cost] "type" cfg option, and add detection train/test entry points.

Review amendments (added lines only; context and removed lines are untouched):
  * test_detection: stop at EOF and strip the newline fgets leaves in the path;
    drop the unused locals.
  * get_cost_type: the fallback warning now says "cost type".
  * Short comments on the new functions; hunk headers recomputed accordingly.
The index lines are kept from the original patch, so the post-image blob ids
of the amended files are stale.

diff --git a/src/axpy.cl b/src/axpy.cl
index 901a826637b2424a1a8a36985cce5c6fa84851c7..396389d98c4ae60663ed3820468af72e15ff338b 100644
--- a/src/axpy.cl
+++ b/src/axpy.cl
@@ -10,6 +10,13 @@ __kernel void scal(int N, float ALPHA, __global float *X, int INCX)
     X[i*INCX] *= ALPHA;
 }
 
+/* Zero x[i] when the first mask entry of its mod-sized group is 0 (n is unused: no bounds check). */
+__kernel void mask(int n, __global float *x, __global float *mask, int mod)
+{
+    int i = get_global_id(0);
+    x[i] = (mask[(i/mod)*mod]) ? x[i] : 0;
+}
+
 __kernel void copy(int N, __global float *X, int OFFX, int INCX, __global float *Y, int OFFY, int INCY)
 {
     int i = get_global_id(0);
diff --git a/src/cnn.c b/src/cnn.c
index 29f9565210c9dd4530ff8d85aa8dc2bacb0699bb..8a4899c53b74d794e7d6b3ee88fe9b8cf5cd547a 100644
--- a/src/cnn.c
+++ b/src/cnn.c
@@ -314,15 +314,14 @@ void train_detection_net()
     int imgs = 1000/net.batch+1;
     srand(time(0));
     int i = 0;
-    char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list");
-    list *plist = get_paths("/data/imagenet/cls.train.list");
+    list *plist = get_paths("/home/pjreddie/data/imagenet/horse.txt");
     char **paths = (char **)list_to_array(plist);
     printf("%d\n", plist->size);
     clock_t time;
     while(1){
         i += 1;
         time=clock();
-        data train = load_data_random(imgs*net.batch, paths, plist->size, labels, 1000, 256, 256);
+        data train = load_data_detection_random(imgs*net.batch, paths, plist->size, 256, 256, 8, 8, 256);
         //translate_data_rows(train, -144);
         normalize_data_rows(train);
         printf("Loaded: %lf seconds\n", sec(clock()-time));
@@ -346,7 +345,7 @@ void train_imagenet()
 {
     float avg_loss = 1;
     //network net = parse_network_cfg("/home/pjreddie/imagenet_backup/alexnet_1270.cfg");
-    network net = parse_network_cfg("cfg/alexnet.cfg");
+    network net = parse_network_cfg("cfg/trained_alexnet.cfg");
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     int imgs = 1000/net.batch+1;
     srand(time(0));
@@ -412,6 +411,31 @@ void validate_imagenet(char *filename)
     }
 }
 
+/* Read image paths from stdin, one per line, and run the detection net on each until EOF. */
+void test_detection()
+{
+    network net = parse_network_cfg("cfg/detnet_test.cfg");
+    //imgs=1;
+    srand(2222222);
+    clock_t time;
+    char filename[256];
+    int indexes[10];
+    /* Loop on fgets' result so EOF ends the run instead of reusing a stale buffer. */
+    while(fgets(filename, sizeof(filename), stdin)){
+        /* fgets keeps the trailing newline; drop it so the path can be opened. */
+        filename[strcspn(filename, "\n")] = '\0';
+        image im = load_image_color(filename, 256, 256);
+        z_normalize_image(im);
+        printf("%d %d %d\n", im.h, im.w, im.c);
+        float *X = im.data;
+        time=clock();
+        network_predict(net, X);
+        top_predictions(net, 10, indexes);
+        printf("%s: Predicted in %f seconds.\n", filename, sec(clock()-time));
+        free_image(im);
+    }
+}
+
 void test_imagenet()
 {
     network net = parse_network_cfg("cfg/imagenet_test.cfg");
@@ -717,6 +741,7 @@ int main(int argc, char *argv[])
         return 0;
     }
     if(0==strcmp(argv[1], "train")) train_imagenet();
+    else if(0==strcmp(argv[1], "detection")) train_detection_net();
     else if(0==strcmp(argv[1], "asirra")) train_asirra();
     else if(0==strcmp(argv[1], "nist")) train_nist();
     else if(0==strcmp(argv[1], "test_correct")) test_gpu_net();
@@ -726,7 +751,6 @@ int main(int argc, char *argv[])
 #ifdef GPU
     else if(0==strcmp(argv[1], "test_gpu")) test_gpu_blas();
 #endif
-    test_parser();
     fprintf(stderr, "Success!\n");
     return 0;
 }
diff --git a/src/cost_layer.c b/src/cost_layer.c
index 1df0ed4f527837f233fb3850aec43ed0d6d96c2c..6614b94896f32feff15d6cb5a99b07b52a80ad47 100644
--- a/src/cost_layer.c
+++ b/src/cost_layer.c
@@ -2,15 +2,38 @@
 #include "utils.h"
 #include "mini_blas.h"
 #include <math.h>
+#include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
 
-cost_layer *make_cost_layer(int batch, int inputs)
+/* Map a cfg "type" string to a COST_TYPE; unknown names warn on stderr and fall back to SSE. */
+COST_TYPE get_cost_type(char *s)
+{
+    if (strcmp(s, "sse")==0) return SSE;
+    if (strcmp(s, "detection")==0) return DETECTION;
+    fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s);
+    return SSE;
+}
+
+/* Inverse of get_cost_type: the cfg name for a COST_TYPE ("sse" for anything unrecognised). */
+char *get_cost_string(COST_TYPE a)
+{
+    switch(a){
+        case SSE:
+            return "sse";
+        case DETECTION:
+            return "detection";
+    }
+    return "sse";
+}
+
+cost_layer *make_cost_layer(int batch, int inputs, COST_TYPE type)
 {
     fprintf(stderr, "Cost Layer: %d inputs\n", inputs);
     cost_layer *layer = calloc(1, sizeof(cost_layer));
     layer->batch = batch;
     layer->inputs = inputs;
+    layer->type = type;
     layer->delta = calloc(inputs*batch, sizeof(float));
     layer->output = calloc(1, sizeof(float));
 #ifdef GPU
@@ -24,6 +47,13 @@ void forward_cost_layer(cost_layer layer, float *input, float *truth)
     if (!truth) return;
     copy_cpu(layer.batch*layer.inputs, truth, 1, layer.delta, 1);
     axpy_cpu(layer.batch*layer.inputs, -1, input, 1, layer.delta, 1);
+    /* Values come in groups of 5: if a group's first truth entry is 0, only that first delta is kept. */
+    if(layer.type == DETECTION){
+        int i;
+        for(i = 0; i < layer.batch*layer.inputs; ++i){
+            if((i%5) && !truth[(i/5)*5]) layer.delta[i] = 0;
+        }
+    }
     *(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1);
 }
 
@@ -33,6 +63,40 @@ void backward_cost_layer(const cost_layer layer, float *input, float *delta)
 }
 
 #ifdef GPU
+
+/* Fetch the "mask" kernel from src/axpy.cl on first use and reuse the cached handle afterwards. */
+cl_kernel get_mask_kernel()
+{
+    static int init = 0;
+    static cl_kernel kernel;
+    if(!init){
+        kernel = get_kernel("src/axpy.cl", "mask", 0);
+        init = 1;
+    }
+    return kernel;
+}
+
+/* Enqueue the mask kernel with a 1-D global size of n, passing n, x, mask and mod as its arguments. */
+void mask_ongpu(int n, cl_mem x, cl_mem mask, int mod)
+{
+    cl_setup();
+    cl_kernel kernel = get_mask_kernel();
+    cl_command_queue queue = cl.queue;
+
+    cl_uint i = 0;
+    cl.error = clSetKernelArg(kernel, i++, sizeof(n), (void*) &n);
+    cl.error = clSetKernelArg(kernel, i++, sizeof(x), (void*) &x);
+    cl.error = clSetKernelArg(kernel, i++, sizeof(mask), (void*) &mask);
+    cl.error = clSetKernelArg(kernel, i++, sizeof(mod), (void*) &mod);
+    check_error(cl);
+
+    const size_t global_size[] = {n};
+
+    cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0);
+    check_error(cl);
+
+}
+
 void forward_cost_layer_gpu(cost_layer layer, cl_mem input, cl_mem truth)
 {
     if (!truth) return;
@@ -40,6 +104,10 @@ void forward_cost_layer_gpu(cost_layer layer, cl_mem input, cl_mem truth)
     copy_ongpu(layer.batch*layer.inputs, truth, 1, layer.delta_cl, 1);
     axpy_ongpu(layer.batch*layer.inputs, -1, input, 1, layer.delta_cl, 1);
 
+    if(layer.type==DETECTION){
+        mask_ongpu(layer.inputs*layer.batch, layer.delta_cl, truth, 5);
+    }
+
     cl_read_array(layer.delta_cl, layer.delta, layer.batch*layer.inputs);
     *(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1);
     //printf("%f\n", *layer.output);
diff --git a/src/cost_layer.h b/src/cost_layer.h
index edda8f95970bc25a681e5f8fbee9ffe00a3a3b2d..2f1cd5510521bb4c1b4cfef0a633ced80dee2c5f 100644
--- a/src/cost_layer.h
+++ b/src/cost_layer.h
@@ -2,17 +2,25 @@
 #define COST_LAYER_H
 #include "opencl.h"
 
+/* Loss variants selectable through the [cost] section's "type" option. */
+typedef enum{
+    SSE, DETECTION
+} COST_TYPE;
+
 typedef struct {
     int inputs;
     int batch;
     float *delta;
     float *output;
+    COST_TYPE type;
     #ifdef GPU
     cl_mem delta_cl;
     #endif
 } cost_layer;
 
-cost_layer *make_cost_layer(int batch, int inputs);
+COST_TYPE get_cost_type(char *s);
+char *get_cost_string(COST_TYPE a);
+cost_layer *make_cost_layer(int batch, int inputs, COST_TYPE type);
 void forward_cost_layer(const cost_layer layer, float *input, float *truth);
 void backward_cost_layer(const cost_layer layer, float *input, float *delta);
 
diff --git a/src/data.c b/src/data.c
index 9b57391c2bec49c0cbdf5ea4ae5a63ce16404d71..3627fcba5767280c302bb421da7b447284e52812 100644
--- a/src/data.c
+++ b/src/data.c
@@ -26,6 +26,7 @@ void fill_truth_detection(char *path, float *truth, int height, int width, int n
     char *labelpath = find_replace(path, "imgs", "det");
     labelpath = find_replace(labelpath, ".JPEG", ".txt");
     FILE *file = fopen(labelpath, "r");
+    if(!file) file_error(labelpath);
     int x, y, h, w;
     while(fscanf(file, "%d %d %d %d", &x, &y, &w, &h) == 4){
         int i = x/box_width;
@@ -34,6 +35,7 @@ void fill_truth_detection(char *path, float *truth, int height, int width, int n
         float dw = (float)(y%box_width)/box_width;
         float sh = h/scale;
         float sw = w/scale;
+        //printf("%d %d %f %f\n", i, j, dh, dw);
         int index = (i+j*num_width)*5;
         truth[index++] = 1;
         truth[index++] = dh;
@@ -41,6 +43,7 @@ void fill_truth_detection(char *path, float *truth, int height, int width, int n
         truth[index++] = sh;
         truth[index++] = sw;
     }
+    fclose(file);
 }
 
 void fill_truth(char *path, char **labels, int k, float *truth)
@@ -125,7 +128,7 @@ void free_data(data d)
     }
 }
 
-data load_data_detection_random(int n, char **paths, int m, char **labels, int h, int w, int nh, int nw, float scale)
+data load_data_detection_random(int n, char **paths, int m, int h, int w, int nh, int nw, float scale)
 {
     char **random_paths = calloc(n, sizeof(char*));
     int i;
diff --git a/src/data.h b/src/data.h
index 366a382b8525192e9a193dafdeb4a1ff65037427..b9707c966fb5670be4010966e4e1f71b9fd85400 100644
--- a/src/data.h
+++ b/src/data.h
@@ -14,7 +14,7 @@ typedef struct{
 void free_data(data d);
 data load_data(char **paths, int n, char **labels, int k, int h, int w);
 data load_data_random(int n, char **paths, int m, char **labels, int k, int h, int w);
-data load_data_detection_random(int n, char **paths, int m, char **labels, int h, int w, int nh, int nw, float scale);
+data load_data_detection_random(int n, char **paths, int m, int h, int w, int nh, int nw, float scale);
 data load_data_image_pathfile(char *filename, char **labels, int k, int h, int w);
 data load_cifar10_data(char *filename);
 data load_all_cifar10();
diff --git a/src/parser.c b/src/parser.c
index 79d4a3a99f04533b30b8c50f70120d09466608e1..206975395dbb36f7e1bc675fa60c27bac1d1655c 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -165,7 +165,9 @@ cost_layer *parse_cost(list *options, network *net, int count)
     }else{
         input = get_network_output_size_layer(*net, count-1);
     }
-    cost_layer *layer = make_cost_layer(net->batch, input);
+    char *type_s = option_find_str(options, "type", "sse");
+    COST_TYPE type = get_cost_type(type_s);
+    cost_layer *layer = make_cost_layer(net->batch, input, type);
     option_unused(options);
     return layer;
 }
@@ -565,7 +567,7 @@ void print_softmax_cfg(FILE *fp, softmax_layer *l, network net, int count)
 
 void print_cost_cfg(FILE *fp, cost_layer *l, network net, int count)
 {
-    fprintf(fp, "[cost]\n");
+    fprintf(fp, "[cost]\ntype=%s\n", get_cost_string(l->type));
     if(count == 0) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs);
     fprintf(fp, "\n");
 }