diff --git a/cfg/yolo.cfg b/cfg/yolo.cfg
index 2d31027d602e12f94871ede69e4fe8f406ccb7ba..7c37c3adaaf8e7ea445859bd13cd183538fadfab 100644
--- a/cfg/yolo.cfg
+++ b/cfg/yolo.cfg
@@ -1,8 +1,8 @@
 [net]
 batch=64
 subdivisions=8
-height=608
-width=608
+height=416
+width=416
 channels=3
 momentum=0.9
 decay=0.0005
diff --git a/src/region_layer.c b/src/region_layer.c
index 5a3794a51caffd8a4d87a85858327c7d3fd8f2d0..7b57e60921d2e64d416e1b74c8c018232a19b5c9 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -148,6 +148,14 @@ void forward_region_layer(const layer l, network_state state)
     memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
 
 #ifndef GPU
+    for (b = 0; b < l.batch; ++b){
+        for(n = 0; n < l.n; ++n){
+            int index = entry_index(l, b, n*l.w*l.h, 0);
+            activate_array(l.output + index, 2*l.w*l.h, LOGISTIC);
+            index = entry_index(l, b, n*l.w*l.h, 4);
+            activate_array(l.output + index,   l.w*l.h, LOGISTIC);
+        }
+    }
     if (l.softmax_tree){
         int i;
         int count = 5;
@@ -157,7 +165,8 @@ void forward_region_layer(const layer l, network_state state)
             count += group_size;
         }
     } else if (l.softmax){
-        softmax_cpu(state.input + 5, l.classes, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + 5);
+        int index = entry_index(l, 0, 0, 5);
+        softmax_cpu(state.input + index, l.classes, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index);
     }
 #endif
 
@@ -429,7 +438,7 @@ void forward_region_layer_gpu(const layer l, network_state state)
     cpu_state.truth = truth_cpu;
     cpu_state.input = in_cpu;
     forward_region_layer(l, cpu_state);
-    cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs);
+    //cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs);
     free(cpu_state.input);
     if(!state.train) return;
     cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs);