diff --git a/cfg/writing.cfg b/cfg/writing.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..2c84b197d92cdb147174c5bcf8d5194af7abf721
--- /dev/null
+++ b/cfg/writing.cfg
@@ -0,0 +1,49 @@
+[net]
+batch=64
+subdivisions=1
+height=256
+width=256
+channels=3
+learning_rate=0.00001
+momentum=0.9
+decay=0.0005
+seen=0
+
+[crop]
+crop_height=256
+crop_width=256
+flip=0
+angle=0
+saturation=1
+exposure=1
+
+[convolutional]
+filters=32
+size=3
+stride=1
+pad=1
+activation=ramp
+
+[convolutional]
+filters=32
+size=3
+stride=1
+pad=1
+activation=ramp
+
+[convolutional]
+filters=32
+size=3
+stride=1
+pad=1
+activation=ramp
+
+[convolutional]
+filters=1
+size=5
+stride=1
+pad=1
+activation=logistic
+
+[cost]
+
diff --git a/src/data.c b/src/data.c
index 17772c1c5625cc0bbe299e902f7ca2863f0ae173..7ef154663155786d232264ad4d09082755107505 100644
--- a/src/data.c
+++ b/src/data.c
@@ -54,7 +54,12 @@ matrix load_image_paths_gray(char **paths, int n, int w, int h)
     X.cols = 0;
 
     for(i = 0; i < n; ++i){
-        image im = load_image(paths[i], w, h, 1);
+        image im = load_image(paths[i], w, h, 3);
+
+        image gray = grayscale_image(im);
+        free_image(im);
+        im = gray;
+
         X.vals[i] = im.data;
         X.cols = im.h*im.w*im.c;
     }
@@ -571,14 +576,14 @@ pthread_t load_data_in_thread(load_args args)
     return thread;
 }
 
-data load_data_writing(char **paths, int n, int m, int w, int h)
+data load_data_writing(char **paths, int n, int m, int w, int h, int downsample)
 {
     if(m) paths = get_random_paths(paths, n, m);
     char **replace_paths = find_replace_paths(paths, n, ".png", "-label.png");
     data d;
     d.shallow = 0;
     d.X = load_image_paths(paths, n, w, h);
-    d.y = load_image_paths_gray(replace_paths, n, w/8, h/8);
+    d.y = load_image_paths_gray(replace_paths, n, w/downsample, h/downsample);
     if(m) free(paths);
     int i;
     for(i = 0; i < n; ++i) free(replace_paths[i]);
diff --git a/src/data.h b/src/data.h
index 216ab0c4bd0379f5f74031d83c34bc44acbc7556..1343460cba2bc5b3fe7a81acec0389a161d0234a 100644
--- a/src/data.h
+++ b/src/data.h
@@ -68,7 +68,7 @@ box_label *read_boxes(char *filename, int *n);
 data load_cifar10_data(char *filename);
 data load_all_cifar10();
 
-data load_data_writing(char **paths, int n, int m, int w, int h);
+data load_data_writing(char **paths, int n, int m, int w, int h, int downsample);
 
 list *get_paths(char *filename);
 char **get_labels(char *filename);
diff --git a/src/image.c b/src/image.c
index b6d757788c6e06ac502cb68882df5b6786cd3ad8..fa48678eee7615d1964a74ae5a588a98453d34f3 100644
--- a/src/image.c
+++ b/src/image.c
@@ -241,650 +241,648 @@ void show_image_cv(image p, char *name)
     }
     cvShowImage(buff, disp);
     cvReleaseImage(&disp);
-    }
+}
 #endif
 
-    void show_image(image p, char *name)
-    {
+void show_image(image p, char *name)
+{
 #ifdef OPENCV
-        show_image_cv(p, name);
+    show_image_cv(p, name);
 #else
-        fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
-        save_image(p, name);
+    fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
+    save_image(p, name);
 #endif
-    }
+}
 
-    void save_image(image im, char *name)
-    {
-        char buff[256];
-        //sprintf(buff, "%s (%d)", name, windows);
-        sprintf(buff, "%s.png", name);
-        unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
-        int i,k;
-        for(k = 0; k < im.c; ++k){
-            for(i = 0; i < im.w*im.h; ++i){
-                data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]);
-            }
+void save_image(image im, char *name)
+{
+    char buff[256];
+    // Writes im to "<name>.png"; snprintf truncates an over-long name instead of overflowing buff.
+    snprintf(buff, sizeof(buff), "%s.png", name);
+    unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
+    int i,k;
+    for(k = 0; k < im.c; ++k){
+        for(i = 0; i < im.w*im.h; ++i){
+            data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]); // planar floats -> interleaved bytes
         }
-        int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
-        free(data);
-        if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
     }
+    int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
+    free(data);
+    if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
+}
 
 #ifdef OPENCV
-    void save_image_jpg(image p, char *name)
-    {
-        image copy = copy_image(p);
-        rgbgr_image(copy);
-        int x,y,k;
-
-        char buff[256];
-        sprintf(buff, "%s.jpg", name);
-
-        IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c);
-        int step = disp->widthStep;
-        for(y = 0; y < p.h; ++y){
-            for(x = 0; x < p.w; ++x){
-                for(k= 0; k < p.c; ++k){
-                    disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255);
-                }
+void save_image_jpg(image p, char *name)
+{
+    image copy = copy_image(p); // work on a copy so the caller's image is not modified
+    rgbgr_image(copy); // presumably swaps R and B for OpenCV's channel order -- confirm in rgbgr_image
+    int x,y,k;
+
+    char buff[256];
+    snprintf(buff, sizeof(buff), "%s.jpg", name); // bounded: an over-long name is truncated, not overflowed
+
+    IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c);
+    int step = disp->widthStep;
+    for(y = 0; y < p.h; ++y){
+        for(x = 0; x < p.w; ++x){
+            for(k= 0; k < p.c; ++k){
+                disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255);
             }
         }
-        cvSaveImage(buff, disp,0);
-        cvReleaseImage(&disp);
-        free_image(copy);
     }
-    #endif
+    cvSaveImage(buff, disp,0);
+    cvReleaseImage(&disp);
+    free_image(copy);
+}
+#endif
 
-    void show_image_layers(image p, char *name)
-    {
-        int i;
-        char buff[256];
-        for(i = 0; i < p.c; ++i){
-            sprintf(buff, "%s - Layer %d", name, i);
-            image layer = get_image_layer(p, i);
-            show_image(layer, buff);
-            free_image(layer);
-        }
+void show_image_layers(image p, char *name)
+{
+    int i;
+    char buff[256];
+    for(i = 0; i < p.c; ++i){ // one window/file per channel, titled "<name> - Layer <i>"
+        snprintf(buff, sizeof(buff), "%s - Layer %d", name, i); // bounded: long names are truncated, not overflowed
+        image layer = get_image_layer(p, i);
+        show_image(layer, buff);
+        free_image(layer);
     }
+}
 
-    void show_image_collapsed(image p, char *name)
-    {
-        image c = collapse_image_layers(p, 1);
-        show_image(c, name);
-        free_image(c);
-    }
+void show_image_collapsed(image p, char *name)
+{
+    image c = collapse_image_layers(p, 1);
+    show_image(c, name);
+    free_image(c);
+}
 
-    image make_empty_image(int w, int h, int c)
-    {
-        image out;
-        out.data = 0;
-        out.h = h;
-        out.w = w;
-        out.c = c;
-        return out;
-    }
+image make_empty_image(int w, int h, int c)
+{
+    image out;
+    out.data = 0;
+    out.h = h;
+    out.w = w;
+    out.c = c;
+    return out;
+}
 
-    image make_image(int w, int h, int c)
-    {
-        image out = make_empty_image(w,h,c);
-        out.data = calloc(h*w*c, sizeof(float));
-        return out;
-    }
+image make_image(int w, int h, int c)
+{
+    image out = make_empty_image(w,h,c);
+    out.data = calloc(h*w*c, sizeof(float));
+    return out;
+}
 
-    image float_to_image(int w, int h, int c, float *data)
-    {
-        image out = make_empty_image(w,h,c);
-        out.data = data;
-        return out;
-    }
+image float_to_image(int w, int h, int c, float *data)
+{
+    image out = make_empty_image(w,h,c);
+    out.data = data;
+    return out;
+}
 
-    image rotate_image(image im, float rad)
-    {
-        int x, y, c;
-        float cx = im.w/2.;
-        float cy = im.h/2.;
-        image rot = make_image(im.w, im.h, im.c);
-        for(c = 0; c < im.c; ++c){
-            for(y = 0; y < im.h; ++y){
-                for(x = 0; x < im.w; ++x){
-                    float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx;
-                    float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy;
-                    float val = bilinear_interpolate(im, rx, ry, c);
-                    set_pixel(rot, x, y, c, val);
-                }
+image rotate_image(image im, float rad)
+{
+    int x, y, c;
+    float cx = im.w/2.;
+    float cy = im.h/2.;
+    image rot = make_image(im.w, im.h, im.c);
+    for(c = 0; c < im.c; ++c){
+        for(y = 0; y < im.h; ++y){
+            for(x = 0; x < im.w; ++x){
+                float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx;
+                float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy;
+                float val = bilinear_interpolate(im, rx, ry, c);
+                set_pixel(rot, x, y, c, val);
             }
         }
-        return rot;
     }
+    return rot;
+}
 
-    void translate_image(image m, float s)
-    {
-        int i;
-        for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s;
-    }
+void translate_image(image m, float s)
+{
+    int i;
+    for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s;
+}
 
-    void scale_image(image m, float s)
-    {
-        int i;
-        for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s;
-    }
+void scale_image(image m, float s)
+{
+    int i;
+    for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s;
+}
 
-    image crop_image(image im, int dx, int dy, int w, int h)
-    {
-        image cropped = make_image(w, h, im.c);
-        int i, j, k;
-        for(k = 0; k < im.c; ++k){
-            for(j = 0; j < h; ++j){
-                for(i = 0; i < w; ++i){
-                    int r = j + dy;
-                    int c = i + dx;
-                    float val = 0;
-                    if (r >= 0 && r < im.h && c >= 0 && c < im.w) {
-                        val = get_pixel(im, c, r, k);
-                    }
-                    set_pixel(cropped, i, j, k, val);
+image crop_image(image im, int dx, int dy, int w, int h)
+{
+    image cropped = make_image(w, h, im.c);
+    int i, j, k;
+    for(k = 0; k < im.c; ++k){
+        for(j = 0; j < h; ++j){
+            for(i = 0; i < w; ++i){
+                int r = j + dy;
+                int c = i + dx;
+                float val = 0;
+                if (r >= 0 && r < im.h && c >= 0 && c < im.w) {
+                    val = get_pixel(im, c, r, k);
                 }
+                set_pixel(cropped, i, j, k, val);
             }
         }
-        return cropped;
     }
+    return cropped;
+}
 
-    float three_way_max(float a, float b, float c)
-    {
-        return (a > b) ? ( (a > c) ? a : c) : ( (b > c) ? b : c) ;
-    }
+float three_way_max(float a, float b, float c)
+{
+    return (a > b) ? ( (a > c) ? a : c) : ( (b > c) ? b : c) ;
+}
 
-    float three_way_min(float a, float b, float c)
-    {
-        return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? b : c) ;
-    }
+float three_way_min(float a, float b, float c)
+{
+    return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? b : c) ;
+}
 
-    // http://www.cs.rit.edu/~ncs/color/t_convert.html
-    void rgb_to_hsv(image im)
-    {
-        assert(im.c == 3);
-        int i, j;
-        float r, g, b;
-        float h, s, v;
-        for(j = 0; j < im.h; ++j){
-            for(i = 0; i < im.w; ++i){
-                r = get_pixel(im, i , j, 0);
-                g = get_pixel(im, i , j, 1);
-                b = get_pixel(im, i , j, 2);
-                float max = three_way_max(r,g,b);
-                float min = three_way_min(r,g,b);
-                float delta = max - min;
-                v = max;
-                if(max == 0){
-                    s = 0;
-                    h = -1;
-                }else{
-                    s = delta/max;
-                    if(r == max){
-                        h = (g - b) / delta;
-                    } else if (g == max) {
-                        h = 2 + (b - r) / delta;
-                    } else {
-                        h = 4 + (r - g) / delta;
-                    }
-                    if (h < 0) h += 6;
+// http://www.cs.rit.edu/~ncs/color/t_convert.html
+void rgb_to_hsv(image im)
+{
+    assert(im.c == 3); // in-place conversion: channels 0,1,2 go from R,G,B to H,S,V
+    int i, j;
+    float r, g, b;
+    float h, s, v;
+    for(j = 0; j < im.h; ++j){
+        for(i = 0; i < im.w; ++i){
+            r = get_pixel(im, i , j, 0);
+            g = get_pixel(im, i , j, 1);
+            b = get_pixel(im, i , j, 2);
+            float max = three_way_max(r,g,b);
+            float min = three_way_min(r,g,b);
+            float delta = max - min;
+            v = max;
+            if(max == 0 || delta == 0){ // black or gray pixel: hue is undefined, and the division below would be 0/0
+                s = 0;
+                h = -1;
+            }else{
+                s = delta/max;
+                if(r == max){
+                    h = (g - b) / delta;
+                } else if (g == max) {
+                    h = 2 + (b - r) / delta;
+                } else {
+                    h = 4 + (r - g) / delta;
                 }
-                set_pixel(im, i, j, 0, h);
-                set_pixel(im, i, j, 1, s);
-                set_pixel(im, i, j, 2, v);
+                if (h < 0) h += 6; // keep hue in [0,6)
             }
+            set_pixel(im, i, j, 0, h);
+            set_pixel(im, i, j, 1, s);
+            set_pixel(im, i, j, 2, v);
         }
     }
+}
 
-    void hsv_to_rgb(image im)
-    {
-        assert(im.c == 3);
-        int i, j;
-        float r, g, b;
-        float h, s, v;
-        float f, p, q, t;
-        for(j = 0; j < im.h; ++j){
-            for(i = 0; i < im.w; ++i){
-                h = get_pixel(im, i , j, 0);
-                s = get_pixel(im, i , j, 1);
-                v = get_pixel(im, i , j, 2);
-                if (s == 0) {
-                    r = g = b = v;
+void hsv_to_rgb(image im)
+{
+    assert(im.c == 3);
+    int i, j;
+    float r, g, b;
+    float h, s, v;
+    float f, p, q, t;
+    for(j = 0; j < im.h; ++j){
+        for(i = 0; i < im.w; ++i){
+            h = get_pixel(im, i , j, 0);
+            s = get_pixel(im, i , j, 1);
+            v = get_pixel(im, i , j, 2);
+            if (s == 0) {
+                r = g = b = v;
+            } else {
+                int index = floor(h);
+                f = h - index;
+                p = v*(1-s);
+                q = v*(1-s*f);
+                t = v*(1-s*(1-f));
+                if(index == 0){
+                    r = v; g = t; b = p;
+                } else if(index == 1){
+                    r = q; g = v; b = p;
+                } else if(index == 2){
+                    r = p; g = v; b = t;
+                } else if(index == 3){
+                    r = p; g = q; b = v;
+                } else if(index == 4){
+                    r = t; g = p; b = v;
                 } else {
-                    int index = floor(h);
-                    f = h - index;
-                    p = v*(1-s);
-                    q = v*(1-s*f);
-                    t = v*(1-s*(1-f));
-                    if(index == 0){
-                        r = v; g = t; b = p;
-                    } else if(index == 1){
-                        r = q; g = v; b = p;
-                    } else if(index == 2){
-                        r = p; g = v; b = t;
-                    } else if(index == 3){
-                        r = p; g = q; b = v;
-                    } else if(index == 4){
-                        r = t; g = p; b = v;
-                    } else {
-                        r = v; g = p; b = q;
-                    }
+                    r = v; g = p; b = q;
                 }
-                set_pixel(im, i, j, 0, r);
-                set_pixel(im, i, j, 1, g);
-                set_pixel(im, i, j, 2, b);
             }
+            set_pixel(im, i, j, 0, r);
+            set_pixel(im, i, j, 1, g);
+            set_pixel(im, i, j, 2, b);
         }
     }
+}
 
-    image grayscale_image(image im)
-    {
-        assert(im.c == 3);
-        int i, j, k;
-        image gray = make_image(im.w, im.h, im.c);
-        float scale[] = {0.587, 0.299, 0.114};
-        for(k = 0; k < im.c; ++k){
-            for(j = 0; j < im.h; ++j){
-                for(i = 0; i < im.w; ++i){
-                    gray.data[i+im.w*j] += scale[k]*get_pixel(im, i, j, k);
-                }
+image grayscale_image(image im)
+{
+    assert(im.c == 3); // expects a 3-channel image; returns a newly allocated 1-channel image
+    int i, j, k;
+    image gray = make_image(im.w, im.h, 1);
+    float scale[] = {0.299, 0.587, 0.114}; // Rec. 601 luma weights for R, G, B (channel 0 is red, as in rgb_to_hsv)
+    for(k = 0; k < im.c; ++k){
+        for(j = 0; j < im.h; ++j){
+            for(i = 0; i < im.w; ++i){
+                gray.data[i+im.w*j] += scale[k]*get_pixel(im, i, j, k); // accumulate the weighted channels
             }
         }
-        memcpy(gray.data + im.w*im.h*1, gray.data, sizeof(float)*im.w*im.h);
-        memcpy(gray.data + im.w*im.h*2, gray.data, sizeof(float)*im.w*im.h);
-        return gray;
     }
+    return gray;
+}
 
-    image blend_image(image fore, image back, float alpha)
-    {
-        assert(fore.w == back.w && fore.h == back.h && fore.c == back.c);
-        image blend = make_image(fore.w, fore.h, fore.c);
-        int i, j, k;
-        for(k = 0; k < fore.c; ++k){
-            for(j = 0; j < fore.h; ++j){
-                for(i = 0; i < fore.w; ++i){
-                    float val = alpha * get_pixel(fore, i, j, k) + 
-                        (1 - alpha)* get_pixel(back, i, j, k);
-                    set_pixel(blend, i, j, k, val);
-                }
+image blend_image(image fore, image back, float alpha)
+{
+    assert(fore.w == back.w && fore.h == back.h && fore.c == back.c);
+    image blend = make_image(fore.w, fore.h, fore.c);
+    int i, j, k;
+    for(k = 0; k < fore.c; ++k){
+        for(j = 0; j < fore.h; ++j){
+            for(i = 0; i < fore.w; ++i){
+                float val = alpha * get_pixel(fore, i, j, k) + 
+                    (1 - alpha)* get_pixel(back, i, j, k);
+                set_pixel(blend, i, j, k, val);
             }
         }
-        return blend;
     }
+    return blend;
+}
 
-    void scale_image_channel(image im, int c, float v)
-    {
-        int i, j;
-        for(j = 0; j < im.h; ++j){
-            for(i = 0; i < im.w; ++i){
-                float pix = get_pixel(im, i, j, c);
-                pix = pix*v;
-                set_pixel(im, i, j, c, pix);
-            }
+void scale_image_channel(image im, int c, float v)
+{
+    int i, j;
+    for(j = 0; j < im.h; ++j){
+        for(i = 0; i < im.w; ++i){
+            float pix = get_pixel(im, i, j, c);
+            pix = pix*v;
+            set_pixel(im, i, j, c, pix);
         }
     }
+}
 
-    void saturate_image(image im, float sat)
-    {
-        rgb_to_hsv(im);
-        scale_image_channel(im, 1, sat);
-        hsv_to_rgb(im);
-        constrain_image(im);
-    }
-
-    void exposure_image(image im, float sat)
-    {
-        rgb_to_hsv(im);
-        scale_image_channel(im, 2, sat);
-        hsv_to_rgb(im);
-        constrain_image(im);
-    }
-
-    void saturate_exposure_image(image im, float sat, float exposure)
-    {
-        rgb_to_hsv(im);
-        scale_image_channel(im, 1, sat);
-        scale_image_channel(im, 2, exposure);
-        hsv_to_rgb(im);
-        constrain_image(im);
-    }
+void saturate_image(image im, float sat)
+{
+    rgb_to_hsv(im);
+    scale_image_channel(im, 1, sat);
+    hsv_to_rgb(im);
+    constrain_image(im);
+}
 
-    /*
-       image saturate_image(image im, float sat)
-       {
-       image gray = grayscale_image(im);
-       image blend = blend_image(im, gray, sat);
-       free_image(gray);
-       constrain_image(blend);
-       return blend;
-       }
+void exposure_image(image im, float sat)
+{
+    rgb_to_hsv(im);
+    scale_image_channel(im, 2, sat);
+    hsv_to_rgb(im);
+    constrain_image(im);
+}
 
-       image brightness_image(image im, float b)
-       {
-       image bright = make_image(im.w, im.h, im.c);
-       return bright;
-       }
-     */
+void saturate_exposure_image(image im, float sat, float exposure)
+{
+    rgb_to_hsv(im);
+    scale_image_channel(im, 1, sat);
+    scale_image_channel(im, 2, exposure);
+    hsv_to_rgb(im);
+    constrain_image(im);
+}
 
-    float bilinear_interpolate(image im, float x, float y, int c)
-    {
-        int ix = (int) floorf(x);
-        int iy = (int) floorf(y);
+/*
+   image saturate_image(image im, float sat)
+   {
+   image gray = grayscale_image(im);
+   image blend = blend_image(im, gray, sat);
+   free_image(gray);
+   constrain_image(blend);
+   return blend;
+   }
+
+   image brightness_image(image im, float b)
+   {
+   image bright = make_image(im.w, im.h, im.c);
+   return bright;
+   }
+ */
+
+float bilinear_interpolate(image im, float x, float y, int c)
+{
+    int ix = (int) floorf(x);
+    int iy = (int) floorf(y);
 
-        float dx = x - ix;
-        float dy = y - iy;
+    float dx = x - ix;
+    float dy = y - iy;
 
-        float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + 
-            dy     * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + 
-            (1-dy) *   dx   * get_pixel_extend(im, ix+1, iy, c) +
-            dy     *   dx   * get_pixel_extend(im, ix+1, iy+1, c);
-        return val;
-    }
+    float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + 
+        dy     * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + 
+        (1-dy) *   dx   * get_pixel_extend(im, ix+1, iy, c) +
+        dy     *   dx   * get_pixel_extend(im, ix+1, iy+1, c);
+    return val;
+}
 
-    image resize_image(image im, int w, int h)
-    {
-        image resized = make_image(w, h, im.c);   
-        image part = make_image(w, im.h, im.c);
-        int r, c, k;
-        float w_scale = (float)(im.w - 1) / (w - 1);
-        float h_scale = (float)(im.h - 1) / (h - 1);
-        for(k = 0; k < im.c; ++k){
-            for(r = 0; r < im.h; ++r){
-                for(c = 0; c < w; ++c){
-                    float val = 0;
-                    if(c == w-1){
-                        val = get_pixel(im, im.w-1, r, k);
-                    } else {
-                        float sx = c*w_scale;
-                        int ix = (int) sx;
-                        float dx = sx - ix;
-                        val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
-                    }
-                    set_pixel(part, c, r, k, val);
+image resize_image(image im, int w, int h)
+{
+    image resized = make_image(w, h, im.c);   
+    image part = make_image(w, im.h, im.c);
+    int r, c, k;
+    float w_scale = (float)(im.w - 1) / (w - 1);
+    float h_scale = (float)(im.h - 1) / (h - 1);
+    for(k = 0; k < im.c; ++k){
+        for(r = 0; r < im.h; ++r){
+            for(c = 0; c < w; ++c){
+                float val = 0;
+                if(c == w-1){
+                    val = get_pixel(im, im.w-1, r, k);
+                } else {
+                    float sx = c*w_scale;
+                    int ix = (int) sx;
+                    float dx = sx - ix;
+                    val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
                 }
+                set_pixel(part, c, r, k, val);
             }
         }
-        for(k = 0; k < im.c; ++k){
-            for(r = 0; r < h; ++r){
-                float sy = r*h_scale;
-                int iy = (int) sy;
-                float dy = sy - iy;
-                for(c = 0; c < w; ++c){
-                    float val = (1-dy) * get_pixel(part, c, iy, k);
-                    set_pixel(resized, c, r, k, val);
-                }
-                if(r == h-1) continue;
-                for(c = 0; c < w; ++c){
-                    float val = dy * get_pixel(part, c, iy+1, k);
-                    add_pixel(resized, c, r, k, val);
-                }
+    }
+    for(k = 0; k < im.c; ++k){
+        for(r = 0; r < h; ++r){
+            float sy = r*h_scale;
+            int iy = (int) sy;
+            float dy = sy - iy;
+            for(c = 0; c < w; ++c){
+                float val = (1-dy) * get_pixel(part, c, iy, k);
+                set_pixel(resized, c, r, k, val);
+            }
+            if(r == h-1) continue;
+            for(c = 0; c < w; ++c){
+                float val = dy * get_pixel(part, c, iy+1, k);
+                add_pixel(resized, c, r, k, val);
             }
         }
-
-        free_image(part);
-        return resized;
     }
 
-    void test_resize(char *filename)
-    {
-        image im = load_image(filename, 0,0, 3);
-        image gray = grayscale_image(im);
+    free_image(part);
+    return resized;
+}
+
+void test_resize(char *filename)
+{
+    image im = load_image(filename, 0,0, 3);
+    image gray = grayscale_image(im);
 
-        image sat2 = copy_image(im);
-        saturate_image(sat2, 2);
+    image sat2 = copy_image(im);
+    saturate_image(sat2, 2);
 
-        image sat5 = copy_image(im);
-        saturate_image(sat5, .5);
+    image sat5 = copy_image(im);
+    saturate_image(sat5, .5);
 
-        image exp2 = copy_image(im);
-        exposure_image(exp2, 2);
+    image exp2 = copy_image(im);
+    exposure_image(exp2, 2);
 
-        image exp5 = copy_image(im);
-        exposure_image(exp5, .5);
+    image exp5 = copy_image(im);
+    exposure_image(exp5, .5);
 
-        show_image(im, "Original");
-        show_image(gray, "Gray");
-        show_image(sat2, "Saturation-2");
-        show_image(sat5, "Saturation-.5");
-        show_image(exp2, "Exposure-2");
-        show_image(exp5, "Exposure-.5");
+    show_image(im, "Original");
+    show_image(gray, "Gray");
+    show_image(sat2, "Saturation-2");
+    show_image(sat5, "Saturation-.5");
+    show_image(exp2, "Exposure-2");
+    show_image(exp5, "Exposure-.5");
 #ifdef OPENCV
-        cvWaitKey(0);
+    cvWaitKey(0);
 #endif
-    }
+}
 
 #ifdef OPENCV
-    image ipl_to_image(IplImage* src)
-    {
-        unsigned char *data = (unsigned char *)src->imageData;
-        int h = src->height;
-        int w = src->width;
-        int c = src->nChannels;
-        int step = src->widthStep;
-        image out = make_image(w, h, c);
-        int i, j, k, count=0;;
-
-        for(k= 0; k < c; ++k){
-            for(i = 0; i < h; ++i){
-                for(j = 0; j < w; ++j){
-                    out.data[count++] = data[i*step + j*c + k]/255.;
-                }
+image ipl_to_image(IplImage* src)
+{
+    unsigned char *data = (unsigned char *)src->imageData;
+    int h = src->height;
+    int w = src->width;
+    int c = src->nChannels;
+    int step = src->widthStep;
+    image out = make_image(w, h, c);
+    int i, j, k, count=0;;
+
+    for(k= 0; k < c; ++k){
+        for(i = 0; i < h; ++i){
+            for(j = 0; j < w; ++j){
+                out.data[count++] = data[i*step + j*c + k]/255.;
             }
         }
-        return out;
     }
+    return out;
+}
 
-    image load_image_cv(char *filename, int channels)
-    {
-        IplImage* src = 0;
-        int flag = -1;
-        if (channels == 0) flag = -1;
-        else if (channels == 1) flag = 0;
-        else if (channels == 3) flag = 1;
-        else {
-            fprintf(stderr, "OpenCV can't force load with %d channels\n", channels);
-        }
+image load_image_cv(char *filename, int channels)
+{
+    IplImage* src = 0;
+    int flag = -1; // cvLoadImage iscolor flag: -1 load as-is, 0 force grayscale, 1 force color
+    if (channels == 0) flag = -1;
+    else if (channels == 1) flag = 0;
+    else if (channels == 3) flag = 1;
+    else {
+        fprintf(stderr, "OpenCV can't force load with %d channels\n", channels);
+    }
 
-        if( (src = cvLoadImage(filename, flag)) == 0 )
-        {
-            printf("Cannot load file image %s\n", filename);
-            exit(0);
-        }
-        image out = ipl_to_image(src);
-        cvReleaseImage(&src);
-        rgbgr_image(out);
-        return out;
+    if( (src = cvLoadImage(filename, flag)) == 0 )
+    {
+        fprintf(stderr, "Cannot load file image %s\n", filename); // errors go to stderr, as in load_image_stb
+        exit(EXIT_FAILURE); // a failed load must not report success to the shell
     }
+    image out = ipl_to_image(src);
+    cvReleaseImage(&src);
+    rgbgr_image(out); // presumably converts OpenCV's BGR order to RGB -- confirm in rgbgr_image
+    return out;
+}
 
 #endif
 
 
-    image load_image_stb(char *filename, int channels)
-    {
-        int w, h, c;
-        unsigned char *data = stbi_load(filename, &w, &h, &c, channels);
-        if (!data) {
-            fprintf(stderr, "Cannot load file image %s\nSTB Reason: %s\n", filename, stbi_failure_reason());
-            exit(0);
-        }
-        if(channels) c = channels;
-        int i,j,k;
-        image im = make_image(w, h, c);
-        for(k = 0; k < c; ++k){
-            for(j = 0; j < h; ++j){
-                for(i = 0; i < w; ++i){
-                    int dst_index = i + w*j + w*h*k;
-                    int src_index = k + c*i + c*w*j;
-                    im.data[dst_index] = (float)data[src_index]/255.;
-                }
+image load_image_stb(char *filename, int channels)
+{
+    int w, h, c;
+    unsigned char *data = stbi_load(filename, &w, &h, &c, channels);
+    if (!data) {
+        fprintf(stderr, "Cannot load file image %s\nSTB Reason: %s\n", filename, stbi_failure_reason());
+        exit(EXIT_FAILURE); // a failed load must not report success to the shell
+    }
+    if(channels) c = channels; // a non-zero request overrides the channel count reported by stbi_load
+    int i,j,k;
+    image im = make_image(w, h, c);
+    for(k = 0; k < c; ++k){
+        for(j = 0; j < h; ++j){
+            for(i = 0; i < w; ++i){
+                int dst_index = i + w*j + w*h*k; // planar destination: channel, then row, then column
+                int src_index = k + c*i + c*w*j; // interleaved source: row, then column, then channel
+                im.data[dst_index] = (float)data[src_index]/255.;
             }
         }
-        free(data);
-        return im;
     }
+    free(data);
+    return im;
+}
 
-    image load_image(char *filename, int w, int h, int c)
-    {
+image load_image(char *filename, int w, int h, int c)
+{
 #ifdef OPENCV
-        image out = load_image_cv(filename, c);
+    image out = load_image_cv(filename, c);
 #else
-        image out = load_image_stb(filename, c);
+    image out = load_image_stb(filename, c);
 #endif
 
-        if((h && w) && (h != out.h || w != out.w)){
-            image resized = resize_image(out, w, h);
-            free_image(out);
-            out = resized;
-        }
-        return out;
+    if((h && w) && (h != out.h || w != out.w)){
+        image resized = resize_image(out, w, h);
+        free_image(out);
+        out = resized;
     }
+    return out;
+}
 
-    image load_image_color(char *filename, int w, int h)
-    {
-        return load_image(filename, w, h, 3);
-    }
+image load_image_color(char *filename, int w, int h)
+{
+    return load_image(filename, w, h, 3);
+}
 
-    image get_image_layer(image m, int l)
-    {
-        image out = make_image(m.w, m.h, 1);
-        int i;
-        for(i = 0; i < m.h*m.w; ++i){
-            out.data[i] = m.data[i+l*m.h*m.w];
-        }
-        return out;
+image get_image_layer(image m, int l)
+{
+    image out = make_image(m.w, m.h, 1);
+    int i;
+    for(i = 0; i < m.h*m.w; ++i){
+        out.data[i] = m.data[i+l*m.h*m.w];
     }
+    return out;
+}
 
-    float get_pixel(image m, int x, int y, int c)
-    {
-        assert(x < m.w && y < m.h && c < m.c);
-        return m.data[c*m.h*m.w + y*m.w + x];
-    }
-    float get_pixel_extend(image m, int x, int y, int c)
-    {
-        if(x < 0 || x >= m.w || y < 0 || y >= m.h || c < 0 || c >= m.c) return 0;
-        return get_pixel(m, x, y, c);
-    }
-    void set_pixel(image m, int x, int y, int c, float val)
-    {
-        assert(x < m.w && y < m.h && c < m.c);
-        m.data[c*m.h*m.w + y*m.w + x] = val;
-    }
-    void add_pixel(image m, int x, int y, int c, float val)
-    {
-        assert(x < m.w && y < m.h && c < m.c);
-        m.data[c*m.h*m.w + y*m.w + x] += val;
-    }
+float get_pixel(image m, int x, int y, int c)
+{
+    assert(x >= 0 && x < m.w && y >= 0 && y < m.h && c >= 0 && c < m.c); // reject negative indices too
+    return m.data[c*m.h*m.w + y*m.w + x]; // planar layout: channel, then row, then column
+}
+float get_pixel_extend(image m, int x, int y, int c)
+{
+    if(x < 0 || x >= m.w || y < 0 || y >= m.h || c < 0 || c >= m.c) return 0;
+    return get_pixel(m, x, y, c);
+}
+void set_pixel(image m, int x, int y, int c, float val)
+{
+    assert(x >= 0 && x < m.w && y >= 0 && y < m.h && c >= 0 && c < m.c); // reject negative indices too
+    m.data[c*m.h*m.w + y*m.w + x] = val; // planar layout: channel, then row, then column
+}
+void add_pixel(image m, int x, int y, int c, float val)
+{
+    assert(x >= 0 && x < m.w && y >= 0 && y < m.h && c >= 0 && c < m.c); // reject negative indices too
+    m.data[c*m.h*m.w + y*m.w + x] += val; // accumulates into the existing value (planar layout)
+}
 
-    void print_image(image m)
-    {
-        int i, j, k;
-        for(i =0 ; i < m.c; ++i){
-            for(j =0 ; j < m.h; ++j){
-                for(k = 0; k < m.w; ++k){
-                    printf("%.2lf, ", m.data[i*m.h*m.w + j*m.w + k]);
-                    if(k > 30) break;
-                }
-                printf("\n");
-                if(j > 30) break;
+void print_image(image m) /* prints the leading values of every plane of m to stdout, at most 32 rows of 32 values each */
+{
+    int i, j, k;
+    for(i =0 ; i < m.c; ++i){
+        for(j =0 ; j < m.h; ++j){
+            for(k = 0; k < m.w; ++k){
+                printf("%.2lf, ", m.data[i*m.h*m.w + j*m.w + k]);
+                if(k > 30) break; /* stop once column index 31 has been printed */
             }
             printf("\n");
+            if(j > 30) break; /* stop once row index 31 has been printed */
         }
         printf("\n");
     }
+    printf("\n");
+}
 
-    image collapse_images_vert(image *ims, int n)
-    {
-        int color = 1;
-        int border = 1;
-        int h,w,c;
-        w = ims[0].w;
-        h = (ims[0].h + border) * n - border;
-        c = ims[0].c;
-        if(c != 3 || !color){
-            w = (w+border)*c - border;
-            c = 1;
-        }
-
-        image filters = make_image(w, h, c);
-        int i,j;
-        for(i = 0; i < n; ++i){
-            int h_offset = i*(ims[0].h+border);
-            image copy = copy_image(ims[i]);
-            //normalize_image(copy);
-            if(c == 3 && color){
-                embed_image(copy, filters, 0, h_offset);
-            }
-            else{
-                for(j = 0; j < copy.c; ++j){
-                    int w_offset = j*(ims[0].w+border);
-                    image layer = get_image_layer(copy, j);
-                    embed_image(layer, filters, w_offset, h_offset);
-                    free_image(layer);
-                }
+image collapse_images_vert(image *ims, int n) /* stacks n images top-to-bottom in one new image; all sizes come from ims[0]; caller frees the result */
+{
+    int color = 1;
+    int border = 1; /* gap left between neighbouring tiles */
+    int h,w,c;
+    w = ims[0].w;
+    h = (ims[0].h + border) * n - border;
+    c = ims[0].c;
+    if(c != 3 || !color){ /* not a 3-plane image: lay the planes side by side in a single plane */
+        w = (w+border)*c - border;
+        c = 1;
+    }
+
+    image filters = make_image(w, h, c);
+    int i,j;
+    for(i = 0; i < n; ++i){
+        int h_offset = i*(ims[0].h+border); /* top edge of tile i */
+        image copy = copy_image(ims[i]);
+        //normalize_image(copy);
+        if(c == 3 && color){
+            embed_image(copy, filters, 0, h_offset);
+        }
+        else{
+            for(j = 0; j < copy.c; ++j){
+                int w_offset = j*(ims[0].w+border); /* left edge of plane j within the row */
+                image layer = get_image_layer(copy, j);
+                embed_image(layer, filters, w_offset, h_offset);
+                free_image(layer);
             }
-            free_image(copy);
-        }
-        return filters;
-    } 
-
-    image collapse_images_horz(image *ims, int n)
-    {
-        int color = 1;
-        int border = 1;
-        int h,w,c;
-        int size = ims[0].h;
-        h = size;
-        w = (ims[0].w + border) * n - border;
-        c = ims[0].c;
-        if(c != 3 || !color){
-            h = (h+border)*c - border;
-            c = 1;
         }
+        free_image(copy);
+    }
+    return filters;
+} 
 
-        image filters = make_image(w, h, c);
-        int i,j;
-        for(i = 0; i < n; ++i){
-            int w_offset = i*(size+border);
-            image copy = copy_image(ims[i]);
-            //normalize_image(copy);
-            if(c == 3 && color){
-                embed_image(copy, filters, w_offset, 0);
-            }
-            else{
-                for(j = 0; j < copy.c; ++j){
-                    int h_offset = j*(size+border);
-                    image layer = get_image_layer(copy, j);
-                    embed_image(layer, filters, w_offset, h_offset);
-                    free_image(layer);
-                }
+image collapse_images_horz(image *ims, int n) /* places n images left-to-right in one new image; all sizes come from ims[0]; caller frees the result */
+{
+    int color = 1;
+    int border = 1; /* gap left between neighbouring tiles */
+    int h,w,c;
+    int size = ims[0].h;
+    h = size;
+    w = (ims[0].w + border) * n - border;
+    c = ims[0].c;
+    if(c != 3 || !color){ /* not a 3-plane image: stack the planes vertically in a single plane */
+        h = (h+border)*c - border;
+        c = 1;
+    }
+
+    image filters = make_image(w, h, c);
+    int i,j;
+    for(i = 0; i < n; ++i){
+        int w_offset = i*(ims[0].w+border); /* step by tile width, matching how w is computed; stepping by height misplaces non-square tiles */
+        image copy = copy_image(ims[i]);
+        //normalize_image(copy);
+        if(c == 3 && color){
+            embed_image(copy, filters, w_offset, 0);
+        }
+        else{
+            for(j = 0; j < copy.c; ++j){
+                int h_offset = j*(size+border); /* top edge of plane j within the column */
+                image layer = get_image_layer(copy, j);
+                embed_image(layer, filters, w_offset, h_offset);
+                free_image(layer);
             }
-            free_image(copy);
-        }
-        return filters;
-    } 
-
-    void show_images(image *ims, int n, char *window)
-    {
-        image m = collapse_images_vert(ims, n);
-        /*
-        int w = 448;
-        int h = ((float)m.h/m.w) * 448;
-        if(h > 896){
-            h = 896;
-            w = ((float)m.w/m.h) * 896;
         }
-        image sized = resize_image(m, w, h);
-        */
-        normalize_image(m);
-        image sized = resize_image(m, m.w, m.h);
-        save_image(sized, window);
-        show_image(sized, window);
-        free_image(sized);
-        free_image(m);
+        free_image(copy);
     }
+    return filters;
+} 
 
-    void free_image(image m)
-    {
-        free(m.data);
-    }
+void show_images(image *ims, int n, char *window) /* stacks ims vertically, normalizes the result, then passes it to save_image() and show_image() under the name window */
+{
+    image m = collapse_images_vert(ims, n);
+    /*
+       int w = 448;
+       int h = ((float)m.h/m.w) * 448;
+       if(h > 896){
+       h = 896;
+       w = ((float)m.w/m.h) * 896;
+       }
+       image sized = resize_image(m, w, h);
+     */
+    normalize_image(m);
+    image sized = resize_image(m, m.w, m.h); /* target size equals m's own size. NOTE(review): looks like a plain copy; confirm against resize_image */
+    save_image(sized, window);
+    show_image(sized, window);
+    free_image(sized);
+    free_image(m);
+}
+
+void free_image(image m) /* releases m.data only; m is passed by value, so the caller's copy still holds the freed pointer */
+{
+    free(m.data);
+}
diff --git a/src/region_layer.c b/src/region_layer.c
index ecb89c64bda9dc7f339fc777a5fde4f3e2e24234..39af5ee8c528ecf795eac1a28e43d87f5bec89e4 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -61,6 +61,7 @@ void forward_region_layer(const region_layer l, network_state state)
     if(state.train){
         float avg_iou = 0;
         float avg_cat = 0;
+        float avg_allcat = 0;
         float avg_obj = 0;
         float avg_anyobj = 0;
         int count = 0;
@@ -90,6 +91,7 @@ void forward_region_layer(const region_layer l, network_state state)
                     l.delta[class_index+j] = l.class_scale * (state.truth[truth_index+1+j] - l.output[class_index+j]);
                     *(l.cost) += l.class_scale * pow(state.truth[truth_index+1+j] - l.output[class_index+j], 2);
                     if(state.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j];
+                    avg_allcat += l.output[class_index+j];
                 }
 
                 box truth = float_to_box(state.truth + truth_index + 1 + l.classes);
@@ -151,7 +153,7 @@ void forward_region_layer(const region_layer l, network_state state)
                         LOGISTIC, l.delta + index + locations*l.classes);
             }
         }
-        printf("Region Avg IOU: %f, Avg Cat Pred: %f, Avg Obj: %f, Avg Any: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
+        printf("Region Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
     }
 }
 
diff --git a/src/swag.c b/src/swag.c
index 1398192de17fe489af090f52391cad6a86987c05..37dde36a224a38766b3e1bb25965416d205d178d 100644
--- a/src/swag.c
+++ b/src/swag.c
@@ -132,21 +132,22 @@ void train_swag(char *cfgfile, char *weightfile)
 void convert_swag_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes)
 {
     int i,j,n;
-    int per_cell = 5*num+classes;
+    //int per_cell = 5*num+classes;
     for (i = 0; i < side*side; ++i){
         int row = i / side;
         int col = i % side;
         for(n = 0; n < num; ++n){
-            int offset = i*per_cell + 5*n;
-            float scale = predictions[offset];
             int index = i*num + n;
-            boxes[index].x = (predictions[offset + 1] + col) / side * w;
-            boxes[index].y = (predictions[offset + 2] + row) / side * h;
-            boxes[index].w = pow(predictions[offset + 3], (square?2:1)) * w;
-            boxes[index].h = pow(predictions[offset + 4], (square?2:1)) * h;
+            int p_index = side*side*classes + i*num + n;
+            float scale = predictions[p_index];
+            int box_index = side*side*(classes + num) + (i*num + n)*4;
+            boxes[index].x = (predictions[box_index + 0] + col) / side * w;
+            boxes[index].y = (predictions[box_index + 1] + row) / side * h;
+            boxes[index].w = pow(predictions[box_index + 2], (square?2:1)) * w;
+            boxes[index].h = pow(predictions[box_index + 3], (square?2:1)) * h;
             for(j = 0; j < classes; ++j){
-                offset = i*per_cell + 5*num;
-                float prob = scale*predictions[offset+j];
+                int class_index = i*classes;
+                float prob = scale*predictions[class_index+j];
                 probs[index][j] = (prob > thresh) ? prob : 0;
             }
         }
diff --git a/src/writing.c b/src/writing.c
index cfbc5fd16dcae67919ddc2f430e02096053ce6bf..e0c1d1a83cb0d6c64b99916e5c77474aaa936a0b 100644
--- a/src/writing.c
+++ b/src/writing.c
@@ -2,8 +2,13 @@
 #include "utils.h"
 #include "parser.h"
 
+#ifdef OPENCV
+#include "opencv2/highgui/highgui_c.h"
+#endif
+
 void train_writing(char *cfgfile, char *weightfile)
 {
+    char *backup_directory = "/home/pjreddie/backup/";
     data_seed = time(0);
     srand(time(0));
     float avg_loss = -1;
@@ -23,41 +28,78 @@ void train_writing(char *cfgfile, char *weightfile)
     while(1){
         ++i;
         time=clock();
-        data train = load_data_writing(paths, imgs, plist->size, 512, 512);
+        data train = load_data_writing(paths, imgs, plist->size, 256, 256, 1);
+        printf("Loaded %lf seconds\n",sec(clock()-time));
+        time=clock();
         float loss = train_network(net, train);
-        #ifdef GPU
-        float *out = get_network_output_gpu(net);
-        #else
-        float *out = get_network_output(net);
-        #endif
-        image pred = float_to_image(64, 64, 1, out);
-        print_image(pred);
 
-/*
-        image im = float_to_image(256, 256, 3, train.X.vals[0]);
-        image lab = float_to_image(64, 64, 1, train.y.vals[0]);
+        /*
         image pred = float_to_image(64, 64, 1, out);
-        show_image(im, "image");
-        show_image(lab, "label");
-        print_image(lab);
-        show_image(pred, "pred");
-        cvWaitKey(0);
+        print_image(pred);
         */
 
+        /*
+           image im = float_to_image(256, 256, 3, train.X.vals[0]);
+           image lab = float_to_image(64, 64, 1, train.y.vals[0]);
+           image pred = float_to_image(64, 64, 1, out);
+           show_image(im, "image");
+           show_image(lab, "label");
+           print_image(lab);
+           show_image(pred, "pred");
+           cvWaitKey(0);
+         */
+
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
         printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen);
         free_data(train);
-        if((i % 20000) == 0) net.learning_rate *= .1;
         //if(i%100 == 0 && net.learning_rate > .00001) net.learning_rate *= .97;
         if(i%1000==0){
             char buff[256];
-            sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i);
+            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
             save_weights(net, buff);
         }
     }
 }
 
+/* Reads one image path from stdin, runs the network on that image, then saves the
+ * prediction (outfile != NULL) or shows it. Returns early if no line can be read. */
+void test_writing(char *cfgfile, char *weightfile, char *outfile)
+{
+    network net = parse_network_cfg(cfgfile);
+    if(weightfile) load_weights(&net, weightfile);
+    set_batch_network(&net, 1);
+    srand(2222222);
+    char filename[256];
+
+    if(!fgets(filename, sizeof(filename), stdin)){
+        fprintf(stderr, "test_writing: no image path on stdin\n");
+        return; /* filename would be uninitialized past this point */
+    }
+    filename[strcspn(filename, "\n")] = '\0'; /* strtok() would leave a line holding only "\n" unchanged */
+    image im = load_image_color(filename, 0, 0);
+    image sized = resize_image(im, net.w, net.h);
+    printf("%d %d %d\n", im.h, im.w, im.c);
+    clock_t time = clock();
+    network_predict(net, sized.data);
+    printf("%s: Predicted in %f seconds.\n", filename, sec(clock()-time));
+    image pred = get_network_image(net);
+
+    if (outfile) {
+        printf("Save image as %s.png (shape: %d %d)\n", outfile, pred.w, pred.h);
+        save_image(pred, outfile);
+    } else {
+        show_image(pred, "prediction");
+#ifdef OPENCV
+        cvWaitKey(0);
+        cvDestroyAllWindows();
+#endif
+    }
+
+    free_image(im);
+    free_image(sized);
+}
+
 void run_writing(int argc, char **argv)
 {
     if(argc < 4){
@@ -67,6 +109,8 @@ void run_writing(int argc, char **argv)
 
     char *cfg = argv[3];
     char *weights = (argc > 4) ? argv[4] : 0;
+    char *outfile = (argc > 5) ? argv[5] : 0;
     if(0==strcmp(argv[2], "train")) train_writing(cfg, weights);
+    else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, outfile);
 }