#!/usr/bin/python
#imports
import mwclient
import re
import os, sys, shutil
import tarfile
import hashlib
import tempfile

def GetDescription(S):
    """Split wiki page text into (preceding text, heading) pairs.

    Any ``<div ...>...</div>`` span that sits on a single line is removed
    first (that pattern is applied without DOTALL, so a div that spans
    several lines is left in place).  The remaining text is then scanned
    for headings of the form ``==name.ext==``, where the heading consists
    of word characters and spaces around one dot.

    Returns a list with one tuple per heading, in page order:
    ``(text between the previous heading and this one, heading text)``.
    Leading and trailing newlines are stripped from both parts.  Text that
    follows the last heading is not returned.
    """
    without_divs = re.sub(r'<div.*?</div>', "", S)
    heading_pattern = re.compile(r"(.*?)==([\w ]*?\.[\w ]*?)==", re.DOTALL)

    chunks = []
    for match in heading_pattern.finditer(without_divs):
        preceding, heading = match.groups()
        chunks.append((preceding.strip('\n'), heading.strip('\n')))
    return chunks

def FixUrls(S):
    """Rewrite bracketed wiki links ``[url label]`` as Markdown ``[label](url)``.

    Every bracketed span in S is converted: the first run of characters up
    to whitespace or ``]`` becomes the link target, and whatever follows
    (after optional spaces) up to the closing ``]`` becomes the link text.
    The link text may contain newlines.  A span without a label, such as
    ``[url]``, yields an empty link text: ``[](url)``.

    Returns the converted string; text without brackets is returned
    unchanged.
    """
    # The pattern uses negated character classes instead of '.' so that
    #  - a match can never run past the first closing bracket (several
    #    links on one line stay separate), and
    #  - labels may span lines without needing re.DOTALL.
    # No count is passed, so all links are replaced.  (re.sub's fourth
    # positional parameter is count, not flags.)
    return re.sub(r"\[([^\s\]]*)[ ]*([^\]]*)\]", r"[\g<2>](\g<1>)", S)

# Export the description of every VTK example page on the wiki to a
# Markdown file.  For each page whose name contains VTK/Examples/, the text
# that precedes each ==File.ext== heading is written to
# ./<example directory>/<File>.md under a "### Description" title.

# Connect to the wiki
site = mwclient.Site(('https','itk.org'), '/Wiki/')

# Marker identifying example pages; the text after it is the example path
to_find = "VTK/Examples/"

# Misspelt or space-padded CMakeLists headings; their chunk is skipped
CMakeVariants = (" CMakeLists.txt ", " CmakeLists.txt", "CmakeLists.txt")

# Source-file extensions that are rewritten to .md.  The pattern is anchored
# on the dot so that only a real extension is replaced (a name such as
# build.sh is left alone instead of becoming build.smd).
SourceExtension = re.compile(r'\.(cxx|cs|py|java|tcl|h)$')

# Walk over all pages of the wiki
for page in site.pages:
    # Look for pages whose name contains VTK/Examples/ and skip the others.
    # NOTE(review): find() accepts the marker anywhere in the name, not only
    # at the start -- confirm that this is intended.
    start = page.name.find(to_find)
    if start < 0:
        continue

    # Get the part of the page name that comes after VTK/Examples/
    # e.g. if the page name is VTK/Examples/GeometricObjects/Line,
    # ExamplePath will be GeometricObjects/Line
    ExamplePath = page.name[start+len(to_find):]

    # Continuing the above example, the below splits GeometricObjects/Line
    # into PathName = GeometricObjects
    # ExampleName = Line (with spaces replaced by underscores)
    PathSplit = os.path.split(ExamplePath)
    PathName = PathSplit[0]
    ExampleName = re.sub(" ", "_", PathSplit[1])

    # Boneyard and Broken examples are never written.  The check depends
    # only on the page name, so do it before fetching the page text.
    if "Boneyard" in PathName or "Broken" in PathName:
        continue

    # Get the content of the page
    content = page.edit()

    # Each chunk is (text preceding a ==File.ext== heading, heading text)
    for RawContent, FileName in GetDescription(content):
        # The exact CMakeLists.txt heading ends processing of this page,
        # while the misspelt variants only skip their own chunk.
        # NOTE(review): break vs. continue is kept as in the original --
        # confirm that the difference is deliberate.
        if FileName == "CMakeLists.txt":
            break
        if FileName in CMakeVariants:
            continue

        # Don't create a file if there is no content
        if not RawContent:
            continue

        FileContent = FixUrls(RawContent)

        print(FileName)
        print(FileContent)

        # Check if the path exists, if not, create it
        if PathName != "" and not os.path.exists(PathName):
            os.makedirs(PathName)

        # Change the extension to md
        FileName = SourceExtension.sub(".md", FileName)

        # Write the description file; the with-block closes the file even
        # if a write fails.
        OutputFile = "./" + PathName + "/" + FileName
        print("Creating " + OutputFile)
        with open(OutputFile, 'w') as MyFile:
            MyFile.write("### Description\n")
            # Characters outside ASCII are replaced with '?'
            MyFile.write(FileContent.encode('ascii','replace'))
            MyFile.write("\n")

