#!/usr/bin/python
import os
import sys
import string

# Replacement text for the HTML entities the converter understands, keyed by
# the text found between "&" and ";" in the input.
entitys = {
	# characters that are significant in HTML markup
	"amp": "&",
	"quot": "\"",
	"lt": "<",
	"gt": ">",
	# spacing and punctuation
	"nbsp": " ",
	"commat": "@",
	"semi": ";",
	# symbols spelled out in plain ASCII
	"trade": "(tm)",
	"copy": "(c)",
	# numeric references, mapped to an ASCII approximation
	"#8482": "(tm)",  # trade mark sign
	"#8211": "-",  # en dash
	"#8212": "-",  # em dash
	"#8220": "\"",  # left double quotation mark
	"#8221": "\"",  # right double quotation mark
}

# Markers of the lists that are currently open, outermost first: one "*" per
# enclosing <ul> and one "#" per enclosing <ol>.  Maintained by doTag().
listtype=""

def doTag(tag,flag,attr):
	"""Return the wiki markup that replaces one HTML tag.

	tag -- tag name as found in the input; it is compared case-insensitively.
	flag -- 0 for an opening tag, 1 for a closing tag.
	attr -- dict mapping attribute names to values; only used to echo a tag
	that is not recognised.

	Returns "" for tags that are simply dropped.  Opening and closing
	<ul>/<ol> tags push and pop the module-level ``listtype`` string as a
	side effect.  An unrecognised tag is reported on stdout and returned
	re-assembled as HTML.
	"""
	global listtype
	tag = tag.lower()
	opening = (flag==0)
	if tag in ("b","strong","h4","h5"):
		# Same marker opens and closes bold text.
		return "__"
	elif tag in ("i","em"):
		return "''"
	elif tag=="h1":
		if opening: return "\n!!!"
	elif tag=="h2":
		if opening: return "\n!!"
	elif tag=="h3":
		if opening: return "\n!"
	elif tag=="ul" or tag=="ol":
		if opening:
			listtype=listtype+{"ul":"*","ol":"#"}[tag]
		else:
			listtype=listtype[:-1]
	elif tag=="li":
		# Only the opening tag starts an item.  Emitting the marker for
		# </li> as well produced an empty item after every real one.
		if opening: return "\n"+listtype
	elif tag=="p":
		return "\n\n"
	elif tag[:3]=="!--":
		pass
				#Fixme: pre could work
	elif tag in ["meta","a","title","html","head","body",
		     "br","big","small","tr","td","th","tt","pre","img",
		     "code","blockquote","div","span","dl","font",
		     "thead","tfoot","tbody","phr","center","caption","figure",
		     "eps","ph","dm","sup","sub"]:
		pass
	elif tag=="table":
		if opening: return "\n"
	elif tag=="hr":
		return "----"
	elif tag=="dt":
		if opening: return " ; "
	elif tag=="dd":
		if opening: return " : "
	else:
		# Parenthesised single argument: prints the same text on
		# Python 2 and Python 3.
		print("unknown tag: %s" % tag)
		#sys.exit(1)
		pairs = ["%s=%s" % (name,attr[name]) for name in attr.keys()]
		return "<%s%s %s>" % (["","/"][flag],tag," ".join(pairs))
	return ""


def _decodeEntity(entity):
	"""Return the text that replaces the entity named between "&" and ";".

	The ``entitys`` table is consulted first.  Otherwise a numeric character
	reference ("#NNN" decimal, "#xHH" hexadecimal) in the ASCII range is
	decoded.  Anything else is reported on stdout and returned unchanged.
	"""
	if entity in entitys:
		return entitys[entity]
	if entity[:1]=="#":
		try:
			if entity[1:2] in ("x","X"):
				code=int(entity[2:],16)
			else:
				code=int(entity[1:],10)
		except ValueError:
			code=-1
		# Every value in the table is plain ASCII; keep decoded
		# references within the same range.
		if 0<=code<128:
			return chr(code)
	print("unknown entity %s" % entity)
	return entity


def _convertLines(lines):
	"""Yield the wiki text produced by each line of HTML in ``lines``.

	Leading whitespace of every line is dropped.  The parser state is kept
	from one line to the next, so a tag or entity may span lines.  Lines
	that produce no text yield nothing.
	"""
	state=0
	flag=0
	tag=""
	attr=""
	value=""
	entity=""
	attrs={}
	for rawline in lines:
		out=[]
		for char in rawline.lstrip():
			if state==0: # Normal text
				if char=="<":
					state=1
					flag=0
					tag=""
				elif char=="&":
					entity=""
					state=5
				elif char=="[":
					# "[" is doubled, presumably to escape it
					# for the wiki markup.
					out.append("[[")
				else:
					out.append(char)
			elif state==1: # first char after a <
				if char==">":
					state=0
				elif char=="/":
					state=2
					flag=1
				else:
					tag=tag+char
					state=2
			elif state==2: # during the tag name
				if char==">":
					state=0
					out.append(doTag(tag,flag,{}))
				elif char in (" ","\n","\r"):
					state=3
					attrs={}
					attr=""
				else:
					tag=tag+char
			elif state==3: # inside attributes
				if char==">":
					state=0
					out.append(doTag(tag,flag,attrs))
				elif char=="=":
					value=""
					state=4
				else:
					attr=attr+char
			elif state==4: # inside an attribute value
				if char==">":
					# The ">" also ends the value being read;
					# record it so the last attribute is kept.
					attrs[attr]=value
					state=0
					out.append(doTag(tag,flag,attrs))
				elif char in (" ","\n","\r"):
					state=3
					attrs[attr]=value
					attr=""
				else:
					value=value+char
			elif state==5: # &'s
				if char==";":
					state=0
					out.append(_decodeEntity(entity))
				else:
					entity=entity+char
		text="".join(out)
		if text:
			yield text


def doManPage(file,outdir=None):
	"""Convert one HTML man page to wiki markup.

	file -- path of the HTML file to read; it is echoed on stdout.
	outdir -- string prepended to the output file name.  Defaults to
	sys.argv[2].  It is concatenated as-is, so a directory must end with a
	path separator.

	The output file name is the base name of ``file`` with ".html" removed.
	Both files are closed before returning, also when conversion fails.
	"""
	print(file)
	if outdir is None:
		outdir=sys.argv[2]
	outname=outdir+os.path.basename(file).replace(".html","")
	with open(outname,"w") as outfile:
		with open(file,"r") as infile:
			for text in _convertLines(infile):
				outfile.write(text)


def doManPageDir(ignored,dir,files):
	"""Convert every regular file listed for one directory.

	Callback invoked once for each directory visited while walking the
	input tree.

	ignored -- opaque argument handed through by the walker; unused.
	dir -- path of the directory being visited.
	files -- names of the entries found in ``dir``.
	"""
	for name in files:
		path=os.path.join(dir,name)
		# The entry list may contain sub-directories as well; those are
		# visited on their own and cannot be opened as a page.
		if os.path.isfile(path):
			doManPage(path)

# Command line: <html-dir> <output-prefix>.  Both are required; the prefix is
# prepended as-is to every generated file name.
if len(sys.argv)<3:
	sys.stderr.write("usage: %s <html-dir> <output-prefix>\n" % sys.argv[0])
	sys.exit(2)

# os.walk() exists on both Python 2 and Python 3, unlike os.path.walk().
# Only the file names are handed over: sub-directories get their own
# iteration and cannot be opened as pages.
for dirpath,dirnames,filenames in os.walk(sys.argv[1]):
	doManPageDir(None,dirpath,filenames)

