"Python Source Colorizer"
_version = "$Id: colorize_py.cgipy,v 1.4 2003/10/03 21:03:08 gra Exp $"
import os, cgi, string, sys, cStringIO
import keyword, token, tokenize, re
# Matches a source-encoding declaration inside a comment line, e.g.
# "# -*- coding: utf-8 -*-"; group 1 is the encoding name.
encodingre = re.compile(r"^\s*#.*coding[:=]\s*([\w_.-]+)")

# Private pseudo token types, numbered above token.NT_OFFSET so they cannot
# collide with the token ids defined by the token module.
_KEYWORD, _TEXT = token.NT_OFFSET + 1, token.NT_OFFSET + 2

# Token type -> CSS class name used for the <span> wrapped around the token.
_classes = dict([
    (token.NUMBER, 'number'),
    (token.OP, 'op'),
    (token.STRING, 'string'),
    (tokenize.COMMENT, 'comment'),
    (token.NAME, 'name'),
    (token.ERRORTOKEN, 'errortoken'),
    (_KEYWORD, 'keyword'),
    (_TEXT, 'text'),
])
class Parser:
    """ Send colored python source.

    The source text is tokenized with the tokenize module and every token
    is written to ``out`` wrapped in ``<span class="...">``, where the class
    name is looked up in the module-level ``_classes`` table.  The instance
    itself is the token callback handed to ``tokenize.tokenize``.
    """

    def __init__(self, raw, out = sys.stdout):
        """ Store the source text.

        raw -- source text; tabs are expanded and surrounding whitespace
               is stripped before it is stored in ``self.raw``.
        out -- object with a ``write`` method that receives the HTML.
        """
        self.raw = raw.expandtabs().strip()
        self.out = out

    def format(self):
        """ Parse and send the colored source.

        Sets ``self.lines`` (offset of every line start in ``self.raw``),
        ``self.encoding`` (taken from a coding declaration on one of the
        first two lines, default "ISO-8859-1") and writes the colored source
        between ``<pre class="python">`` and ``</pre>`` to ``self.out``.
        """
        # self.lines[n] is the offset of (1-based) line n; index 0 is a
        # filler and the last entry is the total length of the text.
        self.lines = [0, 0]
        pos = 0
        while 1:
            pos = self.raw.find('\n', pos) + 1
            if not pos:
                break
            self.lines.append(pos)
        self.lines.append(len(self.raw))

        # Only the first two lines are searched for a coding declaration.
        self.encoding = "ISO-8859-1"
        for i in (1, 2):
            if i >= len(self.lines) - 1:
                break
            line = self.raw[self.lines[i]:self.lines[i + 1]]
            match = encodingre.search(line)
            if match:
                self.encoding = match.group(1)
                break

        # Offset in self.raw up to which text has already been emitted.
        self.pos = 0
        text = cStringIO.StringIO(self.raw)
        self.out.write('<pre class="python">')
        try:
            tokenize.tokenize(text.readline, self)
        except tokenize.TokenError:
            # sys.exc_info() keeps this handler free of version-specific
            # "except ... as/," syntax.
            ex = sys.exc_info()[1]
            msg = ex.args[0]
            # Clamp so an unexpected row number cannot raise IndexError.
            line = min(ex.args[1][0], len(self.lines) - 1)
            # Both the message and the untokenized remainder are plain text
            # and must be escaped before they are embedded in the page.
            self.out.write("<h3>ERROR: %s</h3>%s\n" % (
                cgi.escape(msg), cgi.escape(self.raw[self.lines[line]:])))
        self.out.write('</pre>')

    def __call__(self, toktype, toktext, start, end, line):
        """ Token handler.

        Called positionally by tokenize with the token type, the token
        text, the (row, col) start and end positions and the physical line.
        Only the type, the text and the start position are used.
        """
        srow, scol = start

        # Translate the (row, col) start into an offset in self.raw.
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        # Line ends are emitted as a bare newline.
        if toktype in (token.NEWLINE, tokenize.NL):
            self.out.write('\n')
            return

        # Emit the text skipped between the previous token and this one.
        if newpos > oldpos:
            self.out.write(self.raw[oldpos:newpos])

        # Indentation changes produce no output; do not advance past them.
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return

        # Fold every operator token type into OP and detect keywords.
        if token.LPAR <= toktype <= token.OP:
            toktype = token.OP
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            toktype = _KEYWORD
        clazz = _classes.get(toktype, _classes[_TEXT])

        style = ''
        if toktype == token.ERRORTOKEN:
            style = ' style="border: solid 1.5pt #FF0000;"'

        # A comment token only needs its own newline when the tokenizer
        # delivered the line end as part of the comment text; otherwise the
        # NL/NEWLINE token that follows writes it and an unconditional
        # newline here would double-space the output.
        ends_line = 0
        if toktype == tokenize.COMMENT:
            ends_line = toktext[-1:] in ('\n', '\r')
            toktext = toktext.rstrip()

        self.out.write('<span class="%s"%s>' % (clazz, style))
        self.out.write(cgi.escape(toktext))
        self.out.write('</span>')
        if ends_line:
            self.out.write('\n')
if __name__ == "__main__":
    import os, sys

    def colorize_file(filepath=None):
        """ Colorize a ".py" file, or standard input when no path is given.

        Returns a (html, encoding) tuple: the colored source produced by
        Parser and the encoding the parser settled on.
        Raises SystemExit when the path does not end in ".py" or the file
        cannot be opened or read.
        """
        if filepath:
            if not filepath.endswith(".py"):
                raise SystemExit("File %s not allowed." % filepath)
            try:
                sourcefile = open(filepath)
                # Release the handle as soon as the text is in memory, even
                # when reading fails.
                try:
                    sourcestream = sourcefile.read()
                finally:
                    sourcefile.close()
            except IOError:
                raise SystemExit("File %s unknown." % filepath)
        else:
            sourcestream = sys.stdin.read()
        out = cStringIO.StringIO()
        parser = Parser(sourcestream, out)
        parser.format()
        return out.getvalue(), parser.encoding

    # Pick the input: CGI query string, CGI translated path, command line
    # argument, or standard input (in that order).
    filepath = None
    is_cgi = 0
    query = os.environ.get('QUERY_STRING')
    if query:
        # The query string is untrusted: resolve it against the document
        # root and refuse anything that ends up outside of it ("..").
        docroot = os.environ.get('DOCUMENT_ROOT')
        if not docroot:
            raise SystemExit("File %s not allowed." % query)
        root = os.path.abspath(docroot)
        filepath = os.path.abspath(docroot + "/" + query)
        if not filepath.startswith(root.rstrip(os.sep) + os.sep):
            raise SystemExit("File %s not allowed." % query)
        is_cgi = 1
    elif os.environ.get('PATH_TRANSLATED'):
        filepath = os.environ.get('PATH_TRANSLATED')
        is_cgi = 1
    elif len(sys.argv) > 1:
        filepath = sys.argv[1]

    if filepath:
        filename = os.path.split(filepath)[1]
        formatted, encoding = colorize_file(filepath)
    else:
        filename = ""
        formatted, encoding = colorize_file()

    # The HTTP header is only sent when running as a CGI script, and only
    # after the source was colorized successfully.
    if is_cgi:
        sys.stdout.write("Content-Type: text/html;charset=%s\n\n" % encoding)

    headers = """
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=%s">
<title>%s</title>
<link rel="stylesheet" type="text/css" href="/python.css">
</head>
"""
    # The file name may come from the request, so escape it for the title.
    sys.stdout.write(headers % (encoding, cgi.escape(filename)) + "\n")
    sys.stdout.write(formatted + "\n")