early python port of the FIT test framework (http://fit.zwiki.org/)

root / Parse.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
"""
Python translation of fit.Parse,
which is copyright (c) 2002 Cunningham & Cunningham, Inc.
Released under the terms of the GNU General Public License version 2 or later.
"""

import string, re

class ParseException(Exception):
    """
    Raised when an expected tag cannot be located in the text being parsed.

    Attributes:
        message -- human readable description of the problem
        offset  -- character offset associated with the failure
    """
    def __init__(self, message, offset):
        # Hand both values to the base class so that ``args`` is populated;
        # this keeps repr(), pickling and generic handlers informative.
        Exception.__init__(self, message, offset)
        self.message = message
        self.offset = offset

    def __str__(self):
        """Return 'message, offset'."""
        return '%s, %s' % (self.message, self.offset)

class Parse:
    """
    One node in a linked tree built from nested HTML-style tags.

    A node stores a single tag occurrence as five pieces of text:
    ``leader`` (text before the tag), ``tag`` (the opening tag), ``body``
    (text between opening and closing tag), ``end`` (the closing tag) and
    ``trailer`` (text after the closing tag).

    ``parts`` links to the first node of the next nesting level and ``more``
    links to the next sibling on the same level.  When a node is parsed from
    text and ``parts`` is set, ``body`` is None; when ``more`` is set,
    ``trailer`` is None -- the text then lives in the linked nodes.
    """
    leader  = ''
    tag     = ''
    body    = ''
    end     = ''
    trailer = ''
    more    = None
    parts   = None
    tags = ("table", "tr", "td")

    # Entity names (lower case, without '&' and ';') expanded by unescape().
    _entities = {
        'lt':'<',
        'gt':'>',
        'amp':'&',
        'nbsp':' ',
        }

    # what's the cleanest way to map these to python ?
    #public Parse (String tag, String body, Parse parts, Parse more) {
    #public Parse (String text) throws ParseException {
    #public Parse (String text, String tags[]) throws ParseException {
    #public Parse (String text, String tags[], int level, int offset) throws ParseException {
    def __init__(self,
                 text=None,tags=tags,level=0,offset=0, # use either these
                 tag='',body='',parts=None,more=None): # or these
        """
        Build a node either by parsing ``text`` or from explicit pieces.

        Parsing form (``text`` given): locate the first ``tags[level]``
        element in ``text`` (case-insensitively), then recursively parse its
        body for ``tags[level+1]`` and the remaining text for further
        ``tags[level]`` siblings.  ``offset`` is only carried along so that
        it can be reported in a ParseException.

        Explicit form (``text`` is None): wrap ``body`` in ``<tag>`` ...
        ``</tag>`` and attach the given ``parts`` and ``more`` links.

        Raises ParseException if the opening tag, its '>', the closing tag
        or the closing tag's '>' cannot be found.
        """
        if text is None:
            if tag is None: tag = ''
            if body is None: body = ''
            self.leader = "\n"
            self.tag = "<"+tag+">"
            self.body = body
            self.end = "</"+tag+">"
            self.trailer = ""
            self.parts = parts
            self.more = more
            return

        name = tags[level]
        lc = text.lower()

        # Each search starts where the previous one succeeded.  The results
        # are validated *before* 1 is added to step past a '>', otherwise a
        # failed search (-1) would turn into a valid looking index 0.
        startTag = lc.find("<"+name)
        tagClose = self._find_after(lc, ">", startTag)
        startEnd = self._find_after(lc, "</"+name, tagClose)
        endClose = self._find_after(lc, ">", startEnd)
        if endClose < 0:
            # _find_after propagates -1, so this covers all four searches.
            raise ParseException(
                "Can't find tag: " + name,
                offset)
        endTag = tagClose + 1
        endEnd = endClose + 1
        startMore = lc.find("<"+name, endEnd)

        self.leader = text[0:startTag]
        self.tag = text[startTag:endTag]
        self.body = text[endTag:startEnd]
        self.end = text[startEnd:endEnd]
        self.trailer = text[endEnd:]

        if level+1 < len(tags):
            # The body is represented by the nested nodes from here on.
            self.parts = Parse(self.body, tags, level+1, offset+endTag)
            self.body = None

        if startMore >= 0:
            # The trailer is represented by the following siblings.
            self.more = Parse(self.trailer, tags, level, offset+endEnd)
            self.trailer = None

    def _find_after(self, haystack, needle, start):
        """Return the index of needle at or after start; -1 if start is -1 or there is no match."""
        if start < 0:
            return -1
        return haystack.find(needle, start)

    #public int size() {
    def size(self):
        """Return the number of nodes in the ``more`` chain, this one included."""
        count = 1
        node = self
        while node.more:
            node = node.more
            count += 1
        return count

    #public Parse last() {
    def last(self):
        """Return the final node of the ``more`` chain (self if there is no sibling)."""
        node = self
        while node.more:
            node = node.more
        return node

    #public Parse leaf() {
    def leaf(self):
        """Return the node reached by following ``parts`` links as far as possible."""
        node = self
        while node.parts:
            node = node.parts
        return node

    #public Parse at(int i) {
    #public Parse at(int i, int j) {
    #public Parse at(int i, int j, int k) {
    def at(self, i, j=None, k=None):
        """
        Index into the tree.

        at(i)       -- the i-th sibling along ``more``; if the chain is
                       shorter than i, the last node is returned.
        at(i, j)    -- the j-th node among the parts of at(i).
        at(i, j, k) -- the k-th node among the parts of at(i, j).
        """
        if j is None and k is None:
            node = self
            # Stop early at the end of the chain instead of failing.
            while i != 0 and node.more:
                node = node.more
                i -= 1
            return node
        elif k is None:
            return self.at(i).parts.at(j)
        else:
            return self.at(i,j).parts.at(k)

    #public String text() {
    def text(self):
        """
        Return the body with markup removed, entities expanded and
        surrounding whitespace stripped.

        Only usable on nodes whose ``body`` is a string (i.e. nodes without
        ``parts`` when built by parsing).
        """
        return self.unescape(self.unformat(self.body)).strip()

    #static String unformat(String s) {
    def unformat(self, s):
        """
        Return ``s`` with every '<' ... '>' span removed.

        A '<' that has no following '>' ends the scan and is left in place.
        """
        #return re.sub('<.*?>','',s)
        i = 0
        while True:
            i = s.find('<',i)
            if i == -1: break
            j = s.find('>',i+1)
            if j < 0: break
            # Cut the span out and rescan from the same position.
            s = s[:i] + s[j+1:]
        return s

    #static String unescape(String s) {
    def unescape(self,s):
        """
        Return ``s`` with the known '&name;' entities replaced.

        The name is matched case-insensitively; unknown names are left
        untouched.  Scanning resumes after the inserted character, so the
        result of a replacement is never expanded a second time.
        """
        #return re.sub('&([^&]*?);',lambda x:self.replacement(x.group(1)),s)
        i = -1
        while True:
            i = s.find('&',i+1)
            if i == -1: break
            j = s.find(';',i+1)
            if j >= 0:
                fromstring = s[i+1:j].lower()
                tostring = self.replacement(fromstring)
                if tostring:
                    s = s[:i] + tostring + s[j+1:]
        return s

    #static String replacement(String from) {
    def replacement(self,s):
        """Return the character for entity name ``s`` (lt, gt, amp, nbsp) or None."""
        return self._entities.get(s)

    #public void addToTag(String text) {
    def addToTag(self,text):
        """Insert ``text`` just before the closing '>' of the opening tag."""
        self.tag = self.tag[:-1] + text + ">"

    #public void addToBody(String text) {
    def addToBody(self,text):
        """Append ``text`` to the body."""
        self.body = self.body + text

    #public void print(PrintWriter out) {
    def __str__(self):
        """
        Return the markup for this node, its parts and its following siblings.

        For a tree obtained by parsing and not modified afterwards this
        reproduces the parsed text.
        """
        pieces = [self.leader, self.tag]
        if self.parts:
            pieces.append(str(self.parts))
        else:
            pieces.append(self.body)
        pieces.append(self.end)
        if self.more:
            pieces.append(str(self.more))
        else:
            pieces.append(self.trailer)
        return ''.join(pieces)

    # Calling a node is kept as an alias for str(node).
    __call__ = __str__