This file is indexed.

/usr/share/pyshared/relational/parser.py is in python-relational 1.2-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
# -*- coding: utf-8 -*-
# coding=UTF-8
# Relational
# Copyright (C) 2008  Salvo "LtWorf" Tomaselli
#
# Relational is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
#
#
#
# This module implements a parser for relational algebra, and can be used
# to convert expressions into python expressions and to get the parse-tree
# of the expression.
#
# The input must be provided as unicode strings (decode UTF-8 input before parsing)
#
#
# Language definition:
# Query := Ident
# Query := Query BinaryOp Query
# Query := (Query)
# Query := σ PYExprWithoutParenthesis (Query) | σ (PYExpr) (Query)
# Query := π FieldList (Query)
# Query := ρ RenameList (Query)
# FieldList := Ident | Ident , FieldList
# RenameList := Ident ➡ Ident | Ident ➡ Ident , RenameList
# BinaryOp := * | - | ᑌ | ᑎ | ÷ | ᐅᐊ | ᐅLEFTᐊ | ᐅRIGHTᐊ | ᐅFULLᐊ
#
# Language definition here:
# https://github.com/ltworf/relational/wiki/Grammar-and-language
import re
import rtypes

# Values stored in node.kind
RELATION = 0
UNARY = 1
BINARY = 2

# Symbols of the binary operators
PRODUCT = u'*'
DIFFERENCE = u'-'
UNION = u'ᑌ'
INTERSECTION = u'ᑎ'
DIVISION = u'÷'
JOIN = u'ᐅᐊ'
JOIN_LEFT = u'ᐅLEFTᐊ'
JOIN_RIGHT = u'ᐅRIGHTᐊ'
JOIN_FULL = u'ᐅFULLᐊ'

# Symbols of the unary operators
PROJECTION = u'π'
SELECTION = u'σ'
RENAME = u'ρ'

# Separator between old and new name in the parameters of a rename
ARROW = u'➡'

# List of binary operators
b_operators = (
    PRODUCT,
    DIFFERENCE,
    UNION,
    INTERSECTION,
    DIVISION,
    JOIN,
    JOIN_LEFT,
    JOIN_RIGHT,
    JOIN_FULL,
)

# List of unary operators
u_operators = (
    PROJECTION,
    SELECTION,
    RENAME,
)

# Associates operator with python method
op_functions = {
    PRODUCT: 'product',
    DIFFERENCE: 'difference',
    UNION: 'union',
    INTERSECTION: 'intersection',
    DIVISION: 'division',
    JOIN: 'join',
    JOIN_LEFT: 'outer_left',
    JOIN_RIGHT: 'outer_right',
    JOIN_FULL: 'outer',
    PROJECTION: 'projection',
    SELECTION: 'selection',
    RENAME: 'rename',
}


class TokenizerException (Exception):
    '''Error raised while splitting an expression into tokens.'''


class ParserException (Exception):
    '''Error raised while building the tree from a list of tokens.'''


class node (object):

    '''A node in the parse tree of a relational algebra expression.

    Leaves are relations and internal nodes are operations.

    The kind attribute says if the node is a relation (RELATION), a unary
    operator (UNARY) or a binary operator (BINARY). The name attribute holds
    the name of the relation or the symbol of the operator.

    Since relations are leaves, a relation node has no attribute for children.

    If the node is a binary operator, it has left and right attributes,
    holding the operand nodes.

    If the node is a unary operator, it has a child attribute, pointing to the
    child node, and a prop attribute containing the string with the
    parameters of the operation.

    Nodes can be compared with == and != and are explicitly not hashable.

    This class is used to convert an expression into python code.'''
    kind = None
    __hash__ = None

    def __init__(self, expression=None):
        '''Generates the tree from the tokenized expression.

        expression is a list of tokens where sub-expressions in parenthesis
        are nested lists. If it is None or empty, an empty node is created:
        kind stays None and no other attribute is set.

        Raises ParserException if a relation name is not valid, if an operator
        misses an operand or if the tokens cannot be parsed.'''
        if expression is None or len(expression) == 0:
            return

        # If the list contains only a list, it will consider the lower level list.
        # This will allow things like ((((((a))))) to work
        while len(expression) == 1 and isinstance(expression[0], list):
            expression = expression[0]

        # The list contains only 1 string. Means it is the name of a relation
        if len(expression) == 1 and isinstance(expression[0], unicode):
            self.kind = RELATION
            self.name = expression[0]
            if not rtypes.is_valid_relation_name(self.name):
                raise ParserException(
                    u"'%s' is not a valid relation name" % self.name)
            return

        # Scans the expression from right to left, searching for binary
        # operators; this means that binary operators have lesser priority
        # than unary operators.
        # The first operator found becomes the root of this (sub)tree, using
        # everything on its left to build the left subtree and everything on
        # its right to build the right subtree.
        # Since it searches for strings, and expressions into parenthesis are
        # within sub-lists, they won't be found here, ensuring that they will
        # have highest priority.
        last = len(expression) - 1
        for i in xrange(last, -1, -1):
            if expression[i] in b_operators:  # Binary operator
                self.kind = BINARY
                self.name = expression[i]

                if i == 0:  # Nothing on the left of the operator
                    raise ParserException(
                        u"Expected left operand for '%s'" % self.name)

                if i == last:  # Nothing on the right of the operator
                    raise ParserException(
                        u"Expected right operand for '%s'" % self.name)

                self.left = node(expression[:i])
                self.right = node(expression[i + 1:])
                return

        # Searches for unary operators, parsing from right to left
        for i in xrange(last, -1, -1):
            if expression[i] in u_operators:  # Unary operator
                self.kind = UNARY
                self.name = expression[i]

                # The operator must be followed by its parameters and by
                # the sub-expression it is applied to
                if len(expression) <= i + 2:
                    raise ParserException(
                        u"Expected more tokens in '%s'" % self.name)

                self.prop = expression[1 + i].strip()
                self.child = node(expression[2 + i])

                return
        raise ParserException(u"Unable to parse tokens")

    def toCode(self):
        '''This method converts the tree into a python code object'''
        code = self.toPython()
        return compile(code, '<relational_expression>', 'eval')

    def toPython(self):
        '''This method converts the expression into a python code string, which
        will require the relation module to be executed.

        Operators become method calls on the code generated for their left
        operand (binary) or child (unary); relations become their name.'''
        if self.name in b_operators:
            return '%s.%s(%s)' % (self.left.toPython(), op_functions[self.name], self.right.toPython())
        elif self.name in u_operators:
            prop = self.prop

            # Converting parameters
            if self.name == PROJECTION:
                # a,b becomes "a","b"
                prop = '\"%s\"' % prop.replace(' ', '').replace(',', '\",\"')
            elif self.name == RENAME:
                # a➡b,c➡d becomes {"a":"b","c":"d"}
                prop = '{\"%s\"}' % prop.replace(
                    ',', '\",\"').replace(ARROW, '\":\"').replace(' ', '')
            else:  # Selection
                # The condition is embedded in a double quoted literal, so
                # backslashes and double quotes must be escaped or the
                # generated code would be broken by a condition like a=="x"
                prop = '\"%s\"' % prop.replace('\\', '\\\\').replace('"', '\\"')

            return '%s.%s(%s)' % (self.child.toPython(), op_functions[self.name], prop)
        else:
            return self.name

    def printtree(self, level=0):
        '''returns a representation of the tree using indentation

        level is the depth of this node; every level adds two spaces.'''
        r = '  ' * level
        r += self.name
        if self.name in b_operators:
            r += self.left.printtree(level + 1)
            r += self.right.printtree(level + 1)
        elif self.name in u_operators:
            r += '\t%s\n' % self.prop
            r += self.child.printtree(level + 1)

        return '\n' + r

    def get_left_leaf(self):
        '''This function returns the leftmost leaf in the tree. It is needed by some optimizations.'''
        if self.kind == RELATION:
            return self
        elif self.kind == UNARY:
            return self.child.get_left_leaf()
        elif self.kind == BINARY:
            return self.left.get_left_leaf()

    def result_format(self, rels):
        '''This function returns a list containing the fields that the resulting relation will have.
        It requires a dictionary where keys are the names of the relations and the values are
        the relation objects.

        Returns None if rels is None.
        For divisions and joins the list is built from sets, so the order of
        the fields is not defined.'''
        if rels is None:
            return

        if self.kind == RELATION:
            return list(rels[self.name].header.attributes)
        elif self.kind == BINARY and self.name in (DIFFERENCE, UNION, INTERSECTION):
            return self.left.result_format(rels)
        elif self.kind == BINARY and self.name == DIVISION:
            return list(set(self.left.result_format(rels)) - set(self.right.result_format(rels)))
        elif self.name == PROJECTION:
            return [field.strip() for field in self.prop.split(',')]
        elif self.name == PRODUCT:
            return self.left.result_format(rels) + self.right.result_format(rels)
        elif self.name == SELECTION:
            return self.child.result_format(rels)
        elif self.name == RENAME:
            # Maps every old name to the new one
            _vars = {}
            for i in self.prop.split(','):
                q = i.split(ARROW)
                _vars[q[0].strip()] = q[1].strip()

            _fields = self.child.result_format(rels)
            for pos, field in enumerate(_fields):
                if field in _vars:
                    _fields[pos] = _vars[field]
            return _fields
        elif self.name in (JOIN, JOIN_LEFT, JOIN_RIGHT, JOIN_FULL):
            return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))

    def __eq__(self, other):
        '''Two nodes are equal if they have the same kind and name and, recursively,
        the same parameters and children.'''
        if not (isinstance(other, node) and self.name == other.name and self.kind == other.kind):
            return False

        if self.kind == UNARY:
            if other.prop != self.prop:
                return False
            return self.child == other.child
        if self.kind == BINARY:
            return self.left == other.left and self.right == other.right
        return True

    def __ne__(self, other):
        '''Opposite of __eq__. It must be defined explicitly because python 2
        does not derive != from ==.'''
        return not self.__eq__(other)

    def __str__(self):
        '''Returns the expression represented by this (sub)tree. Operands of binary
        operators are put in parenthesis unless they are relations.'''
        if (self.kind == RELATION):
            return self.name
        elif (self.kind == UNARY):
            return self.name + " " + self.prop + " (" + self.child.__str__() + ")"
        elif (self.kind == BINARY):
            if self.left.kind == RELATION:
                left_text = self.left.__str__()
            else:
                left_text = "(" + self.left.__str__() + ")"
            if self.right.kind == RELATION:
                right_text = self.right.__str__()
            else:
                right_text = "(" + self.right.__str__() + ")"

            return (left_text + self.name + right_text)


def _find_matching_parenthesis(expression, start=0, openpar=u'(', closepar=u')'):
    '''This function returns the position of the matching
    close parenthesis to the 1st open parenthesis found
    starting from start (0 by default)'''
    par_count = 0  # Count of parenthesis
    for i in range(start, len(expression)):
        if expression[i] == openpar:
            par_count += 1
        elif expression[i] == closepar:
            par_count -= 1
            if par_count == 0:
                return i  # Closing parenthesis of the parameter


def tokenize(expression):
    '''This function converts an expression into a list where
    every token of the expression is an item of a list. Expressions into
    parenthesis will be converted into sublists.

    A unary operator produces two tokens: the operator itself and the string
    with its parameters, which is everything up to the parenthesis opening
    the sub-expression the operator is applied to.

    expression must be a unicode string.

    Raises TokenizerException if expression is not unicode, if a parenthesis
    is not closed, if the parameters of a unary operator are not followed by
    a sub-expression in parenthesis, if a join operator is not terminated or
    if an unexpected character is found.'''
    if not isinstance(expression, unicode):
        raise TokenizerException('expected unicode')

    items = []  # List for the tokens

    # This is a state machine. Every iteration looks at the beginning of the
    # remaining expression, which can be:
    #
    # sub-expression: begins with '(' and ends at the matching ')', so the
    #     other open parenthesis must be counted to determine which close
    #     is the right one.
    # unary operator: it is followed by a parameter AND a sub-expression.
    # binary operator: nothing much to say.
    # relation: anything else; names are read one character at a time.

    expression = expression.strip()  # Removes initial and ending spaces

    # 0 initial and useless
    # 1 previous stuff was a relation
    # 2 previous stuff was a sub-expression
    # 4 previous stuff was a binary operator
    state = 0

    while len(expression) > 0:

        if expression.startswith('('):  # Parenthesis state
            state = 2
            end = _find_matching_parenthesis(expression)
            if end is None:
                raise TokenizerException(
                    "Missing matching ')' in '%s'" % expression)
            # Appends the tokenization of the content of the parenthesis
            items.append(tokenize(expression[1:end]))
            # Removes the entire parenthesis and content from the expression
            expression = expression[end + 1:].strip()

        elif expression.startswith((u"σ", u"π", u"ρ")):  # Unary operator
            operator = expression[0:1]
            items.append(operator)  # Adding operator in the top of the list
            expression = expression[1:].strip()  # Removing operator from the expression

            if expression.startswith('('):
                # Parameter within parenthesis, so adding what's between open
                # and close without tokenization; the sub-expression begins at
                # the first open parenthesis after the matching close
                end = _find_matching_parenthesis(expression)
                if end is None:
                    raise TokenizerException(
                        "Missing matching ')' in '%s'" % expression)
                par = expression.find('(', end)
            else:
                # Parameter without parenthesis, so adding what's between
                # start and the first open parenthesis as a whole
                par = expression.find('(')

            if par == -1:
                # No sub-expression after the parameter. Using -1 to slice
                # would split the text before its last character and produce
                # bogus tokens, so it is reported here. An operator with
                # nothing at all after it is left to the parser, which
                # complains about the missing tokens.
                if len(expression) > 0:
                    raise TokenizerException(
                        u"Expected a sub-expression in parenthesis after '%s %s'" % (operator, expression))
                par = 0

            items.append(expression[:par].strip())  # Inserting parameter of the operator
            expression = expression[par:].strip()  # Removing parameter from the expression
        elif expression.startswith((u"÷", u"ᑎ", u"ᑌ", u"*", u"-")):  # Binary, 1 char
            items.append(expression[0])
            expression = expression[1:].strip()  # 1 char from the expression
            state = 4
        elif expression.startswith(u"ᐅ"):  # Binary long
            i = expression.find(u"ᐊ")
            if i == -1:
                raise TokenizerException(u"Expected ᐊ in %s" % (expression,))
            items.append(expression[:i + 1])
            expression = expression[i + 1:].strip()
            state = 4
        elif re.match(r'[_0-9A-Za-z]', expression[0]) is None:  # At this point we only have relation names, so we raise errors for anything else
            raise TokenizerException(
                "Unexpected '%c' in '%s'" % (expression[0], expression))
        else:  # Relation (hopefully)
            if state == 1:  # Previous was a relation, appending to the last token
                items.append(items.pop() + expression[0])
            else:
                state = 1
                items.append(expression[0])
            # 1 char from the expression. Spaces are stripped after every
            # character, so spaces within a name do not split it in 2 tokens
            expression = expression[1:].strip()

    return items


def tree(expression):
    '''Tokenizes a relational algebra expression and builds its tree,
    returning the root, which is an instance of the node class defined
    in this module.'''
    tokens = tokenize(expression)
    root = node(tokens)
    return root


def parse(expr):
    '''Converts a relational algebra expression into a string of python code,
    that can be passed to the eval function to get the result of the expression.

    There are 2 classes of operators:
    without parameters
    *, -, ᑌ, ᑎ, ÷, ᐅᐊ, ᐅLEFTᐊ, ᐅRIGHTᐊ, ᐅFULLᐊ
    with parameters:
    σ, π, ρ

    Operators without parameters are used like this:
    relation operator relation

    Operators with parameters are used like this:
    operator parameters (relation)

    a*b is a relation itself, so π a,b (a*b) can be parsed.
    π a,b (A) is a relation too, so π a,b (A) ᑌ B can be parsed.

    Parenthesis can be used to change priority: a ᐅᐊ (q ᑌ d).

    IMPORTANT: all strings must be unicode

    EXAMPLES
    σage > 25 and rank == weight(A)
    Q ᐅᐊ π a,b(A) ᐅᐊ B
    ρid➡i,name➡n(A) - π a,b(π a,b(A)) ᑎ σage > 25 or rank = weight(A)
    π a,b(π a,b(A))
    ρid➡i,name➡n(π a,b(A))
    A ᐅᐊ B
    '''
    root = tree(expr)
    return root.toPython()

if __name__ == "__main__":
    # Interactive loop: reads an expression, prints the python code generated
    # for it and asks again.
    # Sample input: σ age>1 and skill=='C' (peopleᐅᐊskills)
    while True:
        try:
            e = unicode(raw_input("Expression: "), 'utf-8')
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C: leaves the loop instead of terminating
            # with a traceback
            break
        print(parse(e))