Source code for chemdataextractor.relex.pattern

# -*- coding: utf-8 -*-
"""
Extraction pattern object
"""

"""
   Modify generate_cde_element() function to adapt the changes of phrase.py.
   If any prefix/middle/suffix are empty (blank), do not add it to the resulting phrase.
   Modified by jz449
"""

import re
from ..parse.elements import I, W, R, Any, And, Start, OneOrMore, Group
from ..parse.actions import join


[docs]class Pattern: """ Pattern object, fundamentally the same as a phrase except assigned a confidence"""
[docs] def __init__(self, entities=None, elements=None, label=None, sentences=None, order=None, relations=None, confidence=0): self.cluster_label = label self.elements = elements self.entities = entities self.number_of_entities = len(order) self.order = order self.relations = relations self.confidence = confidence self.parse_expression = self.generate_cde_parse_expression()
def __repr__(self): return self.to_string()
[docs] def to_string(self): output_string = '' output_string += ' '.join(self.elements['prefix']['tokens']) + ' ' if isinstance(self.entities[0].tag, tuple): output_string += '(' + ', '.join([i for i in self.entities[0].tag]) + ') ' else: output_string += '(' + self.entities[0].tag + ') ' for i in range(0, self.number_of_entities - 1): output_string += ' '.join(self.elements['middle_' + str(i+1)]['tokens']) + ' ' if isinstance(self.entities[i+1].tag, tuple): output_string += '(' + ', '.join([i for i in self.entities[i+1].tag]) + ') ' else: output_string += '(' + self.entities[i+1].tag + ') ' output_string = output_string output_string += ' '.join(self.elements['suffix']['tokens']) return output_string
# TODO: Finish this once new parse_expressions are handled
[docs] def generate_cde_parse_expression(self): """Create a CDE parse expression for this extraction pattern """ elements = [] prefix_tokens = self.elements['prefix']['tokens'] for token in prefix_tokens: if token == '<Blank>': continue elements.append(I(token)) elements.append(self.entities[0].parse_expression) for middle in range(0, self.number_of_entities -1): middle_tokens = self.elements['middle_' + str(middle+1)]['tokens'] for token in middle_tokens: if token == '<Blank>': continue elements.append(I(token)) elements.append(self.entities[middle+1].parse_expression) suffix_tokens = self.elements['suffix']['tokens'] for token in suffix_tokens: if token == '<Blank>': continue elements.append(I(token)) final_phrase = And(exprs=elements) parse_expression = (final_phrase)('phrase') return parse_expression