
Commit

Update data_helpers.py
bhaveshoswal authored Apr 21, 2017
1 parent dfb7bcc commit aa78e5c
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions data_helpers.py
@@ -6,7 +6,7 @@
 
 def clean_str(string):
     """
-    Tokenization/string cleaning for all datasets except for SST.
+    Tokenization/string cleaning for datasets.
     Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
     """
     string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
@@ -27,7 +27,7 @@ def clean_str(string):
 
 def load_data_and_labels():
     """
-    Loads MR polarity data from files, splits the data into words and generates labels.
+    Loads polarity data from files, splits the data into words and generates labels.
     Returns split sentences and labels.
     """
     # Load data from files
@@ -78,7 +78,7 @@ def build_vocab(sentences):
 
 def build_input_data(sentences, labels, vocabulary):
     """
-    Maps sentencs and labels to vectors based on a vocabulary.
+    Maps sentences and labels to vectors based on a vocabulary.
     """
     x = np.array([[vocabulary[word] for word in sentence] for sentence in sentences])
     y = np.array(labels)
@@ -87,7 +87,7 @@ def build_input_data(sentences, labels, vocabulary):
 
 def load_data():
     """
-    Loads and preprocessed data for the MR dataset.
+    Loads and preprocessed data for the dataset.
     Returns input vectors, labels, vocabulary, and inverse vocabulary.
     """
     # Load and preprocess data
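For context, here is a minimal, self-contained sketch of the pipeline these docstrings describe: clean a raw sentence with the regex shown above, build a word-to-index vocabulary, and map each tokenized sentence to an index vector the way build_input_data does. The two-sentence mini-corpus, the simplified clean_str (it omits the punctuation-splitting substitutions of the real helper), and the inline vocabulary construction are illustrative assumptions, not part of this commit; the real code presumably pads sentences to equal length before the np.array call.

import re

import numpy as np


def clean_str(string):
    # Simplified version of the helper touched in this commit: keep only the
    # characters allowed by the regex, then lower-case and strip. The real
    # function applies further substitutions to split punctuation into tokens.
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
    return string.strip().lower()


# Hypothetical mini-corpus with one one-hot label per sentence (not from the repo).
raw = ["A fine film !", "Dull , slow plot"]
sentences = [clean_str(s).split(" ") for s in raw]
labels = [[0, 1], [1, 0]]

# Word -> index vocabulary, roughly what build_vocab is expected to produce.
vocabulary = {w: i for i, w in enumerate(sorted({w for s in sentences for w in s}))}

# The same mapping as build_input_data in the diff: each word becomes its index.
# Both example sentences already have four tokens, so np.array gets a clean 2-D shape.
x = np.array([[vocabulary[word] for word in sentence] for sentence in sentences])
y = np.array(labels)
print(x)
print(y)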
