-
Notifications
You must be signed in to change notification settings - Fork 0
/
adasyn_second_step.py
43 lines (38 loc) · 1.58 KB
/
adasyn_second_step.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import numpy as np
from sklearn import neighbors
import pandas as pd
from sklearn.model_selection import train_test_split
import operator
from statistics import mean
def adasyn_second_step(xtrain, ytrain, target_column, m, most_common, complex_model, nominal, Gi, Minority_per_xi):
    """Generate synthetic rows by interpolation and label them with a model.

    For each of the first ``m`` rows of ``xtrain``, ``Gi[i]`` synthetic rows
    are created. Each one lies on the segment between the row and a randomly
    chosen row listed in ``Minority_per_xi[i]``. Columns named in ``nominal``
    are then overwritten with the values from ``most_common[i]``. All
    synthetic rows are labelled with a single ``complex_model.predict`` call.

    Args:
        xtrain: Feature DataFrame; rows are addressed by position (``iloc``).
        ytrain: Not used by this function; kept so existing callers that
            pass it positionally keep working.
        target_column: Name of the label column in the returned DataFrame.
        m: Number of leading rows of ``xtrain`` to process.
        most_common: Indexable by row position; ``most_common[i][feature]``
            is the value assigned to each nominal ``feature`` of every
            synthetic row generated from row ``i``.
        complex_model: Object exposing ``predict(X)``. It receives a 2-D
            array with one synthetic sample per row and must return one
            label per sample, each convertible with ``int()``.
        nominal: Iterable of column labels treated as nominal features.
        Gi: Indexable by row position; number of synthetic rows to generate
            for each row.
        Minority_per_xi: Indexable by row position; the positional indices
            (into ``xtrain``) that row ``i`` may be interpolated towards.
            Rows whose entry is empty produce no synthetic rows.
            NOTE(review): presumably a list of lists -- confirm with caller.

    Returns:
        DataFrame with one row per synthetic sample, the feature columns
        followed by ``target_column``, indexed ``0..n-1``. When no sample is
        generated, the frame has zero rows.

    Raises:
        ValueError: If the model does not return exactly one label per
            synthetic sample.
    """
    syn_data = []
    for i in range(m):
        neighbours = Minority_per_xi[i]
        n_new = Gi[i]
        # Nothing to do for rows that need no samples or have no neighbours;
        # checking once here avoids re-testing inside the sampling loop.
        if n_new <= 0 or not neighbours:
            continue

        xi = xtrain.iloc[i, :]
        most_common_feature = most_common[i]
        for _ in range(n_new):
            index = np.random.choice(neighbours)
            xzi = xtrain.iloc[index, :]
            # Random point on the segment between xi and the chosen neighbour.
            si = xi + (xzi - xi) * np.random.uniform(0, 1)
            # Interpolation is meaningless for nominal features, so they are
            # replaced by the supplied most-common values.
            for feature in nominal:
                si[feature] = most_common_feature[feature]
            syn_data.append(si)

    # Build the data matrix.
    new_df = pd.DataFrame(syn_data)
    new_df.reset_index(drop=True, inplace=True)

    new_y = []
    if syn_data:
        # One batched call instead of one call per row. A plain array is
        # passed so the model sees the same kind of input as before
        # (values only, no column names).
        predictions = np.asarray(complex_model.predict(new_df.to_numpy()))
        new_y = [int(label) for label in predictions.reshape(-1)]
        if len(new_y) != len(syn_data):
            raise ValueError(
                "complex_model.predict returned %d labels for %d synthetic samples"
                % (len(new_y), len(syn_data))
            )

    new_y_df = pd.DataFrame({target_column: new_y})
    return pd.concat([new_df, new_y_df], axis=1)