-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path02_01_00_pandas_intro.py
108 lines (98 loc) · 3.01 KB
/
02_01_00_pandas_intro.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# -*- coding: utf-8 -*-
import os
# importacion de pandas
import pandas as pd
from joblib import PrintTime
# The initial set of baby names and birth rates.
names = ['Bob', 'Jessica', 'Mary', 'John', 'Mel']
births = [968, 155, 77, 578, 973]

# Pair every name with its birth count:
# [('Bob', 968), ('Jessica', 155), ('Mary', 77), ('John', 578), ('Mel', 973)]
BabyDataSet = list(zip(names, births))
print("DataSet")
print(BabyDataSet)

# Without explicit column names pandas falls back to integer labels (0, 1).
df = pd.DataFrame(data=BabyDataSet)
print("DataFrame Sin columnas")
print(df)

# Same data, this time with meaningful column labels.
df = pd.DataFrame(data=BabyDataSet, columns=['Names', 'Births'])
print("DataFrame")
print(df)
print("DataFrame Shape")
print(df.shape)
print("DataFrame Describe")
print(df.describe())
print("DataFrame Describe Shape")
print(df.describe().shape)

# Dump the DataFrame to a ';'-separated CSV file.
Location = './csv/births1880.csv'
# to_csv() does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs(os.path.dirname(Location), exist_ok=True)
df.to_csv(Location, index=False, header=True, sep=";")
print(df)

# Read the file back into df. The separator must match the one used when
# writing; with the default ',' every row would be parsed as a single
# 'Names;Births' column. The header row written above supplies the column
# names, so no `names=` argument is needed.
df = pd.read_csv(Location, sep=";")
print("Datos del DataFrame, cargado desde CSV")
print(df)
print("Primeras posiciones")
print(df.head())
print("Últimas posiciones")
print(df.tail())

# Rows ordered from the highest to the lowest number of births; the full
# ordering is printed first, then its two top and two bottom rows.
Sorted = df.sort_values(by=['Births'], ascending=False)
print("Imprime la primera fila ordenada por nacimientos")
print(Sorted)
print(Sorted.head(2))
print(Sorted.tail(2))

# Selecting a single column of the DataFrame yields a Series.
print("Selección de columna")
print(df['Births'])
print("Shape de seleccion")
print(df['Births'].shape)
print("Imprime el valor máximo de nacimientos")
print(df['Births'].max())
# Build a one-dimensional pandas Series (not a DataFrame) from a plain list
# and show its contents, its shape and its summary statistics.
s = pd.Series(data=[1, 2, 3, 4])
print("DataFrame", s, sep="\n")
print("Shape", s.shape, sep="\n")
print("Describe", s.describe(), sep="\n")

# Build a DataFrame from a dictionary: every key becomes a column label and
# every value the list of entries of that column.
print("Carga de diccionario")
d = dict(col1=[1, 2], col2=[3, 4], col3=[5, 6])
df = pd.DataFrame(d)
print("DataFrame", df, df.shape, sep="\n")
print("Describe", df.describe(), sep="\n")
# How to build a DataFrame from a scikit-learn Bunch dataset
import numpy as np
from sklearn.datasets import load_iris
print("iris")
# load_iris() returns the iris dataset as a container object.
# Use type(load_iris()) to inspect its type and dir(load_iris()) to list
# its attributes.
iris = load_iris()
print(type(iris), type(iris.data), type(iris.target), sep="\n")

# The same fields are also reachable with dictionary-style access.
print("iris['data']", type(iris['data']), sep="\n")
print("iris['target']", type(iris['target']), sep="\n")
print("iris['feature_names']", iris['feature_names'], sep="\n")

# Append the target vector as one extra column to the right of the feature
# matrix, and label that extra column 'target' (any label would do; the
# original dataset would probably call it 'Species').
data1 = pd.DataFrame(
    np.column_stack((iris['data'], iris['target'])),
    columns=[*iris['feature_names'], 'target'],
)
print(type(data1))
print(iris['feature_names'])
print(iris['target'])
print(iris['target'].shape)
print(data1)