forked from pepesan/machine-learning-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02_01_00_pandas_intro.py
84 lines (70 loc) · 2.25 KB
/
02_01_00_pandas_intro.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# -*- coding: utf-8 -*-
import os
#importacion de pandas
import pandas as pd
# The inital set of baby names and bith rates
names = ['Bob', 'Jessica', 'Mary', 'John', 'Mel']
births = [968, 155, 77, 578, 973]
#Dataset
BabyDataSet = list(zip(names,births))
print("DataSet")
print(BabyDataSet)
# [('Bob', 968), ('Jessica', 155), ('Mary', 77), ('John', 578), ('Mel', 973)]
df = pd.DataFrame(data=BabyDataSet, columns=['Names', 'Births'])
print("DataFrame")
print(df)
print("DataFrame Shape")
print(df.shape)
print("DataFrame Describe")
print(df.describe())
print("DataFrame Describe Shape")
print(df.describe().shape)
#veremos las filas y las columnas del DataFrame
Location = './csv/births1880.csv'
df.to_csv(Location, index=False, header=False)
print(df)
#lectura de fichero
df = pd.read_csv(Location, header=None)
df = pd.read_csv(Location, names=['Names', 'Births'])
print("Datos del DataFrame, cargado desde CSV")
print(df)
Sorted = df.sort_values(['Births'], ascending=False)
print("Imprime la primera fila ordenada por nacimientos")
print(Sorted)
print(Sorted.head(2))
print("Imprime el valor máximo de nacimientos")
print(df['Births'].max())
#Carga de un Dataframe
s = pd.Series([1, 2, 3, 4])
print("DataFrame")
print(s)
print("Shape")
print(s.shape)
#Función describe
print("Describe")
print(s.describe())
d = {'col1': [1, 2], 'col2': [3, 4]}
df = pd.DataFrame(data=d)
print("DataFrame")
print(df)
print(df.shape)
print("Describe")
print(df.describe())
#Cómo cargar un DataFrame desde un Bunch de datos de sklearn
import numpy as np
from sklearn.datasets import load_iris
# save load_iris() sklearn dataset to iris
# if you'd like to check dataset type use: type(load_iris())
# if you'd like to view list of attributes use: dir(load_iris())
iris = load_iris()
print(type(iris))
print(type(iris.data))
# np.c_ is the numpy concatenate function
# which is used to concat iris['data'] and iris['target'] arrays
# for pandas column argument: concat iris['feature_names'] list
# and string list (in this case one string); you can make this anything you'd like..
# the original dataset would probably call this ['Species']
data1 = pd.DataFrame(data= np.c_[iris['data'], iris['target']],
columns= iris['feature_names'] + ['target'])
print(type(data1))
print(data1)