-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_recipe.py
130 lines (93 loc) · 3.44 KB
/
get_recipe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# Data libraries
import pandas as pd
import numpy as np
import os
def get_data():
'''
This function imports the recipes and cuisines data files as pandas
dataframes
input:
None
Output:
recipes_df: Dataframe containing all recipes
cuisines_df: Dataframe containing the cuisine name and the cuisine index
'''
filePath = os.path.join(os.getcwd(), "data")
cuisinesFile = os.path.join(filePath, "Cuisines.csv")
recipesFile = os.path.join(filePath, "recipes.csv")
cuisines_df = pd.read_csv(cuisinesFile, header= None,
names = ["cuisine", "cuisine_name"])
recipes_df = pd.read_csv(recipesFile)
# Replace whitespaces with underscore and make all ingredients lowerscore
for column in recipes_df.columns:
recipes_df.rename(
columns={column:column.replace(' ', '_').replace("'", "")},
inplace=True
)
recipes_df.rename(columns={column:column.lower()}, inplace=True)
# Cuisines_df has different indexing.
# Removing one to match the index with the recipes_df
cuisines_df["cuisine"] = cuisines_df["cuisine"] -1
cuisines_df
return recipes_df, cuisines_df
def top_ingredients(n, df):
'''
This function will return the Top n ingredients per cuisine in a
given dataframe
input
n: Number of top ingredients to retrieve
df: recipes dataframe
output
top_ingr_df: dataframe containing the top n ingredients per cuisine
'''
grouped_recipes = df.groupby("cuisine").sum()
number_cuisines = grouped_recipes.shape[0]
ingr_dict = {}
for i in range(0, number_cuisines):
ocurrences = grouped_recipes.loc[i].sort_values(
ascending = False).reset_index().iloc[0:n]
ocurrences.columns = ["ingredient", "ocurrence"]
top = ocurrences["ingredient"].to_list()
ingr_dict[f"{i}"] = top
top_ingr_df = pd.DataFrame(data = ingr_dict)
return top_ingr_df
def specific_ingredients(df):
'''
This function retrieves the specific ingredients that are only used in a
cuisine.
input:
df: recipes dataframe
output:
specific_df: dataframe containing the specific ingredients used in a cuisine
'''
grouped_recipes = df.groupby("cuisine").sum().reset_index()
recipes_feats = df.drop(columns = "cuisine")
ocur = []
spec_ingr = []
cuisine = []
for i in recipes_feats.columns:
a = grouped_recipes[["cuisine", i]].where(grouped_recipes[i] >0).dropna()
if a.shape[0] == 1:
cuisine.append(a["cuisine"].iloc[0])
ocur.append(a[i].iloc[0])
spec_ingr.append(i)
specific_df = pd.DataFrame(data = zip(spec_ingr, cuisine, ocur),
columns = ["ingredient", "cuisine", "occurrence"])
return specific_df
def countries():
'''
This function creates a dataframe with the country and its cuisine names
input:
None
output:
countries_df: Pandas dataframe containing the country and cuisine names
'''
countries = ['UK', 'Greece', 'Thailand', 'Italy', 'India', 'Spain',
'Germany', 'Morocco', 'Mexico', 'Japan', 'France', 'China']
countries_adjective = ['English', 'Greek', 'Thai', 'Italian', 'Indian',
'Spanish', 'German', 'Moroccan', 'Mexican', 'Japanese', 'French', 'Chinese']
countries_df = pd.DataFrame(data= zip(countries, countries_adjective),
columns = ["country", "cuisine_name"])
return countries_df
def mallet():
pass