forked from kaysabelle/smokeythebear
-
Notifications
You must be signed in to change notification settings - Fork 1
/
triplescraper.py
65 lines (51 loc) · 2.18 KB
/
triplescraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
Python script that scrapes the adjective fire danger information
"""
from bs4 import BeautifulSoup, NavigableString
import datetime
try:
from urllib2 import urlopen
except ImportError:
from urllib.request import urlopen # py3k
def _format_report_date(day):
    """Return *day* as two-digit day + upper-cased month abbreviation + two-digit year.

    In an English locale 5 July 2024 becomes ``05JUL24``. This is the form
    placed in the ``start=``/``end=`` parameters of the report URLs.
    NOTE(review): ``%b`` is locale-dependent; presumably the service expects
    English month abbreviations -- confirm if this runs under another locale.
    """
    return day.strftime('%d') + day.strftime('%b').upper() + day.strftime('%y')


# Take a single snapshot of the current date and derive yesterday from it, so
# the two values always differ by exactly one day even if the script happens
# to run across midnight.
today = datetime.date.today()
todaysdate = _format_report_date(today)
# Get the report for yesterday's date as well
yesterday = today - datetime.timedelta(days=1)
yesterdaysdate = _format_report_date(yesterday)
# Station ids passed as the stn= query parameter below:
#   Las Tablas     44904
#   La Panza       44914
#   Arroyo Grande  44915
#   San Simeon     44917


def _nfdrs_url(station_id):
    """Build the nfdrs.xsql report URL for one station id (a string).

    The query range runs from ``yesterdaysdate`` to ``todaysdate``, the
    module-level date strings computed earlier in this script.
    """
    return ("https://fam.nwcg.gov/wims/xsql/nfdrs.xsql?stn=" + station_id +
            "&start=" + yesterdaysdate + "&end=" + todaysdate + "&user=4e1")


def _fetch_report(label, url):
    """Print a progress line, download *url* and return it as a BeautifulSoup tree.

    *label* is the human-readable station name used in the progress message.
    Any error raised by ``urlopen`` propagates to the caller.
    """
    # print() with one pre-formatted argument produces the same output on
    # Python 2 and Python 3; the bare print statement is a SyntaxError on 3.
    print("Getting %s data from %s" % (label, url))
    response = urlopen(url)
    try:
        # NOTE(review): no parser is named, so bs4 picks whichever one is
        # installed. The later html/body stripping suggests lxml or html5lib
        # was in use -- confirm and pin it explicitly.
        return BeautifulSoup(response)
    finally:
        # The markup is read inside the constructor, so the connection can be
        # released as soon as parsing is done (or has failed).
        response.close()


url_lt = _nfdrs_url("44904")
soup_lt = _fetch_report("Las Tablas", url_lt)
url_lp = _nfdrs_url("44914")
soup_lp = _fetch_report("La Panza", url_lp)
url_ag = _nfdrs_url("44915")
soup_ag = _fetch_report("Arroyo Grande", url_ag)
url_slc = _nfdrs_url("44917")
soup_slc = _fetch_report("San Simeon", url_slc)
# Remove unnecessary html tags to get clean xml.
# Each <html>/<body> element is replaced by its own children, so the report
# content is kept while the wrapper elements disappear.
invalid_tags = ['html', 'body']
for soup in (soup_lt, soup_lp, soup_ag, soup_slc):
    for tag in invalid_tags:
        # find_all()/unwrap() are the current bs4 names for the legacy
        # findAll()/replaceWithChildren() aliases and behave the same.
        # find_all() returns a list, so unwrapping while looping is safe.
        for match in soup.find_all(tag):
            match.unwrap()
# Write to file: one XML document per station, in the current directory.
for filename, soup in (
    ("lastablas.xml", soup_lt),
    ("lapanza.xml", soup_lp),
    ("arroyogrande.xml", soup_ag),
    ("sansimeon.xml", soup_slc),
):
    with open(filename, "wb") as outfile:
        # encode() returns the document as UTF-8 bytes on both Python 2 and 3.
        # bytes(soup) only did that on Python 2, where bytes is an alias of
        # str; on Python 3 it does not render the markup.
        outfile.write(soup.encode("utf-8"))