Machine Learning
BeautifulSoup XML 분석
J-sean
2019. 1. 15. 22:47
반응형
BeautifulSoup을 이용해 XML을 분석할 수 있다.
"""Fetch the KMA (Korea Meteorological Administration) nationwide mid-term
forecast RSS feed, cache it to a local file, and print the cities grouped
by their forecast weather text."""
from bs4 import BeautifulSoup
import urllib.request as req
import os.path

# KMA mid-term forecast RSS endpoint (stnId=108 = nationwide).
url = "http://www.weather.go.kr/weather/forecast/mid-term-rss3.jsp?stnId=108"
filename = "forecast.xml"

# Download the feed only on the first run; later runs reuse the cached file.
if not os.path.exists(filename):
    # req.urlretrieve(url, filename) would also work, but it is a legacy
    # interface that might become deprecated at some point in the future.
    with req.urlopen(url) as contents:
        # read() returns the whole payload; an empty bytes object would mean
        # end-of-stream.
        xml = contents.read().decode("utf-8")
    # Explicit encoding: the feed is UTF-8 Korean text, and the platform
    # default (e.g. cp949 on Korean Windows) would corrupt it on write.
    with open(filename, mode="wt", encoding="utf-8") as f:
        f.write(xml)

with open(filename, mode="rt", encoding="utf-8") as f:
    xml = f.read()

# html.parser lowercases every tag name, so lookups below use lowercase
# ("title", "wf", "location", "city").
soup = BeautifulSoup(xml, "html.parser")

print("[", soup.find("title").string, "]")
print(soup.find("wf").string, "\n")

# Group city names by their forecast text: {weather: [city, ...]}.
info = {}
for location in soup.find_all("location"):
    name = location.find("city").string
    weather = location.find("wf").string
    # setdefault creates the empty list on first sight of this weather text.
    info.setdefault(weather, []).append(name)

for weather, names in info.items():
    print("■", weather)
    for name in names:
        print("|-", name)
반응형