Machine Learning
[Scraping] Naver '이 시각 주요 뉴스' 목록 가져오기
J-sean
2018. 12. 30. 19:07
반응형
urllib와 BeautifulSoup을 이용해 Naver에서 '이 시각 주요 뉴스' 목록을 가져온다.
import urllib.request as req

from bs4 import BeautifulSoup as bs

# Fetch the Naver News front page and print its headline list twice: once
# collected with find_all() and once with a CSS selector via select().
url = 'https://news.naver.com/'

# Parse inside a `with` block so the HTTP response is closed as soon as the
# document has been read instead of staying open until the script exits.
with req.urlopen(url) as res:
    soup = bs(res, 'html.parser')

# Section heading. A string passed as the second argument of find() is
# matched against the CSS class. According to the original author's note the
# full class attribute is 'tit_h4 tit_main1', so
# soup.find(attrs={'class': 'tit_h4 tit_main1'}) is an alternative lookup.
heading = soup.find('h4', 'tit_h4')
# find() returns None when nothing matches (e.g. the page layout changed);
# fall back to None instead of failing with AttributeError on `.string`.
title1 = heading.string if heading is not None else None
print('\t\tNAVER', title1, '\n')

print('-'*20, 'find_all()', '-'*20)
# find_all() looks through a tag's descendants and retrieves all descendants
# that match the filters; here: <a> tags with class 'nclicks(hom.headcont)'.
# soup.find_all(attrs={'class': 'nclicks(hom.headcont)'}) is the same query
# without the tag-name restriction.
headlines1 = soup.find_all('a', 'nclicks(hom.headcont)')
for i, news in enumerate(headlines1, start=1):
    # `.string` can be None (for example when the link wraps nested markup);
    # such entries are deliberately printed as 'None' rather than skipped.
    print('%2d: %s' % (i, news.string))

print('-'*20, 'select()', '-'*20)
# Beautiful Soup supports the most commonly used CSS selectors: pass a
# selector string to .select() on a Tag or on the BeautifulSoup object itself.
headlines2 = soup.select('div.newsnow_tx_inner > a')
for i, news in enumerate(headlines2, start=1):
    # Same None handling as in the find_all() listing above.
    print('%2d: %s' % (i, news.string))
반응형