파이썬 requests 패키지를 설치한 후 requests를 사용하는 구문

# Call the RealtimeCityAir endpoint of openapi.seoul.go.kr and decode the JSON
# body of the response.
import requests

# The trailing /1/99 in the URL is presumably the start/end row range -- TODO confirm.
# A timeout keeps the call from blocking forever if the server never answers.
r = requests.get(
    'http://openapi.seoul.go.kr:8088/6d4d776b466c656533356a4b4b5872/json/RealtimeCityAir/1/99',
    timeout=10,
)
# Fail on an HTTP error status instead of trying to parse an error page as JSON.
r.raise_for_status()
rjson = r.json()

# requests.get()으로 API 주소를 호출한 응답을 r에 저장하고, r.json()으로 JSON을 파싱한 결과를 rjson 변수에 저장

 

크롤링

import requests
from bs4 import BeautifulSoup

# Download the movie ranking page and print the title link (<a> tag) of
# every table row that has one.

# Browser-like User-Agent header sent along with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
data = requests.get(
    'https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=pnt&date=20200303',
    headers=headers,
    timeout=10,  # do not hang forever if the server never responds
)
# Stop early on an HTTP error status rather than parsing an error page.
data.raise_for_status()

soup = BeautifulSoup(data.text, 'html.parser')

# select_one() returns only the first element matching the CSS selector.
# title['href'] would read an attribute of the tag; title.text would read
# the text between the opening and closing <a> tags.
title = soup.select_one('#old_content > table > tbody > tr:nth-child(2) > td.title > div > a')

# select() returns every element matching the selector (here: the table rows).
trs = soup.select('#old_content > table > tbody > tr')
for tr in trs:
    a_tag = tr.select_one('td.title > div > a')
    # select_one() yields None for rows without a title link; skip those.
    if a_tag is not None:
        print(a_tag)

-------------------------------------------------------------------------------------------------------

import requests
from bs4 import BeautifulSoup

# Download the movie ranking page and print, for each ranked row, the alt
# text of the first-column image, the title text and the text of the
# td.point cell. Uses requests and BeautifulSoup (bs4), imported above.

# Browser-like User-Agent header sent along with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
data = requests.get(
    'https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=pnt&date=20200303',
    headers=headers,
    timeout=10,  # do not hang forever if the server never responds
)
# Stop early on an HTTP error status rather than parsing an error page.
data.raise_for_status()

# Parse the response text as HTML so it can be queried with CSS selectors.
soup = BeautifulSoup(data.text, 'html.parser')

# select_one() returns only the first element matching the CSS selector.
# title['href'] would read an attribute of the tag; title.text would read
# the text between the opening and closing <a> tags.
title = soup.select_one('#old_content > table > tbody > tr:nth-child(2) > td.title > div > a')

# select() returns every element matching the selector (here: the table rows).
trs = soup.select('#old_content > table > tbody > tr')
for tr in trs:
    a_tag = tr.select_one('td.title > div > a')
    b_tag = tr.select_one('td:nth-child(1) > img')
    c_tag = tr.select_one('td.point')
    # select_one() yields None when a row lacks the element; skip rows that
    # are missing any of the three pieces instead of failing on None.
    if a_tag is None or b_tag is None or c_tag is None:
        continue
    # b_tag['alt'] reads only the alt attribute of the <img> tag rather
    # than the whole tag.
    print(b_tag['alt'], a_tag.text, c_tag.text)

+ Recent posts