BeautifulSoup HTML 파싱 코드

Plain text
Copy to clipboard
Open code in new window
EnlighterJS 3 Syntax Highlighter
from urllib.request import urlopen
from urllib.error import HTTPError
from bs4 import BeautifulSoup
def getBS(url):
    """Fetch ``url`` and return its content parsed by BeautifulSoup.

    Args:
        url: Address of the page to download; passed straight to ``urlopen``.

    Returns:
        The ``BeautifulSoup`` object built with the ``'html.parser'`` backend,
        or ``None`` when ``urlopen`` raises ``HTTPError`` or when an
        ``AttributeError`` occurs while reading/parsing the response.
    """
    try:
        response = urlopen(url)
    except HTTPError as e:  # the page could not be read
        print(e)
        return None

    try:
        # Close the HTTP response as soon as the body has been parsed.
        with response:
            bsObj = BeautifulSoup(response.read(), 'html.parser')
    except AttributeError as e:
        # NOTE(review): original comment said "tag not found"; no tag lookup
        # happens here, the guard is kept only to preserve behavior.
        print(e)
        return None

    return bsObj
from urllib.request import urlopen
from urllib.error import HTTPError
from bs4 import BeautifulSoup


def getBS(url):
    """Fetch ``url`` and return its content parsed by BeautifulSoup.

    Args:
        url: Address of the page to download; passed straight to ``urlopen``.

    Returns:
        The ``BeautifulSoup`` object built with the ``'html.parser'`` backend,
        or ``None`` when ``urlopen`` raises ``HTTPError`` or when an
        ``AttributeError`` occurs while reading/parsing the response.
    """
    try:
        response = urlopen(url)
    except HTTPError as e:  # the page could not be read
        print(e)
        return None

    try:
        # Close the HTTP response as soon as the body has been parsed.
        with response:
            bsObj = BeautifulSoup(response.read(), 'html.parser')
    except AttributeError as e:
        # NOTE(review): original comment said "tag not found"; no tag lookup
        # happens here, the guard is kept only to preserve behavior.
        print(e)
        return None

    return bsObj
from urllib.request import urlopen
from urllib.error import HTTPError
from bs4 import BeautifulSoup

def getBS(url):
    """Fetch ``url`` and return its content parsed by BeautifulSoup.

    Args:
        url: Address of the page to download; passed straight to ``urlopen``.

    Returns:
        The ``BeautifulSoup`` object built with the ``'html.parser'`` backend,
        or ``None`` when ``urlopen`` raises ``HTTPError`` or when an
        ``AttributeError`` occurs while reading/parsing the response.
    """
    try:
        response = urlopen(url)
    except HTTPError as e:  # the page could not be read
        print(e)
        return None

    try:
        # Close the HTTP response as soon as the body has been parsed.
        with response:
            bsObj = BeautifulSoup(response.read(), 'html.parser')
    except AttributeError as e:
        # NOTE(review): original comment said "tag not found"; no tag lookup
        # happens here, the guard is kept only to preserve behavior.
        print(e)
        return None

    return bsObj

기본 함수

Plain text
Copy to clipboard
Open code in new window
EnlighterJS 3 Syntax Highlighter
# Find every <span> tag whose class attribute is "green".
names = bsObj.find_all(name="span", attrs={"class": "green"})
# Print each matching tag, one per line.
for name in names:
    print(name)
# Find every <span> tag whose class attribute is "green".
names = bsObj.find_all(name="span", attrs={"class": "green"})
# Print each matching tag, one per line.
for name in names:
    print(name)
# Select the <span> elements carrying class "green" from the parsed page.
names = bsObj.find_all("span", {"class": "green"})

# Emit the matches one at a time.
for name in names:
    print(name)

class 속성이 green인 span 태그를 모두 찾아 하나씩 출력하기

Comments

답글 남기기

이메일 주소는 공개되지 않습니다. 필수 필드는 *로 표시됩니다