KOTA ITO

Generation

generate function — Fri, 16 Dec 2022

BeautifulSoup を使って、XPath を '//*[@id="rankingChart"]/tbody/tr[22]/td' と指定し、文字列をすべて取得する

import urllib.request
from urllib.parse import urljoin, urlparse
from urllib.request import urlopen

import requests
from bs4 import BeautifulSoup


def get_title(url):
    """Return the text of the ``<title>`` element of the page at *url*.

    The page is fetched with ``urlopen`` and parsed with BeautifulSoup's
    ``html.parser`` backend.

    Args:
        url: Address of the page to fetch.

    Returns:
        The text content of the document's title element.
    """
    # Close the HTTP response as soon as the document has been parsed.
    with urlopen(url) as response:
        soup = BeautifulSoup(response, "html.parser")
    # NOTE(review): presumably fails with AttributeError when the page has
    # no <title>; this matches the original behaviour -- confirm it is wanted.
    return soup.title.text


def get_links(url):
    """Collect the same-site links found in the page at *url*.

    An ``href`` starting with ``/`` is prefixed with the page's
    ``scheme://netloc``; an ``href`` that already contains that base is kept
    as written. Every other ``href`` is ignored. Each link is reported once,
    in order of first appearance.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of link strings without duplicates.
    """
    parsed = urlparse(url)
    base = parsed.scheme + "://" + parsed.netloc

    with urlopen(url) as response:
        soup = BeautifulSoup(response, "html.parser")

    links = []
    seen = set()  # membership index so de-duplication is not a list scan
    for anchor in soup.find_all("a"):
        # Anchors without an href (e.g. <a name="top">) are skipped instead
        # of raising KeyError.
        href = anchor.attrs.get("href")
        if not href:
            continue

        if href.startswith("/"):
            candidate = base + href
        elif base in href:
            candidate = href
        else:
            continue

        # NOTE(review): the original snippet was cut off right after
        # `if href not in links`; the append here and the final return are
        # the presumed continuation -- confirm against the intended code.
        if candidate not in seen:
            seen.add(candidate)
            links.append(candidate)
    return links

Questions about programming? Chat with your personal AI assistant