Generation

generate function — Wed, 08 Feb 2023

def get_needed_posts(query):
    """Collect the date, title and link of every parsable entry in ``articles``.

    ``articles`` is not a parameter: it is read from the enclosing (module)
    scope and must be an iterable of objects exposing ``find``. The calls
    below look like BeautifulSoup tag lookups -- TODO confirm with the caller.

    Args:
        query: Accepted for interface compatibility; the body does not use it.

    Returns:
        A ``pandas.DataFrame`` with the columns ``date``, ``title`` and
        ``link`` (in that order) and a 0..n-1 index. The frame is empty,
        but still has those columns, when no entry could be parsed.
    """
    rows = []
    for article in articles:
        try:
            heading = article.find('h2', class_='tm-article')
            title = heading.text
            date = article.find('span', class_='tm-article').text.strip()
            link = 'https://habr.com' + heading.find('a').get('href')
        except (AttributeError, TypeError):
            # A lookup came back as None, so this entry lacks the expected
            # markup. Skip it instead of emitting a row built from unset
            # values or from the previous entry's values.
            continue
        rows.append({'date': date, 'title': title, 'link': link})
    # Build the frame once: concatenating inside the loop copies the whole
    # table on every iteration, and the accumulator was never initialised.
    return pd.DataFrame(rows, columns=['date', 'title', 'link'])

# Modify the Python function above so that if it finds a repeated title or
# link, it will not add it to the return

def search(query):
    """Scrape ten habr.com/ru/all listing URLs into a single table.

    Args:
        query: Search string. It is lower-cased and forwarded, together with
            each page's article elements, to ``get_needed_posts``.

    Returns:
        A ``pandas.DataFrame`` holding the concatenated results that
        ``get_needed_posts`` returned for every page, re-indexed from 0.

    Raises:
        requests.exceptions.RequestException: if a page cannot be fetched,
            including when the server does not answer within the timeout.
    """
    query = query.lower()
    frames = []
    for page in range(10):
        # Page 0 is the bare listing URL; later pages carry their number.
        # NOTE(review): page 0 and page 1 may serve the same listing -- confirm.
        if page == 0:
            url = 'https://habr.com/ru/all/'
        else:
            url = 'https://habr.com/ru/all/page' + str(page) + '/'
        # Without a timeout, requests waits indefinitely on a stalled server.
        req = requests.get(url, timeout=30)
        soup = BeautifulSoup(req.text, 'html.parser')
        articles = soup.find_all('li', class_='content-list__item_post')
        frames.append(get_needed_posts(query, articles))
    # A single concat avoids re-copying the accumulated table on every page.
    return pd.concat(frames).reset_index(drop=True)

# NOTE: the source text ends here in the middle of a second definition; the
# fragment is kept verbatim because its remainder is not visible.
# def get_needed_posts(query, articles): site = pd.DataFrame() for article in articles: title, date, link = get_article_info(article) if not title in list(site['title']) and not link in list(site['link']): row = {'

Questions about programming? Chat with your personal AI assistant