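# Tools used: selenium, BeautifulSoup.
# Page section of interest: "Product information".
# Element ids of interest (presumably the tables matched by the
# "productDetails_" id-prefix selector used below -- confirm on a live page):
#   productDetails_detailBullets_sections1
#   productDetails_techSpec_section_1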
import csv
from selenium import webdriver
from bs4 import BeautifulSoup
# Product pages to scrape; handed to get_information() when run as a script.
# Each URL is split after the ASIN path segment purely for line length --
# adjacent string literals are concatenated into a single URL.
links = [
    (
        "https://www.amazon.com/Instant-Pot-Multi-Use-Programmable-Packaging/dp/B00FLYWNYQ/"
        "ref=sr_1_1?s=home-garden&ie=UTF8&qid=1520264922&sr=1-1&keywords=-gggh"
    ),
    (
        "https://www.amazon.com/Amagle-Flexible-Batteries-Operated-Included/dp/B01NGTKTDK/"
        "ref=sr_1_2?s=furniture&ie=UTF8&qid=1520353343&sr=1-2&keywords=-jhgf"
    ),
]
def _first_detail_value(rows, label, default="N/A"):
    """Return the stripped text of the first ``<td>`` in a row mentioning *label*.

    Rows whose text contains *label* but that have no ``<td>`` cell are
    skipped. *default* is returned when no usable row is found.
    """
    for row in rows:
        if label not in row.text:
            continue
        cell = row.select_one("td")
        if cell is not None:
            return cell.get_text(strip=True)
    return default


def get_information(driver, urls) -> None:
    """Scrape title, dimensions, weight and ASIN for each URL into a CSV file.

    For every URL the page is loaded through *driver*, its rendered source is
    parsed with BeautifulSoup (``lxml`` parser), and one row is appended to
    ``productDetails.csv`` (relative path, overwritten on each call). The same
    four values are also printed to stdout.

    Args:
        driver: Object exposing ``get(url)`` and ``page_source`` (a selenium
            WebDriver when run as a script).
        urls: Iterable of page URLs to visit, in order.

    Any value that cannot be located on a page is recorded as ``"N/A"``
    instead of aborting the whole run.
    """
    # Explicit UTF-8 so non-ASCII product titles do not depend on the
    # platform's locale encoding.
    with open("productDetails.csv", "w", newline="", encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['Title', 'Dimension', 'Weight', 'ASIN'])
        for url in urls:
            driver.get(url)
            soup = BeautifulSoup(driver.page_source, "lxml")

            # The title element can be absent (e.g. an interstitial page).
            title_tag = soup.select_one("#productTitle")
            title = title_tag.get_text(strip=True) if title_tag is not None else "N/A"

            # Select the detail rows once and reuse them for every field.
            rows = soup.select("#prodDetails [id^='productDetails_'] tr")
            dimension = _first_detail_value(rows, "Product Dimensions")
            weight = _first_detail_value(rows, "Item Weight")
            asin = _first_detail_value(rows, "ASIN")

            writer.writerow([title, dimension, weight, asin])
            print(f'{title}\n{dimension}\n{weight}\n{asin}\n')
if __name__ == "__main__":
    # Script entry point: start a Chrome session, scrape every URL in
    # `links`, and always shut the browser down -- even if scraping raises.
    browser = webdriver.Chrome()
    try:
        get_information(browser, links)
    finally:
        browser.quit()