我试图执行这个脚本,但我不知道为什么它会同时抛出“Null”和重复值!我的目标是输入必要的值,然后单击搜索按钮,从页面中获取所有的“href”并收集数据,这很好,但同时提供了“Null”和重复值!。我不知道我到底错过了什么。
import scrapy
from scrapy_selenium import SeleniumRequest
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
class RightMove2Spider(scrapy.Spider):
name = 'rightmove2'
start_urls = ["https://www.rightmove.co.uk/property-for-sale/search.html?searchLocation=London&useLocationIdentifier=true&locationIdentifier=REGION%5E87490&buy=For+sale"]
def __init__(self, name=None, **kwargs):
chrome_options = Options()
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.set_window_size(1920, 1080)
driver.get("https://www.rightmove.co.uk/property-for-sale/search.html?searchLocation=London&useLocationIdentifier=true&locationIdentifier=REGION%5E87490&buy=For+sale")
price_range = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "(//option[@value='2000000'])[2]")))
price_range.click()
time.sleep(1)
bedroom_range = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "(//option[@value='5'])[1]")))
bedroom_range.click()
time.sleep(1)
tick_box = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//span[@class='tickbox--indicator']")))
tick_box.click()
time.sleep(1)
find_properties_btn = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//button[@id='submit']")))
find_properties_btn.click()
time.sleep(3)
self.property_xpath = driver.find_elements(By.XPATH, "//*[@class='l-searchResult is-list']/div/div/div[4]/div[1]/div[2]/a")
# driver.close()
super().__init__(name, **kwargs)
def parse(self, response):
for el in self.property_xpath:
href= el.get_attribute('href')
time.sleep(1)
yield SeleniumRequest(
url=href,
wait_time=3)
yield {
'Title': response.xpath("//h1[@itemprop='streetAddress']/text()").get(),
'Price': response.xpath("//div[@class='_1gfnqJ3Vtd1z40MlC0MzXu']/span/text()").get(),
'Agent Name': response.xpath("//div[@class='RPNfwwZBarvBLs58-mdN8']/a/text()").get(),
'Agent Address': response.xpath("//div[@class='OojFk4MTxFDKIfqreGNt0']/text()").get(),
'Agent Telephone': response.xpath("//a[@class='_3E1fAHUmQ27HFUFIBdrW0u']/text()").get(),
'Added on': response.xpath("//div[@class='_2nk2x6QhNB1UrxdI5KpvaF']/text()").get(),
'Links': response.url
}
for x in range(24, 1008, 24):
abs_url = f'https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E87490&minBedrooms=5&maxPrice=2000000&index={x}&propertyTypes=&includeSSTC=true&mustHave=&dontShow=&furnishTypes=&keywords='
yield SeleniumRequest(
url= abs_url,
callback=self.parse
)
输出
{"Title": null, "Price": null, "Agent Name": null, "Agent Address": null, "Agent Telephone": null, "Added on": null, "Links": "https://www.rightmove.co.uk/property-for-sale/search.html?searchLocation=London&useLocationIdentifier=true&locationIdentifier=REGION%5E87490&buy=For+sale"},
{"Title": "Combwell Crescent, Abbey Wood, London", "Price": "£450,000", "Agent Name": "Anthony Martin Estate Agents, Bexleyheath", "Agent Address": "2 Pickford Lane,\r\nBexleyheath,\r\nDA7 4QW", "Agent Telephone": "020 8012 7475", "Added on": "Added on 30/11/2021", "Links": "https://www.rightmove.co.uk/properties/117050312"},
{"Title": null, "Price": null, "Agent Name": null, "Agent Address": null, "Agent Telephone": null, "Added on": null, "Links": "https://www.rightmove.co.uk/property-for-sale/search.html?searchLocation=London&useLocationIdentifier=true&locationIdentifier=REGION%5E87490&buy=For+sale"},
{"Title": null, "Price": null, "Agent Name": null, "Agent Address": null, "Agent Telephone": null, "Added on": null, "Links": "https://www.rightmove.co.uk/property-for-sale/search.html?searchLocation=London&useLocationIdentifier=true&locationIdentifier=REGION%5E87490&buy=For+sale"},
{"Title": "Combwell Crescent, Abbey Wood, London", "Price": "£450,000", "Agent Name": "Anthony Martin Estate Agents, Bexleyheath", "Agent Address": "2 Pickford Lane,\r\nBexleyheath,\r\nDA7 4QW", "Agent Telephone": "020 8012 7475", "Added on": "Added on 30/11/2021", "Links": "https://www.rightmove.co.uk/properties/117050312"},
{"Title": null, "Price": null, "Agent Name": null, "Agent Address": null, "Agent Telephone": null, "Added on": null, "Links": "https://www.rightmove.co.uk/property-for-sale/search.html?searchLocation=London&useLocationIdentifier=true&locationIdentifier=REGION%5E87490&buy=For+sale"},
{"Title": "Combwell Crescent, Abbey Wood, London", "Price": "£450,000", "Agent Name": "Anthony Martin Estate Agents, Bexleyheath", "Agent Address": "2 Pickford Lane,\r\nBexleyheath,\r\nDA7 4QW", "Agent Telephone": "020 8012 7475", "Added on": "Added on 30/11/2021", "Links": "https://www.rightmove.co.uk/properties/117050312"},