Scrapy Shell выдаёт правильный результат, а скрипт — нет

Question

Scrapy Shell выдаёт правильный результат, а скрипт — нет

Я в полном замешательстве. Когда я ввожу xpath в оболочке scrapy, возвращаются правильные данные, но когда я присваиваю результат того же самого xpath переменной в скрипте, получается пустой результат. Я действительно не понимаю, что происходит.

import scrapy

class FestivalSpider(scrapy.Spider):
    """Collect stat columns from the ``full_table`` rows of the start page.

    The spider downloads every URL in ``start_urls``, then requests each of
    them again with ``parse_festival`` as the callback. One item is yielded
    per response; every field of the item is a list holding one whole column
    (all matching cells on the page), not a single player's value.
    """

    # Spider name, kept exactly as it appears in the original post.
    name = 'баскетбол'

    custom_settings = {
        "DOWNLOAD_DELAY": 3,
        "CONCURRENT_REQUESTS_PER_DOMAIN": 3,
        # NOTE(review): with the HTTP cache on, a previously stored copy of
        # the page may be served instead of a fresh download -- confirm the
        # cached HTML matches what the scrapy shell session fetched.
        "HTTPCACHE_ENABLED": True,
    }

    start_urls = [
        'http://www.basketball-reference.com/leagues/NBA_2017_per_game.html',
    ]

    # Template for one table column: ``stat`` is the cell's data-stat
    # attribute, ``tail`` is the path from the <td> to the text node.
    _CELL_XPATH = '//tr[@class = "full_table"]/td[@data-stat = "{stat}"]{tail}'

    # Columns that were disabled in the original spider and are still not
    # exported: team_id, fga_per_mp, fg_pct, fg3_per_mp, fg3a_per_mp, fg3_pct.

    def parse(self, response):
        """Schedule ``parse_festival`` for every start URL.

        Args:
            response: the downloaded start page; its content is not used.

        Yields:
            scrapy.Request: one request per entry of ``start_urls``, carrying
            the requested URL in ``meta['url']``.
        """
        # Iterate the class attribute instead of a duplicated local list so
        # the URLs are declared in exactly one place.
        for href in self.start_urls:
            yield scrapy.Request(
                url=href,
                callback=self.parse_festival,
                meta={'url': href},
            )

    def _column(self, response, stat, tail='/text()'):
        """Return the extracted strings of every cell with data-stat *stat*."""
        query = self._CELL_XPATH.format(stat=stat, tail=tail)
        return response.xpath(query).extract()

    def parse_festival(self, response):
        """Extract the stat columns and yield them as a single item.

        Args:
            response: page response whose request meta holds ``'url'``.

        Yields:
            dict: ``url`` plus one list of strings per exported column.

        Raises:
            KeyError: if the request meta has no ``'url'`` entry.
        """
        url = response.request.meta['url']

        # Player names are wrapped in a link, so the text sits one level down.
        name = self._column(response, 'player', tail='/a/text()')
        pos = self._column(response, 'pos')
        age = self._column(response, 'age')
        games = self._column(response, 'g')
        games_s = self._column(response, 'gs')
        fg_per_mp = self._column(response, 'fg_per_mp')

        # Debug aid from the original script. An empty list means no <td>
        # with this data-stat exists in the HTML this response contains.
        # NOTE(review): verify the attribute name against the page actually
        # downloaded; the shell session may have loaded a different URL.
        print(fg_per_mp)

        yield {
            'url': url,
            'name': name,
            'pos': pos,
            'age': age,
            'games': games,
            'games_s': games_s,
            'fg_per_mp': fg_per_mp,
        }

Проблема в fg_per_mp: когда я выполняю response.xpath('//tr[@class = "full_table"]/td[@data-stat = "fg_per_mp"]/text()').extract() в оболочке, это работает, но та же строка в скрипте возвращает пустой список.

Что я делаю неправильно?

2

shell xpath scrapy scrapy-spider scrapy-shell

Источник

user4066663 07 июл '17 в 21:05

0 ответов

Другие вопросы по тегам shell xpath scrapy scrapy-spider scrapy-shell