Текст разбивается на новые строки из-за тега <br> при очистке
Привет всем, я пытаюсь очистить страницу и данные на ней, но не могу получить ожидаемые данные в одной строке из-за тегов br, присутствующих в тексте, из-за которых данные перемещаются в новую строку, любая помощь будет оценена.
Вот мой код:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import time
import random
import re
driver = webdriver.Chrome(r"C:\Users\Acer\AppData\Local\Programs\Python\Python36-32\chromedriver.exe")
with open(r"E:\office\Zoro\Zoro11.txt" ,'r')as f:
content = f.readlines()
content = [x.strip() for x in content]
currentIndex = 0
with open(r"E:\office\Zoro\Zoro16.csv", 'a')as f1:
f1.write("Product Url" + "," + "Main category"+"," + "Sub category"+"," + "Leaf category"+","+"Title" + "," + "Prodcutid"+","+"Manufacturer Name" + "," + "Manufacturer's number"+","+"reviews"+","+"Price"+","+"smalldesc"+","+"Product Dimensions"+","+"Main Image"+","+"Sub Image")
f1.write("\n")
for link in content[currentIndex:]:
driver.get(link)
time.sleep(5)
aj = driver.find_element_by_class_name('zcl-breadcrumb__list').text.replace("Home\n","").replace("\n",">").replace(",","")
try:
title = driver.find_element_by_xpath("//*[@data-za='product-name']").text.replace(",","").replace("\n","")
except:
title = "No title"
try:
brand = driver.find_element_by_xpath("//*[@data-za='product-brand-name']").text.replace(",","")
except:
brand = "No Brand"
try:
productid = driver.find_element_by_xpath("//*[@data-za='PDPZoroNo']").text.replace(",", "")
except:
productid = "No productid"
maincategory = aj[:aj.find(">")]
Leafcategory = aj[aj.rindex('>')+1:]
Subcategory = aj[(aj.find(">"))+1:aj.rindex('>')]
mfrnu = driver.find_element_by_xpath("//*[@data-za='PDPMfrNo']").text.replace(",", "")
try:
mainimage = driver.find_element_by_xpath('//*[@id="app"]/div[3]/div[2]/div/div/main/div[1]/div[3]/div/div[1]/div/div/div/div/div/img').get_attribute('src')
except:
mainimage = "No mainimage"
try:
price = driver.find_element_by_class_name('product-price__price').text
except:
price = "No price"
try:
smalldesc = driver.find_element_by_class_name('product-attributes').text.replace("\n",";").replace(",","")
except:
smalldesc = "No desc"
try:
specification = driver.find_element_by_css_selector('.product-specifications__table.table.table-striped').text.replace("\n",";").replace(",","").strip()
print(specification)
except:
specification = "No sepcs"
try:
productdesc = driver.find_element_by_css_selector('.product-description__text').text.replace(",","")
except:
productdesc = "No productfields"
f1.write(link + "," + maincategory + "," + Subcategory + ","+ Leafcategory + ","+title + "," + productid + "," + brand + "," + mfrnu + "," + price +"," + smalldesc +"," + specification +"," + productdesc +"," + mainimage +"\n")
в приведенном выше коде в productdesc я получаю следующий вывод
Pro Series Swivel Head LED Work Light
280 lumens
Beam Distance 54 meters
Run time 3 hrs
ожидаемый результат находится в одной строке с пробелом, ссылка, которую я использую для очистки, https://www.zoro.com/dorcy-pro-s-swivel-head-led-worklght-187-lumen-41-2392/i/G9576141/
0 ответов
Похоже, вам просто нужно обновить строку для чтения описания. Добавьте замену новой строки, как вы это делали в других полях:
Измените это:
productdesc = driver.find_element_by_css_selector('.product-description__text').text.replace(",","")
К этому:
productdesc = driver.find_element_by_css_selector('.product-description__text').text.replace("\n",";").replace(",","")