Акторы Pykka загружают ЦП на 100 %, если запускать по 50 акторов за раз, а требуется 40 000 одновременно.
Я работаю над программой-загрузчиком на Pykka и запускаю в ней по нескольку акторов Pykka за раз, итерация за итерацией (одна и та же работа, выполняемая группой акторов, повторяется несколько раз).
50 акторов занимают всю оперативную память и весь процессор. Характеристики моего оборудования: ноутбук MSI GL62 7QF с процессором i7-7700 7-го поколения, 4 ядра, 8 логических процессоров, базовая тактовая частота 2,8 ГГц,
8 ГБ оперативной памяти DDR4. Как мне увеличить количество акторов на той же машине? Или мне нужно больше ядер, или больше оперативной памяти, или больше всего этого сразу?
Я попробовала ноутбук следующего поколения с большим объёмом оперативной памяти (4-ядерный процессор i7-8550 8-го поколения, 16 ГБ оперативной памяти), но и на нём процессор загружается на 100 %:
для 100 акторов он использует 14 ГБ оперативной памяти, но загрузка процессора достигает 100 % после 57 акторов.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
import time
import re
import tempfile
from PIL import Image, ImageEnhance
from scipy.misc import toimage
from IPython.display import display
import numpy as np
import cv2
import base64
import io
from pytesseract import image_to_string
import pytesseract
import pdfkit
import os
from datetime import date
import pyautogui
import pykka
# Tell pytesseract where the Tesseract-OCR executable lives (hard-coded
# Windows path; image_to_string() in de_captcha relies on it).
pytesseract.pytesseract.tesseract_cmd = "C:/Program Files/Tesseract-OCR/tesseract.exe"
def creatFolder(path):
    """Create directory ``path`` (with missing parents) unless something exists there.

    Args:
        path: Directory path to create.

    If ``path`` already exists (as a directory or as a file) nothing is done.
    """
    if not os.path.exists(path):
        # exist_ok=True closes the check-then-create race: every actor thread
        # asks for the same Surveys folder, so another thread may create it
        # between the exists() test above and this call.
        os.makedirs(path, exist_ok=True)
def de_captcha(base64String):
    """Decode a base64-encoded captcha image and return the capital letters OCR finds.

    The image is decoded as grayscale, enlarged 2.5x, median-blurred, eroded
    and denoised before it is handed to Tesseract. Every character other than
    A-Z is stripped from the OCR output.

    Args:
        base64String: Base64 representation of the encoded image file.

    Returns:
        str: The recognised upper-case letters (may be empty).
    """
    imgdata = base64.b64decode(base64String)
    # Wrap the raw bytes in a NumPy array so OpenCV can decode the image.
    captcha = np.asarray(bytearray(imgdata), dtype="uint8")
    captcha = cv2.imdecode(captcha, cv2.IMREAD_GRAYSCALE)
    captcha = cv2.resize(captcha, (0, 0), fx=2.5, fy=2.5)
    captcha = cv2.medianBlur(captcha, 3)
    # Erode the image to remove dot noise
    captcha = cv2.erode(captcha, np.ones((2, 2), dtype=np.uint8))
    # Some cosmetic
    captcha = cv2.fastNlMeansDenoising(captcha, h=46)
    # Turn the NumPy array back into a PIL image. scipy.misc.toimage was
    # removed from SciPy (1.2); for the uint8 grayscale array produced above
    # Image.fromarray yields the same 'L' image without any rescaling.
    captcha = Image.fromarray(captcha)
    return re.sub(r'[^A-Z]+', '', image_to_string(captcha))
def htmlString2Pdf(htmlString, filename, vil_num, storedPath):
    """Render ``htmlString`` to a PDF stored at ``storedPath + filename``.

    The PDF is first written to a per-village temporary file
    (``storedPath + "temp" + str(vil_num)``) and then moved onto its final
    name, so the final name never points at a half-written file.

    Args:
        htmlString: HTML markup to convert.
        filename: Final file name, appended to ``storedPath``.
        vil_num: Village number; only used to make the temporary name unique.
        storedPath: Destination prefix; it is concatenated as-is, so it must
            end with a path separator.
    """
    config = pdfkit.configuration(wkhtmltopdf="C:/Program Files/wkhtmltopdf/bin/wkhtmltopdf.exe")
    options = {'encoding': "UTF-8"}
    temp_path = storedPath + "temp" + str(vil_num)
    pdfkit.from_string(htmlString, temp_path, configuration=config, options=options)
    # os.rename raises FileExistsError on Windows when the target already
    # exists (e.g. when a document is downloaded again); os.replace
    # overwrites it on every platform.
    os.replace(temp_path, storedPath + filename)
# Timeout handed to the WebDriverWait that SeleniumClass uses for element lookups.
delay_long=10
# Pause, in seconds, passed to time.sleep() between browser actions.
delay=2
class SeleniumClass:
    """One browser session that probes survey numbers for a single village.

    Progress is kept in three files under ``basedFolder/Surveys/<fix_param>``:

    * ``checkpoint<vil_num>`` - the survey number to resume from,
    * ``village<vil_num>``    - comma-terminated survey numbers for which the
      site raised no alert,
    * ``A_Finish``            - one "Finished: <vil_num>" line per completed village.
    """

    def __init__(self, web_driver, vil_num, fix_param):
        """Prepare the progress files and read the checkpoint.

        Args:
            web_driver: Selenium driver that already has the site's start page open.
            vil_num: Index of the village option to select.
            fix_param: "<section>-<district>-<taluka>" option indexes; also
                used as the name of the output sub-folder.
        """
        self.driver = web_driver
        self.wait = WebDriverWait(web_driver, delay_long)
        self.vil_num = vil_num
        self.sec_dist_vil = fix_param.split('-')

        # Surveys dir for this Taluka
        survey_dir = basedFolder + "/Surveys/" + fix_param
        creatFolder(survey_dir)

        # Checkpoint file: created with "1" (first survey number) if missing.
        self.checkpoint_path = survey_dir + "/checkpoint" + str(self.vil_num)
        self._ensure_file(self.checkpoint_path, "1")
        with open(self.checkpoint_path, "r", encoding="utf-8") as f:
            checkpoint = f.read()
        self.checkpoint_params = checkpoint.split(',')

        # File collecting the survey numbers found for this village.
        self.avail_sur = survey_dir + "/village" + str(self.vil_num)
        self._ensure_file(self.avail_sur, "")

        # File listing every village whose scan has finished.
        self.finished_path = survey_dir + "/A_Finish"
        self._ensure_file(self.finished_path, "\n")

    @staticmethod
    def _ensure_file(path, initial_text):
        """Create ``path`` containing ``initial_text`` unless it already exists."""
        if not os.path.exists(path):
            with open(path, "w", encoding="utf-8") as f:
                f.write(initial_text)

    def getElement(self, idName):
        """Wait until the element with id ``idName`` is visible and return it."""
        # until() returns the element the condition located, so no second
        # lookup (which could race with a page update) is needed.
        return self.wait.until(
            expected_conditions.visibility_of_element_located((By.ID, idName)))

    def getElementXpath(self, xPath):
        """Wait until the element matching ``xPath`` is visible and return it."""
        return self.wait.until(
            expected_conditions.visibility_of_element_located((By.XPATH, xPath)))

    def _pick_option(self, select_id, index):
        """Click option ``index`` of the <select> with id ``select_id``, then pause."""
        Select(self.getElement(select_id)).options[index].click()
        time.sleep(delay)

    def _dismiss_alert(self):
        """Accept an alert if one appears within 3 seconds; return whether it did."""
        try:
            WebDriverWait(self.driver, 3).until(expected_conditions.alert_is_present())
        except TimeoutException:
            return False
        # switch_to_alert() was removed in Selenium 4; switch_to.alert matches
        # the switch_to.window calls used elsewhere in this class.
        self.driver.switch_to.alert.accept()
        return True

    def _save_progress(self, num_index, found, finished):
        """Persist the found survey numbers, the checkpoint and, if done, the finish mark."""
        # Results first, checkpoint second: if writing stops in between, a
        # retry repeats some numbers instead of losing them.
        with open(self.avail_sur, "a", encoding="utf-8") as f:
            f.write("".join(str(n) + "," for n in found))
        with open(self.checkpoint_path, "w", encoding="utf-8") as f:
            f.write(str(num_index))
        if finished:
            with open(self.finished_path, "a", encoding='utf-8') as f:
                f.write("Finished: " + str(self.vil_num) + "\n")

    def run_process(self):
        """Scan survey numbers from the checkpoint up to 1500 for this village.

        Returns:
            int: 0 when the whole range was scanned, 1 when an error
            interrupted the scan (progress is saved so it can be resumed).

        Raises:
            ValueError: If the checkpoint file does not start with an integer.

        The driver is always quit before this method returns or raises.
        """
        # Initialize all indexes
        first_num = int(self.checkpoint_params[0])
        num_index = first_num
        found = []
        finished = False
        try:
            try:
                # Get the Go button, pick the section, then click Go
                sectionGo = self.driver.find_element(
                    By.CSS_SELECTOR, "input[type='button'][value='Go']")
                self._pick_option("list", int(self.sec_dist_vil[0]))
                sectionGo.click()
                time.sleep(delay)
                # Switch to new window opened
                self.driver.switch_to.window(self.driver.window_handles[1])
                time.sleep(10)
                # District, Taluka and Village selections
                self._pick_option("distSelect", int(self.sec_dist_vil[1]))
                self._pick_option("talSelect", int(self.sec_dist_vil[2]))
                self._pick_option("vilSelect", self.vil_num)
                # Survey Button
                self.getElement("rbsryno").click()
                time.sleep(delay)
                # Survey number field
                surveyNum = self.getElementXpath("//td[@class='input-text-column']/input")
                for num in range(first_num, 1501):
                    # Clear the text field
                    surveyNum.clear()
                    surveyNum.send_keys(str(num))
                    num_index = num
                    time.sleep(delay)
                    # Get the Survey search button
                    surveySearch = self.getElementXpath(
                        "//div[@class='tab-sec']/div/div[4]/table/tbody/tr[1]/td[2]/input")
                    surveySearch.click()
                    time.sleep(delay)
                    # Alert occurs if survey number doesn't exist
                    if self._dismiss_alert():
                        time.sleep(delay)
                    else:
                        found.append(num)
                # Close the current window and switch to the first window (Section)
                self.driver.close()
                self.driver.switch_to.window(self.driver.window_handles[0])
                time.sleep(delay)
                finished = True
            except Exception as e:
                print(e)
                print("Connection was refused. Saving current position...")
            self._save_progress(num_index, found, finished)
        finally:
            # Close all windows, even when saving fails, so no Chrome process
            # is left running.
            self.driver.quit()
        return 0 if finished else 1
class VillageActor(pykka.ThreadingActor):
    """Pykka actor that scans one village, retrying until the scan succeeds."""

    def on_receive(self, message):
        """Run the village scan described by ``message`` and then stop this actor.

        Args:
            message: "<fix_param>,<village index>" string.

        A fresh headless Chrome is started for every attempt. A failed
        attempt (return value 1) is retried after a 60 second pause.
        """
        mess_arg = message.split(",")
        fix_param = mess_arg[0]
        vil_num = int(mess_arg[1])
        # The URL of the website
        websiteUrl = "https://bhulekh.mahabhumi.gov.in/"
        while True:
            chrome_options = Options()
            chrome_options.add_argument("--headless")
            driver = None
            try:
                # `options=` replaces the deprecated `chrome_options=` keyword.
                driver = webdriver.Chrome(
                    "C:/Users/Tinh Pham/Desktop/Project/Chrome driver/chromedriver",
                    options=chrome_options)
                # Use Chrome to access the url
                driver.get(websiteUrl)
            except Exception as e:
                # A failed browser start or page load is treated like a failed
                # scan: release the browser and retry instead of letting the
                # exception kill the actor with Chrome still running.
                print(e)
                if driver is not None:
                    driver.quit()
                return_value = 1
            else:
                return_value = SeleniumClass(driver, vil_num, fix_param).run_process()
            print("return_value: ", return_value)
            if return_value == 0:
                print("\n*******FINISH: ", message, " ******\n")
                break
            # Wait for 1 minute before restarting the process
            time.sleep(60)
        # The job is done: stop the actor so its thread does not stay alive.
        self.stop()
# --- Launch configuration -------------------------------------------------
# Root folder for everything this run stores.
storedPath = "R:/Dowloaded Files/"
# Text file whose line number `taluka_num` describes the Taluka to process.
general_info_path = "C:/Users/Tinh Pham/Desktop/Project/Python notebook/GeneralInfo"
taluka_num = 2
basedFolder = storedPath + "2019-06-17-final"
creatFolder(basedFolder)

# The selected line reads "<fix_param>,<actor count>:<rest>"; only the part
# in front of the first ':' is used.
with open(general_info_path, "r", encoding="utf-8") as f:
    first_run = f.readlines()[taluka_num - 1]
fix_param_all = first_run.partition(":")[0]
fix_param = fix_param_all.split(",")[0]
actor_num = int(fix_param_all.split(",")[1])

# Start one actor per village index, 20 seconds apart.
for i in range(41, 81):
    print(" Start actor: ", i)
    VillageActor.start().tell(",".join([fix_param, str(i)]))
    time.sleep(20)