Несколько акторов Pykka загружают ЦП на 100%, если запустить 50 акторов одновременно, а мне требуется запускать 40000 одновременно.

Я работаю над программой загрузки данных на основе Pykka и запускаю несколько акторов Pykka одновременно, итерация за итерацией (аналогичная работа, выполняемая группой акторов, повторяется несколько раз).

50 акторов используют всю оперативную память и весь процессор. Характеристики моего оборудования: ноутбук MSI GL627QF, процессор i7-7700 7-го поколения, 4 ядра, 8 логических процессоров, базовая тактовая частота 2.8 ГГц,

8 ГБ оперативной памяти DDR4. Как мне увеличить количество акторов на той же машине? Или мне нужно больше ядер, или больше оперативной памяти, или и то и другое?

Я попробовала ноутбук следующего поколения с большим объёмом оперативной памяти (4-ядерный процессор i7-8550 8-го поколения, 16 ГБ оперативной памяти), но и на нём процессор загружается на 100%:

он использует 14 ГБ оперативной памяти для 100 акторов, но загрузка процессора достигает 100% уже после 57 акторов.

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
import time
import re
import tempfile
from PIL import Image, ImageEnhance
from scipy.misc import toimage
from IPython.display import display
import numpy as np
import cv2
import base64 
import io
from pytesseract import image_to_string
import pytesseract
import pdfkit
import os
from datetime import date
import pyautogui 
import pykka

# Tell pytesseract which Tesseract executable to launch (hard-coded Windows install path).
pytesseract.pytesseract.tesseract_cmd = "C:/Program Files/Tesseract-OCR/tesseract.exe"

def creatFolder(path):
    """Create directory *path* (and any missing parents) if it is not there yet.

    ``exist_ok=True`` keeps the call safe when several actor threads try to
    create the same directory at the same moment; a separate
    ``os.path.exists`` check followed by ``os.makedirs`` can raise
    ``FileExistsError`` in that race.

    Raises:
        FileExistsError: if *path* exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

def de_captcha(base64String):
    """OCR a base64-encoded captcha image and return only its upper-case letters.

    The image is decoded as grayscale, enlarged 2.5x, median-blurred, eroded
    with a 2x2 kernel and denoised before being handed to Tesseract.  Every
    character outside ``A-Z`` is stripped from the OCR output.

    Args:
        base64String: base64 text (or bytes) of the encoded image file.

    Returns:
        The recognised text restricted to the characters ``A``-``Z``
        (possibly an empty string).
    """
    imgdata = base64.b64decode(base64String)

    # Wrap the raw file bytes in a uint8 array so OpenCV can decode them.
    captcha = np.frombuffer(imgdata, dtype=np.uint8)
    captcha = cv2.imdecode(captcha, cv2.IMREAD_GRAYSCALE)

    # Upscale first so the later filters work on larger glyphs.
    captcha = cv2.resize(captcha, (0, 0), fx=2.5, fy=2.5)
    captcha = cv2.medianBlur(captcha, 3)

    # Erode the image to remove dot noise.
    captcha = cv2.erode(captcha, np.ones((2, 2), dtype=np.uint8))

    # Final smoothing pass.
    captcha = cv2.fastNlMeansDenoising(captcha, h=46)

    # Convert the uint8 array to a PIL image for pytesseract.
    # Image.fromarray replaces scipy.misc.toimage, which newer SciPy releases
    # no longer provide; for a 2-D uint8 array both yield the same "L" image.
    captcha = Image.fromarray(captcha)
    return re.sub('[^A-Z]+', '', image_to_string(captcha))

def htmlString2Pdf(htmlString,filename,vil_num,storedPath):
    """Render *htmlString* to a PDF stored as ``storedPath + filename``.

    The PDF is first written under a per-village temporary name
    (``"temp" + str(vil_num)``) and only then moved to its final name, so a
    partially written file never appears under *filename*.

    Args:
        htmlString: HTML markup to render.
        filename: final file name, appended directly to *storedPath*.
        vil_num: village number, used to keep temporary names distinct.
        storedPath: directory prefix; it is concatenated as-is, so it must
            already end with a path separator.
    """
    config = pdfkit.configuration(wkhtmltopdf="C:/Program Files/wkhtmltopdf/bin/wkhtmltopdf.exe")
    options = {'encoding': "UTF-8"}
    temp_path = storedPath + "temp" + str(vil_num)
    final_path = storedPath + filename
    pdfkit.from_string(htmlString, temp_path, configuration=config, options=options)
    # os.replace overwrites an existing destination on every platform, whereas
    # os.rename raises on Windows when the target is already there (for
    # example when a document is downloaded again after a restart).
    os.replace(temp_path, final_path)

# delay_long: timeout in seconds handed to the shared WebDriverWait.
# delay: pause in seconds inserted after each interaction with the page.
delay_long, delay = 10, 2

class SeleniumClass:
    """Scan the survey numbers of one village and checkpoint the progress.

    For the village at option index ``vil_num`` the class tries survey numbers
    up to 1500 on the site's search form, records the numbers that do not
    trigger a "not found" alert, and keeps a checkpoint file so an interrupted
    run can resume.

    Files used under ``basedFolder + "/Surveys/" + fix_param``:
        ``checkpoint<vil_num>``  last survey number attempted (starts at "1")
        ``village<vil_num>``     comma-terminated list of numbers that exist
        ``A_Finish``             one "Finished: <vil_num>" line per done village
    """

    def __init__(self, web_driver,vil_num,fix_param):
        """Bind the driver and prepare the per-village bookkeeping files.

        Args:
            web_driver: Selenium driver already showing the site's start page.
            vil_num: index of the village in the village drop-down.
            fix_param: dash-separated option indexes; parts 0, 1 and 2 select
                the section, district and taluka drop-down entries.
        """
        self.driver = web_driver
        self.wait = WebDriverWait(web_driver, delay_long)
        self.vil_num = vil_num
        self.sec_dist_vil = fix_param.split('-')

        # One output directory per taluka.
        survey_dir = basedFolder + "/Surveys/" + fix_param
        creatFolder(survey_dir)

        # Checkpoint: created with "1" on first use, then read back.
        self.checkpoint_path = survey_dir + "/checkpoint" + str(self.vil_num)
        self._ensure_file(self.checkpoint_path, "1")
        with open(self.checkpoint_path, "r", encoding="utf-8") as f:
            checkpoint = f.read()
        # Only the first comma-separated field is read by this class.
        self.checkpoint_params = checkpoint.split(',')

        # Survey numbers found for this village.
        self.avail_sur = survey_dir + "/village" + str(self.vil_num)
        self._ensure_file(self.avail_sur, "")

        # Shared list of finished villages.
        self.finished_path = survey_dir + "/A_Finish"
        self._ensure_file(self.finished_path, "\n")

    @staticmethod
    def _ensure_file(path, initial_text):
        """Create *path* containing *initial_text* unless it already exists."""
        if not os.path.exists(path):
            with open(path, "w", encoding="utf-8") as f:
                f.write(initial_text)

    def getElement(self,idName):
        """Wait until the element with id *idName* is visible and return it."""
        # The wait condition returns the located element, so no second lookup
        # (which could hit a page that changed in between) is needed.
        return self.wait.until(
            expected_conditions.visibility_of_element_located((By.ID, idName)))

    def getElementXpath(self,xPath):
        """Wait until the element matching *xPath* is visible and return it."""
        return self.wait.until(
            expected_conditions.visibility_of_element_located((By.XPATH, xPath)))

    def _choose_option(self, select_id, index):
        """Click option *index* of the drop-down *select_id*, then pause."""
        Select(self.getElement(select_id)).options[index].click()
        time.sleep(delay)

    def _open_survey_search(self):
        """Navigate from the start page to the survey-number search form."""
        # Locate the Go button before changing the section selection.
        section_go = self.driver.find_element(
            By.CSS_SELECTOR, "input[type='button'][value='Go']")
        self._choose_option("list", int(self.sec_dist_vil[0]))

        section_go.click()
        time.sleep(delay)

        # The section page opens in a second window; give it time to load.
        self.driver.switch_to.window(self.driver.window_handles[1])
        time.sleep(10)

        self._choose_option("distSelect", int(self.sec_dist_vil[1]))
        self._choose_option("talSelect", int(self.sec_dist_vil[2]))
        self._choose_option("vilSelect", self.vil_num)

        # Switch the form to "search by survey number".
        self.getElement("rbsryno").click()
        time.sleep(delay)

    def _dismiss_alert(self):
        """Accept an alert if one shows up within 3 seconds.

        Returns:
            True when an alert appeared and was accepted, False otherwise.
        """
        try:
            WebDriverWait(self.driver, 3).until(
                expected_conditions.alert_is_present())
        except TimeoutException:
            return False
        self.driver.switch_to.alert.accept()
        return True

    def _save_progress(self, num_index, available):
        """Append the found survey numbers and rewrite the checkpoint."""
        with open(self.avail_sur, "a", encoding="utf-8") as f:
            # Each number is followed by a comma, matching the existing file format.
            f.write("".join(number + "," for number in available))
        with open(self.checkpoint_path, "w", encoding="utf-8") as f:
            f.write(str(num_index))

    def run_process(self):
        """Scan survey numbers from the checkpoint up to 1500.

        The driver is always quit before this method returns or raises.

        Returns:
            0 when the whole range was scanned and the village was marked as
            finished, 1 when an error interrupted the scan (progress is saved
            so the caller can retry with a fresh driver).
        """
        available = []
        try:
            # A malformed checkpoint still raises to the caller; the outer
            # ``finally`` makes sure the browser is not left running.
            start = int(self.checkpoint_params[0])
            num_index = start
            try:
                self._open_survey_search()
                survey_input = self.getElementXpath(
                    "//td[@class='input-text-column']/input")

                # NOTE(review): the checkpoint holds the last number attempted,
                # so a resumed run checks that number again and may append it
                # to the village file a second time.
                for num in range(start, 1501):
                    survey_input.clear()
                    survey_input.send_keys(str(num))
                    num_index = num
                    time.sleep(delay)

                    search_button = self.getElementXpath(
                        "//div[@class='tab-sec']/div/div[4]/table/tbody/tr[1]/td[2]/input")
                    search_button.click()
                    time.sleep(delay)

                    # An alert means the survey number does not exist.
                    if self._dismiss_alert():
                        time.sleep(delay)
                    else:
                        available.append(str(num))

                # Close the search window and go back to the section window.
                self.driver.close()
                self.driver.switch_to.window(self.driver.window_handles[0])
                time.sleep(delay)

                self._save_progress(num_index, available)
                with open(self.finished_path, "a", encoding='utf-8') as f:
                    f.write("Finished: "+str(self.vil_num)+"\n")
                return 0

            except Exception as e:
                print(e)
                print("Connection was refused. Saving current position...")
                self._save_progress(num_index, available)
                return 1
        finally:
            # Single exit point for the browser, whatever happened above.
            self.driver.quit()


class VillageActor(pykka.ThreadingActor):
    """Actor that scans one village, retrying with a fresh browser until done."""

    def on_receive(self, message):
        """Process a ``"<fix_param>,<village index>"`` message.

        A new headless Chrome instance is started for every attempt.  When
        ``SeleniumClass.run_process`` reports failure (non-zero), the actor
        sleeps for 60 seconds and tries again; it returns once the scan
        succeeds.
        """
        mess_arg = message.split(",")
        fix_param = mess_arg[0]
        vil_num = int(mess_arg[1])

        # The URL of the website
        website_url = "https://bhulekh.mahabhumi.gov.in/"

        while True:
            chrome_options = Options()
            chrome_options.add_argument("--headless")

            driver = webdriver.Chrome("C:/Users/Tinh Pham/Desktop/Project/Chrome driver/chromedriver", chrome_options=chrome_options)

            try:
                driver.get(website_url)
                worker = SeleniumClass(driver, vil_num, fix_param)
            except Exception:
                # Do not leave an orphaned Chrome process behind when the page
                # load or the worker set-up fails; the error still propagates.
                driver.quit()
                raise

            # run_process takes over the driver and quits it itself.
            return_value = worker.run_process()

            print("return_value: ",return_value)
            if return_value == 0:
                print("\n*******FINISH: ", message," ******\n")
                break

            # Wait 60 seconds before restarting the process.
            time.sleep(60)

# Output root and the file listing one taluka description per line.
storedPath = "R:/Dowloaded Files/"
general_info_path = "C:/Users/Tinh Pham/Desktop/Project/Python notebook/GeneralInfo"
# 1-based line of general_info_path to process.
taluka_num = 2

# Everything produced by this run goes under this folder.
basedFolder = storedPath + "2019-06-17-final"
creatFolder(basedFolder)

with open(general_info_path, "r", encoding="utf-8") as f:
    first_run = list(f)[taluka_num - 1]

# The text before the first ":" is "<fix_param>,<number>".
fix_param_all = first_run.partition(":")[0]
fix_param = fix_param_all.partition(",")[0]
actor_num = int(fix_param_all.split(",")[1])

# Start one actor per village index, 20 seconds apart.
for i in range(41, 81):
    print(" Start actor: ",i)
    VillageActor.start().tell(",".join((fix_param, str(i))))
    time.sleep(20)

0 ответов

Другие вопросы по тегам