mirror of https://github.com/skygpu/skynet.git
				
				
				
			Rework gpu worker logic to work better in parallel with other workers
							parent
							
								
									2b18fa376b
								
							
						
					
					
						commit
						25c86b5eaf
					
				| 
						 | 
				
			
			@ -218,13 +218,12 @@ async def open_dgpu_node(
 | 
			
		|||
 | 
			
		||||
    def begin_work(request_id: int):
 | 
			
		||||
        logging.info('begin_work')
 | 
			
		||||
        ec, out = cleos.push_action(
 | 
			
		||||
        return cleos.push_action(
 | 
			
		||||
            'telos.gpu',
 | 
			
		||||
            'workbegin',
 | 
			
		||||
            [account, request_id],
 | 
			
		||||
            f'{account}@{permission}'
 | 
			
		||||
        )
 | 
			
		||||
        assert ec == 0
 | 
			
		||||
 | 
			
		||||
    def cancel_work(request_id: int, reason: str):
 | 
			
		||||
        logging.info('cancel_work')
 | 
			
		||||
| 
						 | 
				
			
			@ -234,7 +233,6 @@ async def open_dgpu_node(
 | 
			
		|||
            [account, request_id, reason],
 | 
			
		||||
            f'{account}@{permission}'
 | 
			
		||||
        )
 | 
			
		||||
        assert ec == 0
 | 
			
		||||
 | 
			
		||||
    def maybe_withdraw_all():
 | 
			
		||||
        logging.info('maybe_withdraw_all')
 | 
			
		||||
| 
						 | 
				
			
			@ -251,7 +249,6 @@ async def open_dgpu_node(
 | 
			
		|||
                f'{account}@{permission}'
 | 
			
		||||
            )
 | 
			
		||||
            logging.info(collect_stdout(out))
 | 
			
		||||
            assert ec == 0
 | 
			
		||||
 | 
			
		||||
    async def find_my_results():
 | 
			
		||||
        logging.info('find_my_results')
 | 
			
		||||
| 
						 | 
				
			
			@ -289,8 +286,8 @@ async def open_dgpu_node(
 | 
			
		|||
            f'{account}@{permission}'
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        print(collect_stdout(out))
 | 
			
		||||
        assert ec == 0
 | 
			
		||||
        if ec != 0:
 | 
			
		||||
            print(collect_stdout(out))
 | 
			
		||||
 | 
			
		||||
    async def get_input_data(ipfs_hash: str) -> bytes:
 | 
			
		||||
        if ipfs_hash == '':
 | 
			
		||||
| 
						 | 
				
			
			@ -317,51 +314,51 @@ async def open_dgpu_node(
 | 
			
		|||
                    rid = req['id']
 | 
			
		||||
 | 
			
		||||
                    my_results = [res['id'] for res in (await find_my_results())]
 | 
			
		||||
                    if rid in my_results:
 | 
			
		||||
                        continue
 | 
			
		||||
                    if rid not in my_results:
 | 
			
		||||
                        statuses = await get_status_by_request_id(rid)
 | 
			
		||||
 | 
			
		||||
                    statuses = await get_status_by_request_id(rid)
 | 
			
		||||
                        if len(statuses) < config['verification_amount']:
 | 
			
		||||
 | 
			
		||||
                    if len(statuses) < config['verification_amount']:
 | 
			
		||||
                            # parse request
 | 
			
		||||
                            body = json.loads(req['body'])
 | 
			
		||||
 | 
			
		||||
                        # parse request
 | 
			
		||||
                        body = json.loads(req['body'])
 | 
			
		||||
                            binary = await get_input_data(req['binary_data'])
 | 
			
		||||
 | 
			
		||||
                        binary = await get_input_data(req['binary_data'])
 | 
			
		||||
                            hash_str = (
 | 
			
		||||
                                str(await get_user_nonce(req['user']))
 | 
			
		||||
                                +
 | 
			
		||||
                                req['body']
 | 
			
		||||
                                +
 | 
			
		||||
                                req['binary_data']
 | 
			
		||||
                            )
 | 
			
		||||
                            logging.info(f'hashing: {hash_str}')
 | 
			
		||||
                            request_hash = sha256(hash_str.encode('utf-8')).hexdigest()
 | 
			
		||||
 | 
			
		||||
                        hash_str = (
 | 
			
		||||
                            str(await get_user_nonce(req['user']))
 | 
			
		||||
                            +
 | 
			
		||||
                            req['body']
 | 
			
		||||
                            +
 | 
			
		||||
                            req['binary_data']
 | 
			
		||||
                        )
 | 
			
		||||
                        logging.info(f'hashing: {hash_str}')
 | 
			
		||||
                        request_hash = sha256(hash_str.encode('utf-8')).hexdigest()
 | 
			
		||||
                            # TODO: validate request
 | 
			
		||||
 | 
			
		||||
                        # TODO: validate request
 | 
			
		||||
                            # perform work
 | 
			
		||||
                            logging.info(f'working on {body}')
 | 
			
		||||
 | 
			
		||||
                        # perform work
 | 
			
		||||
                        logging.info(f'working on {body}')
 | 
			
		||||
                            ec, _ = begin_work(rid)
 | 
			
		||||
                            if ec != 0:
 | 
			
		||||
                                logging.info(f'probably beign worked on already... skip.')
 | 
			
		||||
 | 
			
		||||
                        begin_work(rid)
 | 
			
		||||
                            else:
 | 
			
		||||
                                try:
 | 
			
		||||
                                    img_sha, raw_img = gpu_compute_one(
 | 
			
		||||
                                        body['method'], body['params'], binext=binary)
 | 
			
		||||
 | 
			
		||||
                        try:
 | 
			
		||||
                            img_sha, raw_img = gpu_compute_one(
 | 
			
		||||
                                body['method'], body['params'], binext=binary)
 | 
			
		||||
                                    ipfs_hash = publish_on_ipfs(img_sha, raw_img)
 | 
			
		||||
 | 
			
		||||
                            ipfs_hash = publish_on_ipfs(img_sha, raw_img)
 | 
			
		||||
                                    submit_work(rid, request_hash, img_sha, ipfs_hash)
 | 
			
		||||
                                    break
 | 
			
		||||
 | 
			
		||||
                            submit_work(rid, request_hash, img_sha, ipfs_hash)
 | 
			
		||||
 | 
			
		||||
                            break
 | 
			
		||||
 | 
			
		||||
                        except BaseException as e:
 | 
			
		||||
                            cancel_work(rid, str(e))
 | 
			
		||||
                                except BaseException as e:
 | 
			
		||||
                                    cancel_work(rid, str(e))
 | 
			
		||||
                                    break
 | 
			
		||||
 | 
			
		||||
                    else:
 | 
			
		||||
                        logging.info(f'request {rid} already beign worked on, skip...')
 | 
			
		||||
                        continue
 | 
			
		||||
 | 
			
		||||
                await trio.sleep(1)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue