mirror of https://github.com/skygpu/skynet.git
Rework gpu worker logic to work better in parallel with other workers
parent
2b18fa376b
commit
25c86b5eaf
|
@ -218,13 +218,12 @@ async def open_dgpu_node(
|
||||||
|
|
||||||
def begin_work(request_id: int):
|
def begin_work(request_id: int):
|
||||||
logging.info('begin_work')
|
logging.info('begin_work')
|
||||||
ec, out = cleos.push_action(
|
return cleos.push_action(
|
||||||
'telos.gpu',
|
'telos.gpu',
|
||||||
'workbegin',
|
'workbegin',
|
||||||
[account, request_id],
|
[account, request_id],
|
||||||
f'{account}@{permission}'
|
f'{account}@{permission}'
|
||||||
)
|
)
|
||||||
assert ec == 0
|
|
||||||
|
|
||||||
def cancel_work(request_id: int, reason: str):
|
def cancel_work(request_id: int, reason: str):
|
||||||
logging.info('cancel_work')
|
logging.info('cancel_work')
|
||||||
|
@ -234,7 +233,6 @@ async def open_dgpu_node(
|
||||||
[account, request_id, reason],
|
[account, request_id, reason],
|
||||||
f'{account}@{permission}'
|
f'{account}@{permission}'
|
||||||
)
|
)
|
||||||
assert ec == 0
|
|
||||||
|
|
||||||
def maybe_withdraw_all():
|
def maybe_withdraw_all():
|
||||||
logging.info('maybe_withdraw_all')
|
logging.info('maybe_withdraw_all')
|
||||||
|
@ -251,7 +249,6 @@ async def open_dgpu_node(
|
||||||
f'{account}@{permission}'
|
f'{account}@{permission}'
|
||||||
)
|
)
|
||||||
logging.info(collect_stdout(out))
|
logging.info(collect_stdout(out))
|
||||||
assert ec == 0
|
|
||||||
|
|
||||||
async def find_my_results():
|
async def find_my_results():
|
||||||
logging.info('find_my_results')
|
logging.info('find_my_results')
|
||||||
|
@ -289,8 +286,8 @@ async def open_dgpu_node(
|
||||||
f'{account}@{permission}'
|
f'{account}@{permission}'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if ec != 0:
|
||||||
print(collect_stdout(out))
|
print(collect_stdout(out))
|
||||||
assert ec == 0
|
|
||||||
|
|
||||||
async def get_input_data(ipfs_hash: str) -> bytes:
|
async def get_input_data(ipfs_hash: str) -> bytes:
|
||||||
if ipfs_hash == '':
|
if ipfs_hash == '':
|
||||||
|
@ -317,9 +314,7 @@ async def open_dgpu_node(
|
||||||
rid = req['id']
|
rid = req['id']
|
||||||
|
|
||||||
my_results = [res['id'] for res in (await find_my_results())]
|
my_results = [res['id'] for res in (await find_my_results())]
|
||||||
if rid in my_results:
|
if rid not in my_results:
|
||||||
continue
|
|
||||||
|
|
||||||
statuses = await get_status_by_request_id(rid)
|
statuses = await get_status_by_request_id(rid)
|
||||||
|
|
||||||
if len(statuses) < config['verification_amount']:
|
if len(statuses) < config['verification_amount']:
|
||||||
|
@ -344,8 +339,11 @@ async def open_dgpu_node(
|
||||||
# perform work
|
# perform work
|
||||||
logging.info(f'working on {body}')
|
logging.info(f'working on {body}')
|
||||||
|
|
||||||
begin_work(rid)
|
ec, _ = begin_work(rid)
|
||||||
|
if ec != 0:
|
||||||
|
logging.info(f'probably beign worked on already... skip.')
|
||||||
|
|
||||||
|
else:
|
||||||
try:
|
try:
|
||||||
img_sha, raw_img = gpu_compute_one(
|
img_sha, raw_img = gpu_compute_one(
|
||||||
body['method'], body['params'], binext=binary)
|
body['method'], body['params'], binext=binary)
|
||||||
|
@ -353,15 +351,14 @@ async def open_dgpu_node(
|
||||||
ipfs_hash = publish_on_ipfs(img_sha, raw_img)
|
ipfs_hash = publish_on_ipfs(img_sha, raw_img)
|
||||||
|
|
||||||
submit_work(rid, request_hash, img_sha, ipfs_hash)
|
submit_work(rid, request_hash, img_sha, ipfs_hash)
|
||||||
|
|
||||||
break
|
break
|
||||||
|
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
cancel_work(rid, str(e))
|
cancel_work(rid, str(e))
|
||||||
|
break
|
||||||
|
|
||||||
else:
|
else:
|
||||||
logging.info(f'request {rid} already beign worked on, skip...')
|
logging.info(f'request {rid} already beign worked on, skip...')
|
||||||
continue
|
|
||||||
|
|
||||||
await trio.sleep(1)
|
await trio.sleep(1)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue