Upload, download and use files in YTsaurus operations
This notebook contains examples of how to store large binary data on YTsaurus as files.
This notebook demonstrates how to:
- Upload a file.
- Read a file.
- Use a file in an operation.
Files can be useful in the following cases:
- Operations require dictionaries (e.g., for working with geodata, currency rates, etc.).
- For saving checkpoints or a trained model.
- You need to store data in a reliable storage.
- We store big Jupyter notebooks as files:)
from yt import wrapper as yt
import uuid
import json
Create a base directory for examples
# Choose a unique scratch directory for this run: under the user's home
# directory when the `home_path` attribute is set, otherwise under
# //tmp/examples.
username = yt.get_user_name()
home_path_attr = f"//sys/users/{username}/@user_info/home_path"
if yt.exists(home_path_attr):
    home = yt.get(home_path_attr)
    working_dir = f"{home}/{uuid.uuid4().hex}"
else:
    working_dir = f"//tmp/examples/{uuid.uuid4().hex}"

# recursive=True also creates missing parent nodes (e.g. //tmp/examples on a
# fresh cluster) instead of failing because the parent does not exist.
yt.create("map_node", working_dir, recursive=True)
print(working_dir)
Upload and download a file as serialized JSON
# Sample currency rates keyed by ISO currency code; USD is the base (rate 1).
# This is just a sizeable payload to serialize and store as a file.
rates = {
    "USD": 1, "AED": 3.6725, "AFN": 70.930145, "ALL": 89.880745, "AMD": 388.084007, "ANG": 1.79,
    "AOA": 910.532308, "ARS": 948.17, "AUD": 1.474829, "AWG": 1.79, "AZN": 1.699237, "BAM": 1.749036,
    "BBD": 2, "BDT": 119.497266, "BGN": 1.749165, "BHD": 0.376, "BIF": 2900.267644, "BMD": 1,
    "BND": 1.302522, "BOB": 6.918444, "BRL": 5.563555, "BSD": 1, "BTN": 83.858202, "BWP": 13.368056,
    "BYN": 3.25702, "BZD": 2, "CAD": 1.351507, "CDF": 2821.533873, "CHF": 0.847992, "CLP": 914.946844,
    "CNY": 7.125721, "COP": 4032.124348, "CRC": 523.251219, "CUP": 24, "CVE": 98.606453, "CZK": 22.409394,
    "DJF": 177.721, "DKK": 6.669549, "DOP": 59.725937, "DZD": 134.240444, "EGP": 48.773605, "ERN": 15,
    "ETB": 110.793381, "EUR": 0.894271, "FJD": 2.209201, "FKP": 0.758066, "FOK": 6.669253, "GBP": 0.75807,
    "GEL": 2.698374, "GGP": 0.758066, "GHS": 15.646015, "GIP": 0.758066, "GMD": 70.090529, "GNF": 8687.65698,
    "GTQ": 7.739001, "GYD": 209.238751, "HKD": 7.796834, "HNL": 24.795486, "HRK": 6.737862, "HTG": 131.727086,
    "HUF": 351.816224, "IDR": 15463.537936, "ILS": 3.689802, "IMP": 0.758066, "INR": 83.858211, "IQD": 1307.71534,
    "IRR": 42046.557011, "ISK": 136.842504, "JEP": 0.758066, "JMD": 156.609396, "JOD": 0.709, "JPY": 144.238461,
    "KES": 128.860802, "KGS": 85.514332, "KHR": 4063.922959, "KID": 1.474819, "KMF": 439.950981, "KRW": 1326.065473,
    "KWD": 0.305137, "KYD": 0.833333, "KZT": 483.412984, "LAK": 21958.776695, "LBP": 89500, "LKR": 299.54412,
    "LRD": 195.128633, "LSL": 17.727478, "LYD": 4.761189, "MAD": 9.651574, "MDL": 17.440239, "MGA": 4559.019151,
    "MKD": 55.302516, "MMK": 2099.920135, "MNT": 3394.316353, "MOP": 8.031133, "MRU": 39.757746, "MUR": 46.041061,
    "MVR": 15.425412, "MWK": 1734.083284, "MXN": 19.262175, "MYR": 4.37505, "MZN": 63.668821, "NAD": 17.727478,
    "NGN": 1572.880665, "NIO": 36.923597, "NOK": 10.483587, "NPR": 134.173123, "NZD": 1.607849, "OMR": 0.384497,
    "PAB": 1, "PEN": 3.743768, "PGK": 3.892274, "PHP": 56.230734, "PKR": 278.734522, "PLN": 3.824356,
    "PYG": 7598.969319, "QAR": 3.64, "RON": 4.471734, "RSD": 105.116406, "RUB": 91.507333, "RWF": 1331.048915,
    "SAR": 3.75, "SBD": 8.500735, "SCR": 13.837586, "SDG": 458.303626, "SEK": 10.200087, "SGD": 1.302527,
    "SHP": 0.758066, "SLE": 22.414795, "SLL": 22414.795049, "SOS": 571.574926, "SRD": 29.173375, "SSP": 2809.067693,
    "STN": 21.909564, "SYP": 13122.06765, "SZL": 17.727478, "THB": 34.138685, "TJS": 10.605013, "TMT": 3.499058,
    "TND": 3.045497, "TOP": 2.324466, "TRY": 34.030627, "TTD": 6.784157, "TVD": 1.474819, "TWD": 31.688138,
    "TZS": 2699.373687, "UAH": 41.241903, "UGX": 3715.735216, "UYU": 40.352563, "UZS": 12685.942826, "VES": 36.5888,
    "VND": 24991.37597, "VUV": 117.995378, "WST": 2.694395, "XAF": 586.601308, "XCD": 2.7, "XDR": 0.742859,
    "XOF": 586.601308, "XPF": 106.714781, "YER": 250.215458, "ZAR": 17.727644, "ZMW": 26.177908, "ZWL": 13.8134,
}

# Serialize to UTF-8 encoded JSON bytes, ready to be written as a file.
json_rates = json.dumps(rates).encode("utf-8")
`write_file` can write arbitrary binary data.
# Store the serialized rates as a file node inside the working directory.
file_path = f"{working_dir}/file"
yt.write_file(file_path, json_rates)

# Read the node back through the stream returned by read_file.
file_stream = yt.read_file(file_path)
content = file_stream.read()

# Show the currency codes recovered from the downloaded JSON.
json.loads(content).keys()
Use files in operations
def mapper(row):
    """Check from inside a job that the attached rates file is readable.

    Opens the local file named "file" and asserts that its text contains
    "USD". The input ``row`` is ignored and no output rows are produced.

    Raises:
        AssertionError: if "USD" is not found in the file contents.
    """
    # The local file's name is equal to the Cypress node's name ("file").
    # The context manager closes the handle once the content has been read.
    with open("file") as rates_file:
        assert "USD" in rates_file.read()
# Paths of the input and output tables for the map operation.
src = f"{working_dir}/fake_src"
dst = f"{working_dir}/fake_dst"

# A single placeholder row is enough to give the operation some input.
input_rows = [{"x": 1}]
yt.write_table(src, input_rows)
The `yt_files` parameter makes the operation download the file from Cypress and save it in the job's file system. The local file has the same name as the Cypress node.
# Run the mapper over src into dst, attaching the Cypress file to the jobs.
attached_files = [file_path]
yt.run_map(mapper, src, dst, yt_files=attached_files)