# Offline inference example ("offline-inference"): generate a short story for
# each name in a table using a TinyStories model, run as a YTsaurus map operation.

import requests
import uuid
import yt.wrapper as yt
# Choose a per-run working directory on the cluster: the user's configured
# home path when one exists, otherwise a unique subdirectory under //tmp.
# uuid4().hex keeps concurrent runs from colliding.
username = yt.get_user_name()
if yt.exists(f"//sys/users/{username}/@user_info/home_path"):
    home = yt.get(f"//sys/users/{username}/@user_info/home_path")
    working_dir = f"{home}/{uuid.uuid4().hex}"
else:
    working_dir = f"//tmp/examples/{uuid.uuid4().hex}"
yt.create("map_node", working_dir)
print(working_dir)
# Download a public list of first names and load it into a YT table,
# one {"name": ...} row per name.
response = requests.get(
    "https://raw.githubusercontent.com/dominictarr/random-name/refs/heads/master/first-names.txt",
    timeout=60,  # don't hang the example forever on a stalled connection
)
response.raise_for_status()  # fail fast on HTTP errors instead of writing an error page as data
# splitlines() handles the file's \r\n line endings; filtering drops the
# trailing empty string that split("\r\n") would have turned into a junk row.
names = [name for name in response.content.decode("utf-8").splitlines() if name]

yt.create("table", f"{working_dir}/names", ignore_existing=True)
yt.write_table(f"{working_dir}/names", [{"name": name} for name in names])
class StoriesGenerator:
    """YT mapper: for each input row {"name": ...}, yield {"story": ...}.

    The model is loaded lazily inside the first __call__ rather than in
    __init__, so the heavy imports and weights are pulled once per job
    process on the cluster instead of being pickled with the operation.
    """

    def __init__(self):
        # Lazy-initialization flag; model/tokenizer are created in the job.
        self.model_loaded = False

    def __call__(self, row):
        import sys

        if not self.model_loaded:
            from transformers import AutoModelForCausalLM, AutoTokenizer
            import torch

            # Use the job's GPU when one is allocated (see gpu_limit in the
            # operation spec), otherwise fall back to CPU.
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            self.model = AutoModelForCausalLM.from_pretrained("roneneldan/TinyStories-1M").to(self.device)
            # NOTE(review): tokenizer is taken from GPT-Neo, presumably the
            # tokenizer the TinyStories checkpoint was trained with.
            self.tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
            self.model_loaded = True

        name = row["name"]
        prompt = f"{name} was a little child "
        # Job stderr is visible in the YT operation UI for debugging.
        print("Prompt {}".format(prompt), file=sys.stderr)
        input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device)
        # Greedy decoding (num_beams=1), capped at 100 tokens total.
        output = self.model.generate(input_ids, max_length=100, num_beams=1)
        output_text = self.tokenizer.decode(output[0], skip_special_tokens=True)

        yield {"story": output_text}

# Recreate the output table, then run story generation as a map operation:
# 20 jobs scheduled in the H100 pool tree, one GPU and 4 CPU cores each.
yt.create("table", f"{working_dir}/tales", force=True)

operation_spec = {
    "job_count": 20,
    "pool_trees": ["gpu_h100"],
    "mapper": {
        "gpu_limit": 1,
        "cpu_limit": 4.0,
    },
}

yt.run_map(
    StoriesGenerator(),
    f"{working_dir}/names",
    f"{working_dir}/tales",
    spec=operation_spec,
)