Skip to main content

SQL cells

SQL cell allows to run CHYT/SPYT/QL/YQL queries directly in Notebook.

To run sql cells, you must have:

  • YT_TOKEN env in kernel secrets
  • yt-wrapper with version >= 0.13.21. You can use default kenrel image, which already has. ghcr.io/tractoai/notebook-kernel-default:2024-12-27-14-31-09-307b3bc68

Create YQL cell with output variable yql_result

def __run_yql_query(query):
import yt.wrapper as yt
import json
output_data = [list(item.read_rows()) for item in yt.run_query(engine="yql", query=query).get_results()]
print("DATA:", json.dumps(output_data))
return output_data

yql_result = __run_yql_query(query="SELECT `lat`, `lon`, `squirrel_id`, `hectare`, `shift`, `date`, `hectare_squirrel_number`, `age`, `primary_fur_color`, `highlight_fur_color`, `color_notes`, `location`, `above_ground_sighter_measurement`, `specific_location`, `running`, `chasing`, `climbing`, `eating`, `foraging`, `other_activities`, `kuks`, `quaas`, `moans`, `tail_flags`, `tail_twitches`, `approaches`, `indifferent`, `runs_from`, `other_interactions` FROM `//home/samples/squirrels` LIMIT 5;")

Create CHYT cell with output variable chyt_result

def __run_chyt_query(engine, query, clique):
import yt.wrapper as yt
import json
output_data = [list(item.read_rows()) for item in yt.run_query(engine=engine, query=query, settings={"clique": clique}).get_results()]
print("DATA:", json.dumps(output_data))
return output_data

chyt_result = __run_chyt_query(engine="chyt", query="SELECT `lat`, `lon`, `squirrel_id`, `hectare`, `shift`, `date`, `hectare_squirrel_number`, `age`, `primary_fur_color`, `highlight_fur_color`, `color_notes`, `location`, `above_ground_sighter_measurement`, `specific_location`, `running`, `chasing`, `climbing`, `eating`, `foraging`, `other_activities`, `kuks`, `quaas`, `moans`, `tail_flags`, `tail_twitches`, `approaches`, `indifferent`, `runs_from`, `other_interactions` FROM `//home/samples/squirrels` LIMIT 5 OFFSET 5;", clique="ch_public")

Let's install pandas to preview data as a table

!pip uninstall pandas -y
!pip install pandas --no-cache-dir
import pandas as pd

Preview result from yql_result

pd.json_normalize(yql_result[0])

Preview result from chyt_result

pd.json_normalize(chyt_result[0])

Preview concated table

df1 = pd.DataFrame(yql_result[0])
df2 = pd.DataFrame(chyt_result[0])

pd.concat([df1, df2])