SQL cells
SQL cell allows to run CHYT/SPYT/QL/YQL queries directly in Notebook.
To run sql cells, you must have:
YT_TOKENenv in kernel secrets- yt-wrapper with version >= 0.13.21.
You can use default kenrel image, which already has.
ghcr.io/tractoai/notebook-kernel-default:2024-12-27-14-31-09-307b3bc68
Create YQL cell with output variable yql_result
def __run_yql_query(query):
import yt.wrapper as yt
import json
output_data = [list(item.read_rows()) for item in yt.run_query(engine="yql", query=query).get_results()]
print("DATA:", json.dumps(output_data))
return output_data
yql_result = __run_yql_query(query="SELECT `lat`, `lon`, `squirrel_id`, `hectare`, `shift`, `date`, `hectare_squirrel_number`, `age`, `primary_fur_color`, `highlight_fur_color`, `color_notes`, `location`, `above_ground_sighter_measurement`, `specific_location`, `running`, `chasing`, `climbing`, `eating`, `foraging`, `other_activities`, `kuks`, `quaas`, `moans`, `tail_flags`, `tail_twitches`, `approaches`, `indifferent`, `runs_from`, `other_interactions` FROM `//home/samples/squirrels` LIMIT 5;")
Create CHYT cell with output variable chyt_result
def __run_chyt_query(engine, query, clique):
import yt.wrapper as yt
import json
output_data = [list(item.read_rows()) for item in yt.run_query(engine=engine, query=query, settings={"clique": clique}).get_results()]
print("DATA:", json.dumps(output_data))
return output_data
chyt_result = __run_chyt_query(engine="chyt", query="SELECT `lat`, `lon`, `squirrel_id`, `hectare`, `shift`, `date`, `hectare_squirrel_number`, `age`, `primary_fur_color`, `highlight_fur_color`, `color_notes`, `location`, `above_ground_sighter_measurement`, `specific_location`, `running`, `chasing`, `climbing`, `eating`, `foraging`, `other_activities`, `kuks`, `quaas`, `moans`, `tail_flags`, `tail_twitches`, `approaches`, `indifferent`, `runs_from`, `other_interactions` FROM `//home/samples/squirrels` LIMIT 5 OFFSET 5;", clique="ch_public")
Let's install pandas to preview data as a table
!pip uninstall pandas -y
!pip install pandas --no-cache-dir
import pandas as pd
Preview result from yql_result
pd.json_normalize(yql_result[0])
Preview result from chyt_result
pd.json_normalize(chyt_result[0])
Preview concated table
df1 = pd.DataFrame(yql_result[0])
df2 = pd.DataFrame(chyt_result[0])
pd.concat([df1, df2])