SQL to Python Pandas
Filtering
Filter on one column
select
    *
from
    df
where
    field = value
df_result = df[df['field'] == value]
Filter on two columns with an and
select
    *
from
    df
where
    field1 = value1 and 
    field2 = value2
df_result = df[(df['field1'] == value2) & (df['field2'] == value2)]
Filter on a list
SELECT
    *
FROM
    df
WHERE
    field1 IN ('value1', 'value2')
AND
    field2 IN ('value3', 'value4');
df_result = df[(df['field1'].isin(['value1', 'value2'])) & (df['field2'].isin(['value3', 'value4']))]
Filter on the latest date
select
    *
from
    df
where
    datestamp = (select max(datestamp) from df)
df_result = df[df['datestamp'] == df['datestamp'].max()]
Filter on the latest date grouped by dimension
SELECT
    MAX(datestamp),
    dimension
FROM
    df
GROUP BY
    metric_id;
df_latest = df.merge(
    df.groupby('dimension', as_index=False).agg({'datestamp': 'max'}),
    on=['dimension', 'datestamp'],
    how='inner'
)
Joining
SELECT
    *
FROM
    df1
LEFT JOIN df2 ON
    df2.id2 = df1.id1
df_result = pd.merge(
    df1,
    df2,
    left_on="id1",
    right_on="id2",
    how="left"
)
Retrieve a single distinct value
SELECT DISTINCT
    field
FROM
    df
df_result = df['field'].unique()
Retrieve multiple distinct values
SELECT DISTINCT
    field1,
    field2
FROM
    df
distinct_df = df[['field1', 'field2']].drop_duplicates()
Summarising
select
    id,
    datestamp,
    avg(score),
    avg(slo)
from
    df
group by
    id,
    datestamp
df_result = df.groupby(['id','datestamp'],as_index=True).agg({
    'score' : 'mean',
    'slo'   : 'mean'
}).reset_index()
df_result = ['id','datestamp', 'score','slo']
Calculated fields
select
    total_ok,
    total,
    total_ok / total as score
from
    df
df['score'] = df.apply(lambda row: row['total_ok'] / row['total'], axis=1)