Source code for wehrdj.ingest.utils

"""
Utility functions for ingestion! what else!
"""

import warnings

import pandas as pd


def col_to_datetime(column: pd.Series) -> pd.Series:
    """
    Fix date column with improperly padded m/d/y formatting

    Every character that is not a digit or a slash is stripped, month and
    day are left-padded with zeros to two digits, only the last two
    characters of the year are kept, and the result is parsed with the
    ``%y/%m/%d`` format.

    Args:
        column: Series of date strings laid out as month/day/year.

    Returns:
        Series of datetimes. Entries that do not contain at least three
        slash-separated fields, or that fail to parse, are ``NaT``
        (parsing uses ``errors='coerce'``).
    """
    # remove any nonnumeric, nonslash characters
    cleaned = column.str.replace(r'[^\d/]', '', regex=True)

    # Grab the first three slash-delimited fields. ``str.split(expand=True)``
    # only creates as many columns as the longest row has fields, so a column
    # without a single complete date used to raise KeyError on 'day'/'year'.
    # ``str.extract`` always returns all three named columns (NaN on no match).
    split_date = cleaned.str.extract(
        r'^(?P<month>\d*)/(?P<day>\d*)/(?P<year>\d*)'
    )

    # zfill months and days
    split_date['month'] = split_date['month'].str.zfill(2)
    split_date['day'] = split_date['day'].str.zfill(2)

    # strip preceding "20" on some dates
    split_date['year'] = split_date['year'].str[-2:]

    # recombine as year/month/day; a missing field propagates as NaN -> NaT
    date_join = (
        split_date['year'] + '/' + split_date['month'] + '/' + split_date['day']
    )
    return pd.to_datetime(date_join, format="%y/%m/%d", errors='coerce')

def filter_nans(df: pd.DataFrame) -> pd.DataFrame:
    """
    Drop every row that has a NaN in any column, warning about what was dropped

    Args:
        df: frame to check

    Returns:
        ``df`` itself when no row has a NaN, otherwise the rows of ``df``
        without any NaN. A warning listing the dropped rows is issued in the
        latter case.
    """
    has_nan = df.isna().any(axis=1)
    n_dropped = has_nan.sum()

    # nothing to drop: hand the frame back untouched and stay quiet
    if not n_dropped > 0:
        return df

    dropped_rows = df[has_nan]
    warnings.warn(f"Dropping {n_dropped} rows with NaNs in them:\n{dropped_rows}")
    return df[~has_nan]