{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Convert NOAA weather data file \".dly\" to Pandas DataFrame\n",
    "\n",
    "Follow the instructions at https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt\n",
    "\n",
    "Get data from ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily\n",
    "\n",
    ""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-02-03T06:16:48.354909Z",
     "start_time": "2018-02-03T06:16:45.219683Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import ftplib\n",
    "import os\n",
    "import re\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "%matplotlib notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-02-03T06:16:48.369288Z",
     "start_time": "2018-02-03T06:16:48.356797Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# download data from FTP\n",
    "\n",
    "def download_file_from_ftp(FTP_SERVER, FTP_PATH, FILENAME, overwrite=True):\n",
    "    \"\"\"Download FILENAME from directory FTP_PATH on FTP_SERVER.\n",
    "\n",
    "    Logs in with ftplib's default (anonymous) credentials and writes the\n",
    "    file locally to the path FILENAME.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    FTP_SERVER : str\n",
    "        Host name of the FTP server.\n",
    "    FTP_PATH : str\n",
    "        Remote directory that contains the file.\n",
    "    FILENAME : str\n",
    "        Name of the remote file; also used as the local file name.\n",
    "    overwrite : bool, default True\n",
    "        When False and FILENAME already exists locally, nothing is\n",
    "        downloaded and the existing copy is kept.\n",
    "\n",
    "    Errors raised by ftplib or by the file system propagate to the caller.\n",
    "    \"\"\"\n",
    "    if not overwrite and os.path.exists(FILENAME):\n",
    "        return\n",
    "\n",
    "    # Download into a temporary name first so that a failed or interrupted\n",
    "    # transfer never truncates or corrupts an existing good copy.\n",
    "    partial_name = FILENAME + \".part\"\n",
    "    try:\n",
    "        with ftplib.FTP(FTP_SERVER) as ftp:\n",
    "            ftp.login()\n",
    "            ftp.cwd(FTP_PATH)\n",
    "            with open(partial_name, \"wb\") as f:\n",
    "                ftp.retrbinary(\"RETR \" + FILENAME, f.write)\n",
    "        os.replace(partial_name, FILENAME)\n",
    "    finally:\n",
    "        # only still present when the transfer did not complete\n",
    "        if os.path.exists(partial_name):\n",
    "            os.remove(partial_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Query station ID"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-02-03T06:16:48.407788Z",
     "start_time": "2018-02-03T06:16:48.371677Z"
    },
    "collapsed": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def get_station_ID(station_to_find, filename):\n",
    "    \"\"\"Return the ID of the first station whose line contains station_to_find.\n",
    "\n",
    "    The file is scanned line by line; the match is a plain, case-sensitive\n",
    "    substring test against the whole line, and the ID is the text before\n",
    "    the first space on the matching line.\n",
    "\n",
    "    Returns None when no line matches.\n",
    "    \"\"\"\n",
    "    # the with-block closes the file even when we return from inside the loop\n",
    "    with open(filename) as station_file:\n",
    "        for line in station_file:\n",
    "            if station_to_find in line:\n",
    "                return line.split(\" \", 1)[0]\n",
    "    return None\n",
    "\n",
    "# warning: the download is slow, so an existing local copy is reused\n",
    "download_file_from_ftp(\"ftp.ncdc.noaa.gov\", \"/pub/data/ghcn/daily\", \"ghcnd-stations.txt\", overwrite=False)\n",
    "\n",
    "station_to_find=\"GUANGZHOU\" # USE CAPS\n",
    "station_ID=get_station_ID(station_to_find, \"ghcnd-stations.txt\")\n",
    "if station_ID is None:\n",
    "    # fail here with a clear message instead of a TypeError in a later cell\n",
    "    raise ValueError(\"No station matching {!r} found in ghcnd-stations.txt\".format(station_to_find))"
   ]
  },
{
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download weather data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-02-03T06:16:54.711515Z",
     "start_time": "2018-02-03T06:16:48.410687Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# The file to fetch is named after the station ID, with a \".dly\" extension.\n",
    "weather_data_filename = station_ID + \".dly\"\n",
    "\n",
    "# warning: this download is slow and only needs to be run once\n",
    "download_file_from_ftp(\n",
    "    \"ftp.ncdc.noaa.gov\",\n",
    "    \"/pub/data/ghcn/daily/all\",\n",
    "    weather_data_filename,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Convert .dly to pandas DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-02-03T06:18:19.782253Z",
     "start_time": "2018-02-03T06:18:19.412362Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | YEAR | \n", "MONTH | \n", "ELEMENT | \n", "VALUE1 | \n", "VALUE2 | \n", "VALUE3 | \n", "VALUE4 | \n", "VALUE5 | \n", "VALUE6 | \n", "VALUE7 | \n", "... | \n", "VALUE22 | \n", "VALUE23 | \n", "VALUE24 | \n", "VALUE25 | \n", "VALUE26 | \n", "VALUE27 | \n", "VALUE28 | \n", "VALUE29 | \n", "VALUE30 | \n", "VALUE31 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1945 | \n", "11 | \n", "TAVG | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "107.0 | \n", "NaN | \n", "
1 | \n", "1945 | \n", "12 | \n", "TAVG | \n", "123.0 | \n", "136.0 | \n", "152.0 | \n", "144.0 | \n", "146.0 | \n", "189.0 | \n", "219.0 | \n", "... | \n", "179.0 | \n", "146.0 | \n", "128.0 | \n", "107.0 | \n", "104.0 | \n", "112.0 | \n", "122.0 | \n", "127.0 | \n", "129.0 | \n", "156.0 | \n", "
2 | \n", "1946 | \n", "1 | \n", "TAVG | \n", "150.0 | \n", "150.0 | \n", "123.0 | \n", "117.0 | \n", "112.0 | \n", "121.0 | \n", "125.0 | \n", "... | \n", "146.0 | \n", "153.0 | \n", "173.0 | \n", "196.0 | \n", "211.0 | \n", "212.0 | \n", "218.0 | \n", "201.0 | \n", "156.0 | \n", "131.0 | \n", "
3 | \n", "1946 | \n", "2 | \n", "TAVG | \n", "114.0 | \n", "112.0 | \n", "147.0 | \n", "181.0 | \n", "195.0 | \n", "192.0 | \n", "149.0 | \n", "... | \n", "201.0 | \n", "196.0 | \n", "231.0 | \n", "226.0 | \n", "221.0 | \n", "229.0 | \n", "240.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "1946 | \n", "3 | \n", "TAVG | \n", "237.0 | \n", "162.0 | \n", "142.0 | \n", "133.0 | \n", "183.0 | \n", "187.0 | \n", "160.0 | \n", "... | \n", "183.0 | \n", "192.0 | \n", "205.0 | \n", "216.0 | \n", "223.0 | \n", "238.0 | \n", "207.0 | \n", "195.0 | \n", "233.0 | \n", "228.0 | \n", "
5 rows × 34 columns
\n", "