"""Integration tests for acquisition of rvdss data."""

# standard library
import unittest
from copy import copy
from pathlib import Path
from unittest.mock import MagicMock, patch
import pdb  # NOTE(review): debugging aid left in the file; consider removing

# third party
import mysql.connector
from mysql.connector.errors import IntegrityError
import numpy as np
import pandas as pd

# first party
from delphi.epidata.acquisition.rvdss.database import update, rvdss_cols, get_num_rows
from delphi.epidata.client.delphi_epidata import Epidata
from delphi_utils import get_structured_logger
import delphi.operations.secrets as secrets
# py3tester coverage target (equivalent to `import *`)
# __test_target__ = 'delphi.epidata.acquisition.covid_hosp.facility.update'

# Newline separator for multi-line assertion-failure messages.
# NOTE(review): appears unused by the current test body — verify before removing.
NEWLINE = "\n"
class AcquisitionTests(unittest.TestCase):
    """Integration tests: insert rvdss data via `update()` and read it back
    through the local Epidata API instance.

    Requires the local docker test stack (`delphi_web_epidata` and
    `delphi_database_epidata`) to be running.
    """

    # shared structured logger passed into every `update()` call
    logger = get_structured_logger()

    def setUp(self):
        """Perform per-test setup: point clients at the local stack, reset tables."""

        # use the local instance of the Epidata API
        Epidata.BASE_URL = 'http://delphi_web_epidata/epidata'
        Epidata.auth = ('epidata', 'key')

        # use the local instance of the epidata database
        secrets.db.host = 'delphi_database_epidata'
        secrets.db.epi = ('user', 'pass')

        # clear relevant tables and register the API key the client uses
        epidata_cnx = mysql.connector.connect(
            user='user',
            password='pass',
            host='delphi_database_epidata',
            database='epidata')
        epidata_cur = epidata_cnx.cursor()

        epidata_cur.execute('truncate table rvdss')
        epidata_cur.execute('DELETE from api_user')
        epidata_cur.execute('INSERT INTO api_user(api_key, email) VALUES ("key", "email")')
        epidata_cnx.commit()
        epidata_cur.close()

        # make connection and cursor available to test cases (closed in tearDown)
        self.cnx = epidata_cnx
        self.cur = epidata_cnx.cursor()

    def tearDown(self):
        """Perform per-test teardown."""
        self.cur.close()
        self.cnx.close()

    def _route_mock_to_db(self, connection_mock, mock_sql):
        """Route the patched `mysql.connector.connect` through the real test DB.

        `update()` opens its own connection; handing the mock a real cursor on
        `self.cnx`, and committing through `self.cnx`, lets the writes land in
        the test database while the patch keeps `update()` away from
        production connection settings.
        """
        # when the mocked connection's cursor() is called, return a real
        # cursor made from the currently open connection
        connection_mock.cursor.return_value = self.cnx.cursor()
        # commit via the open connection the cursor is derived from
        connection_mock.commit = self.cnx.commit
        mock_sql.return_value = connection_mock

    @staticmethod
    def _as_api_records(frame):
        """Shape a dataframe like the API's `epidata` payload.

        Adds any rvdss columns missing from `frame`, replaces NaN with None
        (what the API returns for nulls), sorts by epiweek and geo_value, and
        converts to a list of row dicts.
        """
        shaped = frame.reindex(rvdss_cols, axis=1)
        shaped = shaped.replace({np.nan: None}).sort_values(by=["epiweek", "geo_value"])
        return shaped.to_dict(orient="records")

    @patch("mysql.connector.connect")
    def test_rvdss_repiratory_detections(self, mock_sql):
        # NOTE(review): "repiratory" is a typo for "respiratory"; the name is
        # kept as-is so existing test selectors keep matching.
        connection_mock = MagicMock()

        test_dir = Path(__file__).parent.parent.parent.parent
        detection_data = pd.read_csv(
            test_dir / "testdata/acquisition/rvdss/RVD_CurrentWeekTable_Formatted.csv")
        detection_data['time_type'] = "week"

        # rows for two provinces over two weeks; bump their issue so the data
        # holds more than one issue
        subset_index = detection_data[
            (detection_data['geo_value'].isin(['nl', 'nb'])) &
            (detection_data['time_value'].isin([20240831, 20240907]))].index
        detection_data.loc[subset_index, "issue"] = 20250227

        # take a small subset just for testing insertion
        detection_subset = detection_data.loc[subset_index]
        expected_response = {
            "epidata": self._as_api_records(detection_subset),
            "result": 1,
            "message": "success",
        }

        # a disjoint subset (different provinces) to test more calling options
        detection_subset2 = detection_data[
            (detection_data['geo_value'].isin(['nu', 'nt'])) &
            (detection_data['time_value'].isin([20240831, 20240907]))]
        expected_response2 = {
            "epidata": self._as_api_records(detection_subset2),
            "result": 1,
            "message": "success",
        }

        # a single lab geo_value carrying two issues for the same epiweek
        subset_index2 = detection_data[
            (detection_data['geo_value'].isin(['ouest du québec'])) &
            (detection_data['time_value'].isin([20240831, 20240907]))].index
        detection_data.loc[subset_index2, "issue"] = [20250220, 20250227]
        detection_data.loc[subset_index2, "epiweek"] = [202435, 202435]
        detection_data.loc[subset_index2, "time_value"] = [20240831, 20240831]

        detection_subset3 = detection_data.loc[subset_index2]
        expected_response3 = {
            "epidata": self._as_api_records(detection_subset3),
            "result": 1,
            "message": "success",
        }

        # make sure the data does not yet exist
        with self.subTest(name='no data yet'):
            response = Epidata.rvdss(geo_type='province',
                                     time_values=[202435, 202436],
                                     geo_value=['nl', 'nb'])
            self.assertEqual(response['result'], -2, response)

        # acquire sample data into the local database
        with self.subTest(name='first acquisition'):
            self._route_mock_to_db(connection_mock, mock_sql)
            update(detection_subset, self.logger)

            response = Epidata.rvdss(geo_type='province',
                                     time_values=[202435, 202436],
                                     geo_value=['nl', 'nb'])
            self.assertEqual(response, expected_response)

        with self.subTest(name='duplicate acquisition'):
            # The main run function checks whether an update has already been
            # fetched, so the same data should never be inserted twice; a
            # second insert of identical rows must hit the unique constraint.
            self._route_mock_to_db(connection_mock, mock_sql)
            with self.assertRaises(IntegrityError):
                update(detection_subset, self.logger)

        # insert a frame whose columns are already in the table's exact order
        with self.subTest(name='exact column order'):
            ordered_cols = [col for col in rvdss_cols if col in detection_subset2.columns]
            ordered_df = detection_subset2[ordered_cols]

            self._route_mock_to_db(connection_mock, mock_sql)
            update(ordered_df, self.logger)

            response = Epidata.rvdss(geo_type='province',
                                     time_values=[202435, 202436],
                                     geo_value=['nt', 'nu'])
            self.assertEqual(response, expected_response2)

        # request by issue
        with self.subTest(name='issue request'):
            response = Epidata.rvdss(geo_type='province',
                                     time_values=[202435, 202436],
                                     geo_value=['nl', 'nb'],
                                     issues=20250227)
            self.assertEqual(response, expected_response)

        # check requesting lists vs single values
        # (was mislabeled 'duplicate aquisition', colliding with the subtest above)
        with self.subTest(name='lists vs single values'):
            self._route_mock_to_db(connection_mock, mock_sql)
            update(detection_subset3, self.logger)

            # wildcard geo_value with a single issue
            response = Epidata.rvdss(geo_type='province',
                                     time_values=[202435, 202436],
                                     geo_value="*",
                                     issues=20250227)

            # scalar time_value/geo_value with a list of issues
            response2 = Epidata.rvdss(geo_type='lab',
                                      time_values=202435,
                                      geo_value='ouest du québec',
                                      issues=[20250220, 20250227])

            self.assertEqual(response, expected_response)
            self.assertEqual(response2, expected_response3)