WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / IT CANNOT LOG IN OR REGISTER ACCOUNTS / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FIND CONTENT THAT MAY BE INAPPROPRIATE FOR SOME AUDIENCES, CONTACT THE ADMIN AT ilovescratch@foxmail.com
Skip to content

Commit a40b3ee

Browse files
authored
Merge pull request #1700 from cmu-delphi/rvdss-integration-tests
Merge RVDSS integration tests into main indicator branch
2 parents 24c6b9f + d531cec commit a40b3ee

File tree

16 files changed

+2018
-1906
lines changed

16 files changed

+2018
-1906
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ jobs:
6565
docker network create --driver bridge delphi-net
6666
docker run --rm -d -p 13306:3306 --network delphi-net --name delphi_database_epidata --cap-add=sys_nice delphi_database_epidata
6767
docker run --rm -d -p 6379:6379 --network delphi-net --env "REDIS_PASSWORD=1234" --name delphi_redis delphi_redis
68-
68+
6969
7070
- run: |
7171
wget https://raw.githubusercontent.com/eficode/wait-for/master/wait-for

integrations/acquisition/covid_hosp/facility/test_scenarios.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def setUp(self):
4242
cur.execute('truncate table covid_hosp_facility_key')
4343
cur.execute('truncate table covid_hosp_meta')
4444
cur.execute('delete from api_user')
45-
cur.execute('insert into api_user(api_key, email) values ("key", "emai")')
45+
cur.execute('insert into api_user(api_key, email) values ("key", "email")')
4646

4747
@freeze_time("2021-03-16")
4848
def test_acquire_dataset(self):
Lines changed: 183 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,30 @@
11
"""Integration tests for acquisition of rvdss data."""
22
# standard library
33
import unittest
4-
from unittest.mock import MagicMock
4+
from unittest.mock import MagicMock, patch
5+
from copy import copy
56

67
# first party
78
from delphi.epidata.client.delphi_epidata import Epidata
8-
from delphi.epidata.acquisition.rvdss.database import update
9+
from delphi.epidata.acquisition.rvdss.database import update, rvdss_cols, get_num_rows
910
import delphi.operations.secrets as secrets
11+
from delphi_utils import get_structured_logger
1012

1113
# third party
12-
import mysql.connector
14+
import mysql.connector
15+
from mysql.connector.errors import IntegrityError
16+
import pandas as pd
17+
import numpy as np
18+
from pathlib import Path
19+
import pdb
1320

1421
# py3tester coverage target (equivalent to `import *`)
1522
# __test_target__ = 'delphi.epidata.acquisition.covid_hosp.facility.update'
1623

1724
NEWLINE="\n"
1825

1926
class AcquisitionTests(unittest.TestCase):
27+
logger = get_structured_logger()
2028

2129
def setUp(self):
2230
"""Perform per-test setup."""
@@ -25,74 +33,189 @@ def setUp(self):
2533
# self.test_utils = UnitTestUtils(__file__)
2634

2735
# use the local instance of the Epidata API
28-
Epidata.BASE_URL = 'https://delphi_web_epidata/epidata'
36+
Epidata.BASE_URL = 'http://delphi_web_epidata/epidata'
2937
Epidata.auth = ('epidata', 'key')
3038

3139
# use the local instance of the epidata database
3240
secrets.db.host = 'delphi_database_epidata'
3341
secrets.db.epi = ('user', 'pass')
3442

3543
# clear relevant tables
36-
u, p = secrets.db.epi
37-
cnx = mysql.connector.connect(user=u, password=p, database="epidata")
38-
cur = cnx.cursor()
44+
epidata_cnx = mysql.connector.connect(
45+
user='user',
46+
password='pass',
47+
host='delphi_database_epidata',
48+
database='epidata')
49+
epidata_cur = epidata_cnx.cursor()
50+
51+
epidata_cur.execute('truncate table rvdss')
52+
epidata_cur.execute('DELETE from api_user')
53+
epidata_cur.execute('INSERT INTO api_user(api_key, email) VALUES ("key", "email")')
54+
epidata_cnx.commit()
55+
epidata_cur.close()
56+
#epidata_cnx.close()
57+
58+
# make connection and cursor available to test cases
59+
self.cnx = epidata_cnx
60+
self.cur = epidata_cnx.cursor()
61+
62+
def tearDown(self):
63+
"""Perform per-test teardown."""
64+
self.cur.close()
65+
self.cnx.close()
66+
67+
@patch("mysql.connector.connect")
68+
def test_rvdss_repiratory_detections(self, mock_sql):
69+
connection_mock = MagicMock()
70+
71+
TEST_DIR = Path(__file__).parent.parent.parent.parent
72+
detection_data = pd.read_csv(str(TEST_DIR) + "/testdata/acquisition/rvdss/RVD_CurrentWeekTable_Formatted.csv")
73+
detection_data['time_type'] = "week"
74+
75+
# get the index of the subset of data we want to use
76+
subset_index = detection_data[(detection_data['geo_value'].isin(['nl', 'nb'])) &
77+
(detection_data['time_value'].isin([20240831, 20240907]))].index
78+
79+
80+
# change issue so the data has more than one
81+
detection_data.loc[subset_index,"issue"] = 20250227
82+
83+
# take a small subset just for testing insertion
84+
detection_subset = detection_data.loc[subset_index]
85+
86+
# get the expected response when calling the API
87+
# the dataframe needs to add the missing columns and replace nan with None
88+
# since that is what is returned from the API
89+
df = detection_subset.reindex(rvdss_cols,axis=1)
90+
df = df.replace({np.nan: None}).sort_values(by=["epiweek","geo_value"])
91+
df = df.to_dict(orient = "records")
92+
93+
expected_response = {"epidata": df,
94+
"result": 1,
95+
"message": "success",
96+
}
97+
98+
# get another subset of the data not in the subset to test more calling options
99+
detection_subset2 = detection_data[(detection_data['geo_value'].isin(['nu', 'nt'])) & (detection_data['time_value'].isin([20240831, 20240907])) ]
100+
101+
df2 = detection_subset2.reindex(rvdss_cols,axis=1)
102+
df2 = df2.replace({np.nan: None}).sort_values(by=["epiweek","geo_value"])
103+
df2 = df2.to_dict(orient = "records")
104+
105+
expected_response2 = {"epidata": df2,
106+
"result": 1,
107+
"message": "success",
108+
}
109+
110+
# get another subset of the data for a single geo_value with multiple issues
111+
subset_index2 = detection_data[(detection_data['geo_value'].isin(['ouest du québec'])) &
112+
(detection_data['time_value'].isin([20240831, 20240907]))].index
113+
114+
detection_data.loc[subset_index2,"issue"] = [20250220,20250227]
115+
detection_data.loc[subset_index2,"epiweek"] = [202435,202435]
116+
detection_data.loc[subset_index2,"time_value"] = [20240831,20240831]
117+
118+
detection_subset3 = detection_data.loc[subset_index2]
119+
df3 = detection_subset3.reindex(rvdss_cols,axis=1)
120+
df3 = df3.replace({np.nan: None}).sort_values(by=["epiweek","geo_value"])
121+
df3 = df3.to_dict(orient = "records")
122+
123+
expected_response3 = {"epidata": df3,
124+
"result": 1,
125+
"message": "success",
126+
}
39127

40-
cur.execute('truncate table rvdss_repiratory_detections')
41-
cur.execute('delete from api_user')
42-
cur.execute('insert into api_user(api_key, email) values ("key", "emai")')
43-
44-
def test_rvdss_repiratory_detections(self):
45128
# make sure the data does not yet exist
46129
with self.subTest(name='no data yet'):
47-
response = Epidata.rvdss_repiratory_detections(
48-
'450822', Epidata.range(20200101, 20210101))
130+
response = Epidata.rvdss(geo_type='province',
131+
time_values= [202435, 202436],
132+
geo_value = ['nl','nb'])
49133
self.assertEqual(response['result'], -2, response)
50134

51135
# acquire sample data into local database
52-
# TODO: Define example data
53136
with self.subTest(name='first acquisition'):
54-
acquired = Update.run(network=mock_network)
55-
#self.assertTrue(acquired)
56-
57-
# make sure the data now exists
58-
with self.subTest(name='initial data checks'):
59-
expected_spotchecks = {
60-
"hospital_pk": "450822",
61-
"collection_week": 20201030,
62-
"publication_date": 20210315,
63-
"previous_day_total_ed_visits_7_day_sum": 536,
64-
"total_personnel_covid_vaccinated_doses_all_7_day_sum": 18,
65-
"total_beds_7_day_avg": 69.3,
66-
"previous_day_admission_influenza_confirmed_7_day_sum": -999999
67-
}
68-
response = Epidata.covid_hosp_facility(
69-
'450822', Epidata.range(20200101, 20210101))
70-
self.assertEqual(response['result'], 1)
71-
self.assertEqual(len(response['epidata']), 2)
72-
row = response['epidata'][0]
73-
for k,v in expected_spotchecks.items():
74-
self.assertTrue(
75-
k in row,
76-
f"no '{k}' in row:\n{NEWLINE.join(sorted(row.keys()))}"
77-
)
78-
if isinstance(v, float):
79-
self.assertAlmostEqual(row[k], v, f"row[{k}] is {row[k]} not {v}")
80-
else:
81-
self.assertEqual(row[k], v, f"row[{k}] is {row[k]} not {v}")
82-
83-
# expect 113 fields per row (114 database columns, except `id`)
84-
self.assertEqual(len(row), 113)
85-
86-
# re-acquisition of the same dataset should be a no-op
87-
with self.subTest(name='second acquisition'):
88-
acquired = Update.run(network=mock_network)
89-
self.assertFalse(acquired)
90-
91-
# make sure the data still exists
92-
with self.subTest(name='final data checks'):
93-
response = Epidata.covid_hosp_facility(
94-
'450822', Epidata.range(20200101, 20210101))
95-
self.assertEqual(response['result'], 1)
96-
self.assertEqual(len(response['epidata']), 2)
97-
98-
137+
# When the MagicMock connection's `cursor()` method is called, return
138+
# a real cursor made from the current open connection `cnx`.
139+
connection_mock.cursor.return_value = self.cnx.cursor()
140+
# Commit via the current open connection `cnx`, from which the cursor
141+
# is derived
142+
connection_mock.commit = self.cnx.commit
143+
mock_sql.return_value = connection_mock
144+
145+
update(detection_subset, self.logger)
146+
147+
response = Epidata.rvdss(geo_type='province',
148+
time_values= [202435, 202436],
149+
geo_value = ['nl','nb'])
150+
151+
self.assertEqual(response,expected_response)
152+
153+
with self.subTest(name='duplicate aquisition'):
154+
# The main run function checks if the update has already been fetched/updated
155+
# so it should never run twice, and duplicate acquisitions should never
156+
# occur. Running the update twice will result in an error.
157+
158+
# When the MagicMock connection's `cursor()` method is called, return
159+
# a real cursor made from the current open connection `cnx`.
160+
connection_mock.cursor.return_value = self.cnx.cursor()
161+
# Commit via the current open connection `cnx`, from which the cursor
162+
# is derived
163+
connection_mock.commit = self.cnx.commit
164+
mock_sql.return_value = connection_mock
165+
166+
with self.assertRaises(mysql.connector.errors.IntegrityError):
167+
update(detection_subset, self.logger)
168+
169+
# Request with exact column order
170+
with self.subTest(name='exact column order'):
171+
rvdss_cols_subset = [col for col in detection_subset2.columns if col in rvdss_cols]
172+
ordered_cols = [col for col in rvdss_cols if col in rvdss_cols_subset]
173+
ordered_df = detection_subset2[ordered_cols]
174+
175+
connection_mock.cursor.return_value = self.cnx.cursor()
176+
connection_mock.commit = self.cnx.commit
177+
mock_sql.return_value = connection_mock
178+
179+
update(ordered_df, self.logger)
180+
181+
response = Epidata.rvdss(geo_type='province',
182+
time_values= [202435, 202436],
183+
geo_value = ['nt','nu'])
184+
185+
self.assertEqual(response,expected_response2)
186+
187+
188+
# request by issue
189+
with self.subTest(name='issue request'):
190+
response = Epidata.rvdss(geo_type='province',
191+
time_values= [202435, 202436],
192+
geo_value = ['nl','nb'],
193+
issues = 20250227)
194+
195+
self.assertEqual(response,expected_response)
196+
197+
198+
# check requesting lists vs single values
199+
with self.subTest(name='duplicate aquisition'):
200+
# * with geo_value, single geo_type, time_value, issue
201+
connection_mock.cursor.return_value = self.cnx.cursor()
202+
connection_mock.commit = self.cnx.commit
203+
mock_sql.return_value = connection_mock
204+
205+
update(detection_subset3, self.logger)
206+
207+
response = Epidata.rvdss(geo_type='province',
208+
time_values= [202435, 202436],
209+
geo_value = "*",
210+
issues = 20250227)
211+
212+
response2 = Epidata.rvdss(geo_type='lab',
213+
time_values= 202435,
214+
geo_value = 'ouest du québec',
215+
issues = [20250220,20250227])
216+
217+
self.assertEqual(response,expected_response)
218+
self.assertEqual(response2,expected_response3)
219+
220+
221+

0 commit comments

Comments
 (0)