Update image-processing tests: mock image downloads and assert direct image URLs

Priyanka-Microsoft · Priyanka-Microsoft · commit c6bcbc034c1d · 2025-12-05T11:48:43.000+05:30
diff --git a/code/tests/functional/tests/functions/advanced_image_processing/test_advanced_image_processing.py b/code/tests/functional/tests/functions/advanced_image_processing/test_advanced_image_processing.py
@@ -63,6 +63,12 @@ def setup_blob_metadata_mocking(httpserver: HTTPServer, app_config: AppConfig):
         method="PUT",
     ).respond_with_data()
 
+    # Mock GET request for image download (base64 conversion)
+    httpserver.expect_request(
+        f"/{app_config.get_from_json('AZURE_BLOB_STORAGE_INFO','containerName')}/{FILE_NAME}",
+        method="GET",
+    ).respond_with_data(b"fake_image_data", content_type="image/jpeg")
+
 
 @pytest.fixture(autouse=True)
 def setup_caption_response(httpserver: HTTPServer, app_config: AppConfig):
@@ -192,11 +198,10 @@ def test_image_passed_to_llm_to_generate_caption(
         ),
     )[0]
 
-    assert request.get_json()["messages"][1]["content"][1]["image_url"][
-        "url"
-    ].startswith(
-        f"{app_config.get('AZURE_STORAGE_ACCOUNT_ENDPOINT')}{app_config.get_from_json('AZURE_BLOB_STORAGE_INFO','containerName')}/{FILE_NAME}"
-    )
+    # The URL should be a direct URL (not base64) for http/https URLs to save tokens
+    image_url = request.get_json()["messages"][1]["content"][1]["image_url"]["url"]
+    expected_url_prefix = f"{app_config.get('AZURE_STORAGE_ACCOUNT_ENDPOINT')}{app_config.get_from_json('AZURE_BLOB_STORAGE_INFO','containerName')}/{FILE_NAME}"
+    assert image_url.startswith(expected_url_prefix), f"Expected direct URL starting with {expected_url_prefix}, got {image_url[:100]}"
 
 
 def test_embeddings_generated_for_caption(
diff --git a/code/tests/utilities/helpers/test_push_embedder.py b/code/tests/utilities/helpers/test_push_embedder.py
@@ -171,6 +171,20 @@ def azure_computer_vision_mock():
         yield mock
 
 
+@pytest.fixture(autouse=True)  # autouse: applied to every test in this module without being requested
+def urllib_request_mock():
+    with patch(
+        "backend.batch.utilities.helpers.embedders.push_embedder.urllib.request.urlopen"
+    ) as mock:
+        # Stand-in for the object urlopen() returns; usable directly or inside a `with` block
+        mock_response = MagicMock()
+        mock_response.read.return_value = b"fake_image_data"  # placeholder payload, not a real image
+        mock_response.__enter__.return_value = mock_response  # `with urlopen(...) as r` binds r to this mock
+        mock_response.__exit__.return_value = None  # falsy, so exceptions raised inside the `with` propagate
+        mock.return_value = mock_response
+        yield mock  # the patch stays active until the test finishes
+
+
 def test_embed_file_advanced_image_processing_vectorizes_image(
     azure_computer_vision_mock,
 ):
@@ -200,29 +214,23 @@ def test_embed_file_advanced_image_processing_uses_vision_model_for_captioning(
     push_embedder.embed_file(source_url, "some-file-name.jpg")
 
     # then
-    llm_helper_mock.get_chat_completion.assert_called_once_with(
-        [
-            {
-                "role": "system",
-                "content": """You are an assistant that generates rich descriptions of images.
-You need to be accurate in the information you extract and detailed in the descriptons you generate.
-Do not abbreviate anything and do not shorten sentances. Explain the image completely.
-If you are provided with an image of a flow chart, describe the flow chart in detail.
-If the image is mostly text, use OCR to extract the text as it is displayed in the image.""",
-            },
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "text": "Describe this image in detail. Limit the response to 500 words.",
-                        "type": "text",
-                    },
-                    {"image_url": {"url": source_url}, "type": "image_url"},
-                ],
-            },
-        ],
-        env_helper_mock.AZURE_OPENAI_VISION_MODEL,
-    )
+    # Verify the vision model is called with direct URL (not base64) for token efficiency
+    llm_helper_mock.get_chat_completion.assert_called_once()
+    call_args = llm_helper_mock.get_chat_completion.call_args
+    messages = call_args[0][0]
+    model = call_args[0][1]
+
+    assert model == env_helper_mock.AZURE_OPENAI_VISION_MODEL
+    assert len(messages) == 2
+    assert messages[0]["role"] == "system"
+    assert "You are an assistant that generates rich descriptions of images" in messages[0]["content"]
+    assert messages[1]["role"] == "user"
+    assert len(messages[1]["content"]) == 2
+    assert messages[1]["content"][0]["type"] == "text"
+    assert "Describe this image in detail" in messages[1]["content"][0]["text"]
+    assert messages[1]["content"][1]["type"] == "image_url"
+    # Direct URL should be used (not base64) for http/https URLs to save tokens
+    assert messages[1]["content"][1]["image_url"]["url"] == source_url
 
 
 def test_embed_file_advanced_image_processing_stores_embeddings_in_search_index(