diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py index 04890a8657..b9f1d9b235 100644 --- a/src/llama_stack/core/routing_tables/models.py +++ b/src/llama_stack/core/routing_tables/models.py @@ -16,7 +16,6 @@ from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger from llama_stack_api import ( - ListModelsResponse, Model, ModelNotFoundError, Models, @@ -128,19 +127,6 @@ async def _get_dynamic_models_from_provider_data(self) -> list[Model]: return dynamic_models - async def list_models(self) -> ListModelsResponse: - # Get models from registry - registry_models = await self.get_all_with_type("model") - - # Get additional models available via provider_data (user-specific, not cached) - dynamic_models = await self._get_dynamic_models_from_provider_data() - - # Combine, avoiding duplicates (registry takes precedence) - registry_identifiers = {m.identifier for m in registry_models} - unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers] - - return ListModelsResponse(data=registry_models + unique_dynamic_models) - async def openai_list_models(self) -> OpenAIListModelsResponse: # Get models from registry registry_models = await self.get_all_with_type("model") diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 8ba1f2afdf..97aa2c6e21 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -89,7 +89,7 @@ class LlamaStack( RESOURCES = [ - ("models", Api.models, "register_model", "list_models"), + ("models", Api.models, "register_model", "openai_list_models"), ("shields", Api.shields, "register_shield", "list_shields"), ("datasets", Api.datasets, "register_dataset", "list_datasets"), ( @@ -156,14 +156,14 @@ async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'") models_impl = impls[Api.models] - response = await models_impl.list_models() - models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"} + response = await models_impl.openai_list_models() + models_list = {m.id: m for m in response.data if m.custom_metadata["model_type"] == "embedding"} default_model = models_list.get(default_model_id) if default_model is None: raise ValueError(f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}") - embedding_dimension = default_model.metadata.get("embedding_dimension") + embedding_dimension = default_model.custom_metadata.get("embedding_dimension") if embedding_dimension is None: raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") diff --git a/src/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py index f919d2afd7..dd3fbb6708 100644 --- a/src/llama_stack_api/__init__.py +++ b/src/llama_stack_api/__init__.py @@ -214,7 +214,6 @@ ) from .models import ( CommonModelFields, - ListModelsResponse, Model, ModelInput, Models, @@ -569,7 +568,6 @@ "RetrieveBatchRequest", "ListBenchmarksResponse", "ListDatasetsResponse", - "ListModelsResponse", "ListOpenAIChatCompletionResponse", "ListOpenAIFileResponse", "ListOpenAIResponseInputItem", diff --git a/src/llama_stack_api/models.py b/src/llama_stack_api/models.py index 3efdfe66bd..848e2e9162 100644 --- a/src/llama_stack_api/models.py +++ b/src/llama_stack_api/models.py @@ -77,10 +77,6 @@ class ModelInput(CommonModelFields): model_config = ConfigDict(protected_namespaces=()) -class ListModelsResponse(BaseModel): - data: list[Model] - - @json_schema_type class OpenAIModel(BaseModel): """A model from OpenAI. @@ -106,13 +102,6 @@ class OpenAIListModelsResponse(BaseModel): @runtime_checkable class Models(Protocol): - async def list_models(self) -> ListModelsResponse: - """List all models. - - :returns: A ListModelsResponse. - """ - ... - @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1) async def openai_list_models(self) -> OpenAIListModelsResponse: """List models using the OpenAI API. diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py index 5f75bc5222..ea966472f8 100644 --- a/tests/unit/core/test_stack_validation.py +++ b/tests/unit/core/test_stack_validation.py @@ -13,7 +13,7 @@ from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, VectorStoresConfig from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config from llama_stack.core.storage.datatypes import ServerStoresConfig, StorageConfig -from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield +from llama_stack_api import Api, ListShieldsResponse, ModelType, OpenAIListModelsResponse, OpenAIModel, Shield class TestVectorStoresValidation: @@ -40,7 +40,7 @@ async def test_validate_missing_model(self): ), ) mock_models = AsyncMock() - mock_models.list_models.return_value = ListModelsResponse(data=[]) + mock_models.openai_list_models.return_value = OpenAIListModelsResponse(data=[]) with pytest.raises(ValueError, match="not found"): await validate_vector_stores_config(run_config.vector_stores, {Api.models: mock_models}) @@ -68,14 +68,19 @@ async def test_validate_success(self): ), ) mock_models = AsyncMock() - mock_models.list_models.return_value = ListModelsResponse( + mock_models.openai_list_models.return_value = OpenAIListModelsResponse( data=[ - Model( - identifier="p/valid", # Must match provider_id/model_id format - model_type=ModelType.embedding, - metadata={"embedding_dimension": 768}, - provider_id="p", - provider_resource_id="valid", + OpenAIModel( + id="p/valid", # Must match provider_id/model_id format + object="model", + created=857142, + owned_by="llama_stack", + custom_metadata={ + "embedding_dimension": 768, + "model_type": ModelType.embedding.value, + "provider_id": "p", + "provider_resource_id": "valid", + }, ) ] ) diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 292ee8384e..3b5f33b433 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -160,9 +160,9 @@ async def test_models_routing_table(cached_disk_dist_registry): await table.register_model(model_id="test-model", provider_id="test_provider") await table.register_model(model_id="test-model-2", provider_id="test_provider") - models = await table.list_models() - assert len(models.data) == 2 - model_ids = {m.identifier for m in models.data} + models = await table.get_all_with_type("model") + assert len(models) == 2 + model_ids = {m.identifier for m in models} assert "test_provider/test-model" in model_ids assert "test_provider/test-model-2" in model_ids @@ -199,7 +199,7 @@ async def test_models_routing_table(cached_disk_dist_registry): await table.unregister_model(model_id="test_provider/test-model") await table.unregister_model(model_id="test_provider/test-model-2") - models = await table.list_models() + models = await table.openai_list_models() assert len(models.data) == 0 # Test openai list models @@ -321,9 +321,9 @@ async def test_double_registration_models_positive(cached_disk_dist_registry): await table.register_model(model_id="test-model", provider_id="test_provider", metadata={"param1": "value1"}) # Verify only one model exists - models = await table.list_models() + models = await table.openai_list_models() assert len(models.data) == 1 - assert models.data[0].identifier == "test_provider/test-model" + assert models.data[0].id == "test_provider/test-model" async def test_double_registration_models_negative(cached_disk_dist_registry): @@ -407,9 +407,9 @@ async def test_double_registration_different_providers(cached_disk_dist_registry await table.register_model(model_id="shared-model", provider_id="provider2") # Verify both models exist with different identifiers - models = await table.list_models() + models = await table.openai_list_models() assert len(models.data) == 2 - model_ids = {m.identifier for m in models.data} + model_ids = {m.id for m in models.data} assert "provider1/shared-model" in model_ids assert "provider2/shared-model" in model_ids @@ -472,11 +472,11 @@ async def test_models_alias_registration_and_lookup(cached_disk_dist_registry): ) # Verify the model was registered with alias as identifier (not namespaced) - models = await table.list_models() + models = await table.openai_list_models() assert len(models.data) == 1 model = models.data[0] - assert model.identifier == "test_provider/actual-provider-model" - assert model.provider_resource_id == "actual-provider-model" + assert model.id == "test_provider/actual-provider-model" + assert model.custom_metadata["provider_resource_id"] == "actual-provider-model" # Test lookup by alias fails with pytest.raises(ModelNotFoundError, match="Model 'my-alias' not found"): @@ -499,9 +499,9 @@ async def test_models_multi_provider_disambiguation(cached_disk_dist_registry): await table.register_model(model_id="common-model", provider_id="provider2") # Verify both models get namespaced identifiers - models = await table.list_models() + models = await table.openai_list_models() assert len(models.data) == 2 - identifiers = {m.identifier for m in models.data} + identifiers = {m.id for m in models.data} assert identifiers == {"provider1/common-model", "provider2/common-model"} # Test lookup by full namespaced identifier works @@ -527,11 +527,11 @@ async def test_models_fallback_lookup_behavior(cached_disk_dist_registry): await table.register_model(model_id="test-model", provider_id="test_provider") # Verify namespaced identifier was created - models = await table.list_models() + models = await table.openai_list_models() assert len(models.data) == 1 model = models.data[0] - assert model.identifier == "test_provider/test-model" - assert model.provider_resource_id == "test-model" + assert model.id == "test_provider/test-model" + assert model.custom_metadata["provider_resource_id"] == "test-model" # Test lookup by full namespaced identifier (direct hit via get_object_by_identifier) retrieved_model = await table.get_model("test_provider/test-model") @@ -554,9 +554,9 @@ async def test_models_source_tracking_default(cached_disk_dist_registry): # Register model via register_model (should get default source) await table.register_model(model_id="user-model", provider_id="test_provider") - models = await table.list_models() - assert len(models.data) == 1 - model = models.data[0] + models = await table.get_all_with_type("model") + assert len(models) == 1 + model = models[0] assert model.source == RegistryEntrySource.via_register_api assert model.identifier == "test_provider/user-model" @@ -588,11 +588,11 @@ async def test_models_source_tracking_provider(cached_disk_dist_registry): ] await table.update_registered_models("test_provider", provider_models) - models = await table.list_models() - assert len(models.data) == 2 + models = await table.get_all_with_type("model") + assert len(models) == 2 # All models should have provider source - for model in models.data: + for model in models: assert model.source == RegistryEntrySource.listed_from_provider assert model.provider_id == "test_provider" @@ -611,9 +611,9 @@ async def test_models_source_interaction_preserves_default(cached_disk_dist_regi ) # Verify user model is registered with default source - models = await table.list_models() - assert len(models.data) == 1 - user_model = models.data[0] + models = await table.get_all_with_type("model") + assert len(models) == 1 + user_model = models[0] assert user_model.source == RegistryEntrySource.via_register_api assert user_model.identifier == "test_provider/provider-model-1" assert user_model.provider_resource_id == "provider-model-1" @@ -638,13 +638,12 @@ async def test_models_source_interaction_preserves_default(cached_disk_dist_regi await table.update_registered_models("test_provider", provider_models) # Verify user model with alias is preserved, but provider added new model - models = await table.list_models() - assert len(models.data) == 2 + models = await table.get_all_with_type("model") + assert len(models) == 2 # Find the user model and provider model - user_model = next((m for m in models.data if m.identifier == "test_provider/provider-model-1"), None) - provider_model = next((m for m in models.data if m.identifier == "test_provider/different-model"), None) - + user_model = next((m for m in models if m.identifier == "test_provider/provider-model-1"), None) + provider_model = next((m for m in models if m.identifier == "test_provider/different-model"), None) assert user_model is not None assert user_model.source == RegistryEntrySource.via_register_api assert user_model.provider_resource_id == "provider-model-1" @@ -678,8 +677,8 @@ async def test_models_source_interaction_cleanup_provider_models(cached_disk_dis await table.update_registered_models("test_provider", provider_models_v1) # Verify we have both user and provider models - models = await table.list_models() - assert len(models.data) == 2 + models = await table.get_all_with_type("model") + assert len(models) == 2 # Now update with new provider models (should remove old provider models) provider_models_v2 = [ @@ -694,17 +693,17 @@ async def test_models_source_interaction_cleanup_provider_models(cached_disk_dis await table.update_registered_models("test_provider", provider_models_v2) # Should have user model + new provider model, old provider model gone - models = await table.list_models() - assert len(models.data) == 2 + models = await table.get_all_with_type("model") + assert len(models) == 2 - identifiers = {m.identifier for m in models.data} + identifiers = {m.identifier for m in models} assert "test_provider/user-model" in identifiers # User model preserved assert "test_provider/provider-model-new" in identifiers # New provider model (uses provider's identifier) assert "test_provider/provider-model-old" not in identifiers # Old provider model removed # Verify sources are correct - user_model = next((m for m in models.data if m.identifier == "test_provider/user-model"), None) - provider_model = next((m for m in models.data if m.identifier == "test_provider/provider-model-new"), None) + user_model = next((m for m in models if m.identifier == "test_provider/user-model"), None) + provider_model = next((m for m in models if m.identifier == "test_provider/provider-model-new"), None) assert user_model.source == RegistryEntrySource.via_register_api assert provider_model.source == RegistryEntrySource.listed_from_provider diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py index bf6a24c906..5e1b58062d 100644 --- a/tests/unit/server/test_access_control.py +++ b/tests/unit/server/test_access_control.py @@ -68,7 +68,7 @@ async def test_access_control_with_cache(mock_get_authenticated_user, test_setup await registry.register(model_data_scientist) mock_get_authenticated_user.return_value = User("test-user", {"roles": ["admin"], "teams": ["management"]}) - all_models = await routing_table.list_models() + all_models = await routing_table.openai_list_models() assert len(all_models.data) == 2 model = await routing_table.get_model("model-public") @@ -79,9 +79,9 @@ async def test_access_control_with_cache(mock_get_authenticated_user, test_setup await routing_table.get_model("model-data-scientist") mock_get_authenticated_user.return_value = User("test-user", {"roles": ["data-scientist"], "teams": ["other-team"]}) - all_models = await routing_table.list_models() + all_models = await routing_table.openai_list_models() assert len(all_models.data) == 1 - assert all_models.data[0].identifier == "model-public" + assert all_models.data[0].id == "model-public" model = await routing_table.get_model("model-public") assert model.identifier == "model-public" with pytest.raises(ValueError): @@ -90,9 +90,9 @@ async def test_access_control_with_cache(mock_get_authenticated_user, test_setup await routing_table.get_model("model-data-scientist") mock_get_authenticated_user.return_value = User("test-user", {"roles": ["data-scientist"], "teams": ["ml-team"]}) - all_models = await routing_table.list_models() + all_models = await routing_table.openai_list_models() assert len(all_models.data) == 2 - model_ids = [m.identifier for m in all_models.data] + model_ids = [m.id for m in all_models.data] assert "model-public" in model_ids assert "model-data-scientist" in model_ids assert "model-admin" not in model_ids @@ -161,8 +161,8 @@ async def test_access_control_empty_attributes(mock_get_authenticated_user, test ) result = await routing_table.get_model("model-empty-attrs") assert result.identifier == "model-empty-attrs" - all_models = await routing_table.list_models() - model_ids = [m.identifier for m in all_models.data] + all_models = await routing_table.openai_list_models() + model_ids = [m.id for m in all_models.data] assert "model-empty-attrs" in model_ids @@ -191,9 +191,9 @@ async def test_no_user_attributes(mock_get_authenticated_user, test_setup): with pytest.raises(ValueError): await routing_table.get_model("model-restricted") - all_models = await routing_table.list_models() + all_models = await routing_table.openai_list_models() assert len(all_models.data) == 1 - assert all_models.data[0].identifier == "model-public-2" + assert all_models.data[0].id == "model-public-2" @patch("llama_stack.core.routing_tables.common.get_authenticated_user") @@ -718,8 +718,8 @@ async def test_dynamic_models_respect_rbac( mock_get_user_common.return_value = admin_user mock_get_user_models.return_value = admin_user - result = await routing_table.list_models() - model_ids = [m.identifier for m in result.data] + result = await routing_table.openai_list_models() + model_ids = [m.id for m in result.data] assert "test-provider/dynamic-model-1" in model_ids assert "test-provider/dynamic-model-2" in model_ids @@ -729,7 +729,7 @@ async def test_dynamic_models_respect_rbac( mock_get_user_common.return_value = restricted_user mock_get_user_models.return_value = restricted_user - result = await routing_table.list_models() - model_ids = [m.identifier for m in result.data] + result = await routing_table.openai_list_models() + model_ids = [m.id for m in result.data] # Restricted user should see no models (no ownership, not admin) assert len(model_ids) == 0