{ lib, stdenv, buildPythonPackage, fetchFromGitHub, # build-system setuptools, # dependencies cloudevents, fastapi, grpc-interceptor, grpcio, grpcio-tools, httpx, kubernetes, numpy, orjson, pandas, prometheus-client, protobuf, psutil, pydantic, python-dateutil, pyyaml, six, tabulate, timing-asgi, uvicorn, # optional-dependencies # storage kserve-storage, # logging asgi-logger, # ray ray, # llm vllm, # tests avro, grpcio-testing, jinja2, pytest-asyncio, pytest-cov-stub, pytest-httpx, pytest-xdist, pytestCheckHook, tomlkit, }: buildPythonPackage rec { pname = "kserve"; version = "0.16.0"; pyproject = true; src = fetchFromGitHub { owner = "kserve"; repo = "kserve"; tag = "v${version}"; hash = "sha256-f6ILZMLxfckEpy7wSgCqUx89JWSnn0DbQiqRSHcQHms="; }; # Fix vllm 0.12.0 compatibility # Patch submitted upstream: https://github.com/kserve/kserve/pull/4882 postPatch = '' substituteInPlace kserve/protocol/rest/openai/types/__init__.py \ --replace-fail \ "from vllm.entrypoints.openai.protocol import EmbeddingRequest, EmbeddingResponse as Embedding, EmbeddingResponseData, EmbeddingCompletionRequest" \ "from vllm.entrypoints.pooling.embed.protocol import EmbeddingRequest, EmbeddingResponse as Embedding, EmbeddingResponseData, EmbeddingCompletionRequest" \ --replace-fail \ "from vllm.entrypoints.openai.protocol import RerankRequest, RerankResponse as Rerank" \ "from vllm.entrypoints.pooling.score.protocol import RerankRequest, RerankResponse as Rerank" ''; sourceRoot = "${src.name}/python/kserve"; pythonRelaxDeps = [ "fastapi" "httpx" "numpy" "prometheus-client" "protobuf" "uvicorn" "psutil" ]; build-system = [ setuptools ]; dependencies = [ cloudevents fastapi grpc-interceptor grpcio grpcio-tools httpx kubernetes numpy orjson pandas prometheus-client protobuf psutil pydantic python-dateutil pyyaml six tabulate timing-asgi uvicorn ] ++ uvicorn.optional-dependencies.standard; optional-dependencies = { storage = [ kserve-storage ]; logging = [ asgi-logger ]; ray = [ ray ] ++ ray.optional-dependencies.serve; llm = [ vllm ]; }; nativeCheckInputs = [ avro grpcio-testing jinja2 pytest-asyncio pytest-cov-stub pytest-httpx pytest-xdist pytestCheckHook tomlkit ] ++ lib.concatAttrValues optional-dependencies; pythonImportsCheck = [ "kserve" ]; disabledTestPaths = [ # Looks for a config file at the root of the repository "test/test_inference_service_client.py" # AssertionError "test/test_server.py::TestTFHttpServerLoadAndUnLoad::test_unload" # Race condition when called concurrently between two instances of the same model (i.e. in nixpkgs-review) "test/test_dataplane.py::TestDataPlane::test_model_metadata[TEST_RAY_SERVE_MODEL]" ] ++ lib.optionals stdenv.hostPlatform.isDarwin [ # RuntimeError: Failed to start GCS "test/test_dataplane.py::TestDataPlane::test_explain" "test/test_dataplane.py::TestDataPlane::test_infer" "test/test_dataplane.py::TestDataPlane::test_model_metadata" "test/test_dataplane.py::TestDataPlane::test_server_readiness" "test/test_server.py::TestRayServer::test_explain" "test/test_server.py::TestRayServer::test_health_handler" "test/test_server.py::TestRayServer::test_infer" "test/test_server.py::TestRayServer::test_list_handler" "test/test_server.py::TestRayServer::test_liveness_handler" "test/test_server.py::TestRayServer::test_predict" # Permission Error "test/test_server.py::TestMutiProcessServer::test_rest_server_multiprocess" ]; disabledTests = [ # Started failing since vllm was updated to 0.13.0 # pydantic_core._pydantic_core.ValidationError: 1 validation error for RerankResponse # usage.prompt_tokens # Field required [type=missing, input_value={'total_tokens': 100}, input_type=dict] # For further information visit https://errors.pydantic.dev/2.11/v/missing "test_create_rerank" "test_create_embedding" # AssertionError: assert CompletionReq...lm_xargs=None) == CompletionReq...lm_xargs=None) "test_convert_params" # Flaky: ray.exceptions.ActorDiedError: The actor died unexpectedly before finishing this task. "test_explain" "test_infer" "test_predict" # Require network access "test_infer_graph_endpoint" "test_infer_path_based_routing" # Tries to access `/tmp` (hardcoded) "test_local_path_with_out_dir_exist" ] ++ lib.optionals stdenv.hostPlatform.isDarwin [ "test_local_path_with_out_dir_not_exist" ]; __darwinAllowLocalNetworking = true; meta = { description = "Standardized Serverless ML Inference Platform on Kubernetes"; homepage = "https://github.com/kserve/kserve/tree/master/python/kserve"; changelog = "https://github.com/kserve/kserve/releases/tag/${src.tag}"; license = lib.licenses.asl20; maintainers = with lib.maintainers; [ GaetanLepage ]; }; }