You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
D:\envs\hss-inference\Lib\site-packages\transformers\modeling_utils.py:5055: FutureWarning: _is_quantized_training_enabled is going to be deprecated in transformers 4.39.0. Please use model.hf_quantizer.is_trainable instead
warnings.warn(
2024-12-31 09:59:09,427 transformers.modeling_utils 28084 WARNING loss_type=None was set in the config but it is unrecognised. Using the default loss: ForCausalLMLoss. loss_type=None was set in the config but it is unrecognised. Using the default loss: ForCausalLMLoss.
0it [00:00, ?it/s]
2024-12-31 09:59:09,457 xinference.core.worker 13732 ERROR Failed to load model qwen2.5-coder-instruct-0
Traceback (most recent call last):
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\worker.py", line 897, in launch_builtin_model
await model_ref.load()
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 231, in send
return self._process_result_message(result)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 667, in send
result = await self._run_coro(message.message_id, coro)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 370, in _run_coro
return await coro
File "D:\envs\hss-inference\Lib\site-packages\xoscar\api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 558, in on_receive
raise ex
File "xoscar\core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\model.py", line 409, in load
self._model.load()
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\core.py", line 672, in load
super().load()
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\core.py", line 306, in load
) = load_compress_model(
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\compression.py", line 163, in load_compress_model
model, name, device, value=compressed_state_dict[name]
^^^^^^^^^^^^^^^^^
KeyError: [address=192.168.3.7:63990, pid=28084] 'model.embed_tokens.weight'
2024-12-31 09:59:09,502 xinference.core.worker 13732 ERROR [request cbdc5337-c71a-11ef-9dc3-1091d1d48cc7] Leave launch_builtin_model, error: [address=192.168.3.7:63990, pid=28084] 'model.embed_tokens.weight', elapsed time: 11 s
Traceback (most recent call last):
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\utils.py", line 90, in wrapped
ret = await func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\worker.py", line 897, in launch_builtin_model
await model_ref.load()
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 231, in send
return self._process_result_message(result)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 667, in send
result = await self._run_coro(message.message_id, coro)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 370, in _run_coro
return await coro
File "D:\envs\hss-inference\Lib\site-packages\xoscar\api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 558, in on_receive
raise ex
File "xoscar\core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\model.py", line 409, in load
self._model.load()
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\core.py", line 672, in load
super().load()
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\core.py", line 306, in load
) = load_compress_model(
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\compression.py", line 163, in load_compress_model
model, name, device, value=compressed_state_dict[name]
^^^^^^^^^^^^^^^^^
KeyError: [address=192.168.3.7:63990, pid=28084] 'model.embed_tokens.weight'
2024-12-31 09:59:09,502 xinference.api.restful_api 8008 ERROR [address=192.168.3.7:63990, pid=28084] 'model.embed_tokens.weight'
Traceback (most recent call last):
File "D:\envs\hss-inference\Lib\site-packages\xinference\api\restful_api.py", line 1002, in launch_model
model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 231, in send
return self._process_result_message(result)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 667, in send
result = await self._run_coro(message.message_id, coro)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 370, in _run_coro
return await coro
File "D:\envs\hss-inference\Lib\site-packages\xoscar\api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 558, in on_receive
raise ex
File "xoscar\core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\supervisor.py", line 1041, in launch_builtin_model
await _launch_model()
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\supervisor.py", line 1005, in _launch_model
await _launch_one_model(rep_model_uid)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\supervisor.py", line 984, in _launch_one_model
await worker_ref.launch_builtin_model(
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 284, in __pyx_actor_method_wrapper
async with lock:
File "xoscar\core.pyx", line 287, in xoscar.core.__pyx_actor_method_wrapper
result = await result
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\utils.py", line 90, in wrapped
ret = await func(*args, **kwargs)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\worker.py", line 897, in launch_builtin_model
await model_ref.load()
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 231, in send
return self._process_result_message(result)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 667, in send
result = await self._run_coro(message.message_id, coro)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 370, in _run_coro
return await coro
File "D:\envs\hss-inference\Lib\site-packages\xoscar\api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 558, in on_receive
raise ex
File "xoscar\core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\model.py", line 409, in load
self._model.load()
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\core.py", line 672, in load
super().load()
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\core.py", line 306, in load
) = load_compress_model(
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\compression.py", line 163, in load_compress_model
model, name, device, value=compressed_state_dict[name]
^^^^^^^^^^^^^^^^^
KeyError: [address=192.168.3.7:63990, pid=28084] 'model.embed_tokens.weight'
Expected behavior / 期待表现
模型能正常启动
The text was updated successfully, but these errors were encountered:
System Info / 系統信息
cuda:12.4
transformers:Qwen2.5-Coder-14B-Instruct
Python:3.11.0
windows11
Running Xinference with Docker? / 是否使用 Docker 运行 Xinference?
Version info / 版本信息
1.1.1
The command used to start Xinference / 用以启动 xinference 的命令
xinference-local -H 192.168.3.7 --port 7861 --auth-config auth.json
Reproduction / 复现过程
1、启动xinference
2、在模型启动页面选择 Qwen2.5-Coder-14B-Instruct，指定 model engine 为 transformers，model format 为 pytorch，model size 为 14，quantization 为 8-bit
3、点启动按钮
控制台报错如下:
2024-12-31 09:59:09,321 transformers.modeling_utils 28084 INFO Instantiating Qwen2ForCausalLM model under default dtype torch.float32.
Instantiating Qwen2ForCausalLM model under default dtype torch.float32.
2024-12-31 09:59:09,321 transformers.generation.configuration_utils 28084 INFO Generate config GenerationConfig {
"bos_token_id": 151643,
"eos_token_id": 151645
}
Generate config GenerationConfig {
"bos_token_id": 151643,
"eos_token_id": 151645
}
D:\envs\hss-inference\Lib\site-packages\transformers\modeling_utils.py:5055: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead
warnings.warn(
2024-12-31 09:59:09,427 transformers.modeling_utils 28084 WARNING `loss_type=None` was set in the config but it is unrecognised. Using the default loss: `ForCausalLMLoss`. `loss_type=None` was set in the config but it is unrecognised. Using the default loss: `ForCausalLMLoss`.
0it [00:00, ?it/s]
2024-12-31 09:59:09,457 xinference.core.worker 13732 ERROR Failed to load model qwen2.5-coder-instruct-0
Traceback (most recent call last):
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\worker.py", line 897, in launch_builtin_model
await model_ref.load()
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 231, in send
return self._process_result_message(result)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 667, in send
result = await self._run_coro(message.message_id, coro)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 370, in _run_coro
return await coro
File "D:\envs\hss-inference\Lib\site-packages\xoscar\api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 558, in on_receive
raise ex
File "xoscar\core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\model.py", line 409, in load
self._model.load()
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\core.py", line 672, in load
super().load()
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\core.py", line 306, in load
) = load_compress_model(
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\compression.py", line 163, in load_compress_model
model, name, device, value=compressed_state_dict[name]
^^^^^^^^^^^^^^^^^
KeyError: [address=192.168.3.7:63990, pid=28084] 'model.embed_tokens.weight'
2024-12-31 09:59:09,502 xinference.core.worker 13732 ERROR [request cbdc5337-c71a-11ef-9dc3-1091d1d48cc7] Leave launch_builtin_model, error: [address=192.168.3.7:63990, pid=28084] 'model.embed_tokens.weight', elapsed time: 11 s
Traceback (most recent call last):
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\utils.py", line 90, in wrapped
ret = await func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\worker.py", line 897, in launch_builtin_model
await model_ref.load()
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 231, in send
return self._process_result_message(result)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 667, in send
result = await self._run_coro(message.message_id, coro)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 370, in _run_coro
return await coro
File "D:\envs\hss-inference\Lib\site-packages\xoscar\api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 558, in on_receive
raise ex
File "xoscar\core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\model.py", line 409, in load
self._model.load()
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\core.py", line 672, in load
super().load()
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\core.py", line 306, in load
) = load_compress_model(
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\compression.py", line 163, in load_compress_model
model, name, device, value=compressed_state_dict[name]
^^^^^^^^^^^^^^^^^
KeyError: [address=192.168.3.7:63990, pid=28084] 'model.embed_tokens.weight'
2024-12-31 09:59:09,502 xinference.api.restful_api 8008 ERROR [address=192.168.3.7:63990, pid=28084] 'model.embed_tokens.weight'
Traceback (most recent call last):
File "D:\envs\hss-inference\Lib\site-packages\xinference\api\restful_api.py", line 1002, in launch_model
model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 231, in send
return self._process_result_message(result)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 667, in send
result = await self._run_coro(message.message_id, coro)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 370, in _run_coro
return await coro
File "D:\envs\hss-inference\Lib\site-packages\xoscar\api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 558, in on_receive
raise ex
File "xoscar\core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\supervisor.py", line 1041, in launch_builtin_model
await _launch_model()
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\supervisor.py", line 1005, in _launch_model
await _launch_one_model(rep_model_uid)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\supervisor.py", line 984, in _launch_one_model
await worker_ref.launch_builtin_model(
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 284, in __pyx_actor_method_wrapper
async with lock:
File "xoscar\core.pyx", line 287, in xoscar.core.__pyx_actor_method_wrapper
result = await result
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\utils.py", line 90, in wrapped
ret = await func(*args, **kwargs)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\worker.py", line 897, in launch_builtin_model
await model_ref.load()
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 231, in send
return self._process_result_message(result)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 667, in send
result = await self._run_coro(message.message_id, coro)
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xoscar\backends\pool.py", line 370, in _run_coro
return await coro
File "D:\envs\hss-inference\Lib\site-packages\xoscar\api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 558, in on_receive
raise ex
File "xoscar\core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
^^^^^^^^^^^^^^^^^
File "xoscar\core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\core\model.py", line 409, in load
self._model.load()
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\core.py", line 672, in load
super().load()
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\core.py", line 306, in load
) = load_compress_model(
^^^^^^^^^^^^^^^^^
File "D:\envs\hss-inference\Lib\site-packages\xinference\model\llm\transformers\compression.py", line 163, in load_compress_model
model, name, device, value=compressed_state_dict[name]
^^^^^^^^^^^^^^^^^
KeyError: [address=192.168.3.7:63990, pid=28084] 'model.embed_tokens.weight'
Expected behavior / 期待表现
模型能正常启动
The text was updated successfully, but these errors were encountered: