feat: add embeddings model management (#1461)

* feat: add embeddings model management backend support

Co-Authored-By: Junyan Qin <Chin> <rockchinq@gmail.com>

* feat: add embeddings model management frontend support

Co-Authored-By: Junyan Qin <Chin> <rockchinq@gmail.com>

* chore: revert HttpClient URL to production setting

Co-Authored-By: Junyan Qin <Chin> <rockchinq@gmail.com>

* refactor: integrate embeddings models into models page with tabs

Co-Authored-By: Junyan Qin <Chin> <rockchinq@gmail.com>

* perf: move files

* perf: remove `s`

* feat: allow requester to declare supported types in manifest

* feat(embedding): delete dimension and encoding format

* feat: add extra_args for embedding moels

* perf: i18n ref

* fix: linter err

* fix: lint err

* fix: linter err

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Junyan Qin <Chin> <rockchinq@gmail.com>
This commit is contained in:
devin-ai-integration[bot]
2025-05-21 12:42:39 +08:00
committed by Junyan Qin
parent a01706d163
commit d2b93b3296
43 changed files with 1370 additions and 64 deletions

View File

@@ -9,18 +9,18 @@ class LLMModelsRouterGroup(group.RouterGroup):
@self.route('', methods=['GET', 'POST'])
async def _() -> str:
if quart.request.method == 'GET':
return self.success(data={'models': await self.ap.model_service.get_llm_models()})
return self.success(data={'models': await self.ap.llm_model_service.get_llm_models()})
elif quart.request.method == 'POST':
json_data = await quart.request.json
model_uuid = await self.ap.model_service.create_llm_model(json_data)
model_uuid = await self.ap.llm_model_service.create_llm_model(json_data)
return self.success(data={'uuid': model_uuid})
@self.route('/<model_uuid>', methods=['GET', 'PUT', 'DELETE'])
async def _(model_uuid: str) -> str:
if quart.request.method == 'GET':
model = await self.ap.model_service.get_llm_model(model_uuid)
model = await self.ap.llm_model_service.get_llm_model(model_uuid)
if model is None:
return self.http_status(404, -1, 'model not found')
@@ -29,11 +29,11 @@ class LLMModelsRouterGroup(group.RouterGroup):
elif quart.request.method == 'PUT':
json_data = await quart.request.json
await self.ap.model_service.update_llm_model(model_uuid, json_data)
await self.ap.llm_model_service.update_llm_model(model_uuid, json_data)
return self.success()
elif quart.request.method == 'DELETE':
await self.ap.model_service.delete_llm_model(model_uuid)
await self.ap.llm_model_service.delete_llm_model(model_uuid)
return self.success()
@@ -41,6 +41,49 @@ class LLMModelsRouterGroup(group.RouterGroup):
async def _(model_uuid: str) -> str:
json_data = await quart.request.json
await self.ap.model_service.test_llm_model(model_uuid, json_data)
await self.ap.llm_model_service.test_llm_model(model_uuid, json_data)
return self.success()
@group.group_class('models/embedding', '/api/v1/provider/models/embedding')
class EmbeddingModelsRouterGroup(group.RouterGroup):
async def initialize(self) -> None:
@self.route('', methods=['GET', 'POST'])
async def _() -> str:
if quart.request.method == 'GET':
return self.success(data={'models': await self.ap.embedding_models_service.get_embedding_models()})
elif quart.request.method == 'POST':
json_data = await quart.request.json
model_uuid = await self.ap.embedding_models_service.create_embedding_model(json_data)
return self.success(data={'uuid': model_uuid})
@self.route('/<model_uuid>', methods=['GET', 'PUT', 'DELETE'])
async def _(model_uuid: str) -> str:
if quart.request.method == 'GET':
model = await self.ap.embedding_models_service.get_embedding_model(model_uuid)
if model is None:
return self.http_status(404, -1, 'model not found')
return self.success(data={'model': model})
elif quart.request.method == 'PUT':
json_data = await quart.request.json
await self.ap.embedding_models_service.update_embedding_model(model_uuid, json_data)
return self.success()
elif quart.request.method == 'DELETE':
await self.ap.embedding_models_service.delete_embedding_model(model_uuid)
return self.success()
@self.route('/<model_uuid>/test', methods=['POST'])
async def _(model_uuid: str) -> str:
json_data = await quart.request.json
await self.ap.embedding_models_service.test_embedding_model(model_uuid, json_data)
return self.success()

View File

@@ -8,7 +8,8 @@ class RequestersRouterGroup(group.RouterGroup):
async def initialize(self) -> None:
@self.route('', methods=['GET'])
async def _() -> quart.Response:
return self.success(data={'requesters': self.ap.model_mgr.get_available_requesters_info()})
model_type = quart.request.args.get('type', '')
return self.success(data={'requesters': self.ap.model_mgr.get_available_requesters_info(model_type)})
@self.route('/<requester_name>', methods=['GET'])
async def _(requester_name: str) -> quart.Response:

View File

@@ -10,7 +10,7 @@ from ....provider.modelmgr import requester as model_requester
from ....provider import entities as llm_entities
class ModelsService:
class LLMModelsService:
ap: app.Application
def __init__(self, ap: app.Application) -> None:
@@ -103,3 +103,90 @@ class ModelsService:
funcs=[],
extra_args={},
)
class EmbeddingModelsService:
ap: app.Application
def __init__(self, ap: app.Application) -> None:
self.ap = ap
async def get_embedding_models(self) -> list[dict]:
result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.EmbeddingModel))
models = result.all()
return [self.ap.persistence_mgr.serialize_model(persistence_model.EmbeddingModel, model) for model in models]
async def create_embedding_model(self, model_data: dict) -> str:
model_data['uuid'] = str(uuid.uuid4())
await self.ap.persistence_mgr.execute_async(
sqlalchemy.insert(persistence_model.EmbeddingModel).values(**model_data)
)
embedding_model = await self.get_embedding_model(model_data['uuid'])
await self.ap.model_mgr.load_embedding_model(embedding_model)
return model_data['uuid']
async def get_embedding_model(self, model_uuid: str) -> dict | None:
result = await self.ap.persistence_mgr.execute_async(
sqlalchemy.select(persistence_model.EmbeddingModel).where(
persistence_model.EmbeddingModel.uuid == model_uuid
)
)
model = result.first()
if model is None:
return None
return self.ap.persistence_mgr.serialize_model(persistence_model.EmbeddingModel, model)
async def update_embedding_model(self, model_uuid: str, model_data: dict) -> None:
if 'uuid' in model_data:
del model_data['uuid']
await self.ap.persistence_mgr.execute_async(
sqlalchemy.update(persistence_model.EmbeddingModel)
.where(persistence_model.EmbeddingModel.uuid == model_uuid)
.values(**model_data)
)
await self.ap.model_mgr.remove_embedding_model(model_uuid)
embedding_model = await self.get_embedding_model(model_uuid)
await self.ap.model_mgr.load_embedding_model(embedding_model)
async def delete_embedding_model(self, model_uuid: str) -> None:
await self.ap.persistence_mgr.execute_async(
sqlalchemy.delete(persistence_model.EmbeddingModel).where(
persistence_model.EmbeddingModel.uuid == model_uuid
)
)
await self.ap.model_mgr.remove_embedding_model(model_uuid)
async def test_embedding_model(self, model_uuid: str, model_data: dict) -> None:
runtime_embedding_model: model_requester.RuntimeEmbeddingModel | None = None
if model_uuid != '_':
for model in self.ap.model_mgr.embedding_models:
if model.model_entity.uuid == model_uuid:
runtime_embedding_model = model
break
if runtime_embedding_model is None:
raise Exception('model not found')
else:
runtime_embedding_model = await self.ap.model_mgr.init_runtime_embedding_model(model_data)
await runtime_embedding_model.requester.invoke_embedding(
query=None,
model=runtime_embedding_model,
input_text='Hello, world!',
extra_args={},
)