| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140 |
- from typing import List
- from PIL import Image
- import base64
- import io
- from langchain_openai import OpenAIEmbeddings
- from dashscope import MultiModalEmbedding
- from conf.config import ModelConfig
- from utils.minio.image_util import ImageUtil
class Embedding:
    """Helper for producing text, image, and multimodal embedding vectors.

    Text embeddings go through ``langchain_openai.OpenAIEmbeddings``; image
    and multimodal embeddings go through ``dashscope.MultiModalEmbedding``.
    Both clients are driven by the same ``model_name`` and ``api_key``.
    """

    def __init__(self, model_name: str = None, api_key: str = None):
        """Load the model configuration and build the text-embedding client.

        Args:
            model_name: Model name. When falsy (``None`` or ``""``), the value
                from ``ModelConfig.get_model_name()`` is used.
            api_key: API key. When falsy, the value from
                ``ModelConfig.get_api_key()`` is used.
        """
        # Provider is recorded for callers; nothing in this class reads it.
        self.model_provider = ModelConfig.get_model_provider()
        self.model_name = model_name or ModelConfig.get_model_name()
        self.base_url = ModelConfig.get_base_url()
        self.api_key = api_key or ModelConfig.get_api_key()

        # Text embeddings are served through the OpenAI-compatible client.
        self.embeddings = OpenAIEmbeddings(
            model=self.model_name,
            base_url=self.base_url,
            api_key=self.api_key,
        )

    def get_text_embedding(self, text: str) -> List[float]:
        """Return the embedding vector for a single text.

        Args:
            text: Text to embed.

        Returns:
            The embedding vector produced by ``self.embeddings.embed_query``.

        Raises:
            Exception: If the underlying client fails; the original error is
                attached as ``__cause__``.
        """
        try:
            return self.embeddings.embed_query(text)
        except Exception as e:
            raise Exception(f"文本embedding生成失败: {e}") from e

    def get_texts_embedding(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding vector per input text.

        Args:
            texts: Texts to embed.

        Returns:
            The list of vectors produced by
            ``self.embeddings.embed_documents``.

        Raises:
            Exception: If the underlying client fails; the original error is
                attached as ``__cause__``.
        """
        try:
            return self.embeddings.embed_documents(texts)
        except Exception as e:
            raise Exception(f"多个文本embedding生成失败: {e}") from e

    def get_image_embedding(self, image: "Image.Image") -> List[float]:
        """Return the embedding vector for an image.

        Args:
            image: PIL image to embed.

        Returns:
            The first embedding vector in the service response.

        Raises:
            Exception: If encoding the image fails, the request fails, or the
                service answers with a non-200 status (previously this case
                silently returned ``None``).
        """
        try:
            item = [{"image": self._image_to_data_uri(image)}]
            return self._request_multimodal_embedding(item)
        except Exception as e:
            raise Exception(f"图像embedding生成失败: {e}") from e

    def get_multimodal_embedding(self, text: str, image: "Image.Image") -> List[float]:
        """Return the embedding vector for a text + image pair.

        Either part may be omitted: ``image`` may be ``None`` and ``text`` may
        be ``None`` or blank, but not both at once.

        Args:
            text: Text content; ignored when ``None`` or whitespace-only.
            image: PIL image; ignored when ``None``.

        Returns:
            The first embedding vector in the service response.

        Raises:
            ValueError: If neither a usable text nor an image is supplied.
            Exception: If encoding the image fails, the request fails, or the
                service answers with a non-200 status.
        """
        has_text = text is not None and text.strip() != ""
        if image is None and not has_text:
            # Fail fast instead of sending an empty input list to the service.
            raise ValueError(
                "多模态embedding生成失败: text and image are both empty"
            )

        try:
            item = []
            if image is not None:
                item.append({"image": self._image_to_data_uri(image)})
            if has_text:
                item.append({"text": text})
            return self._request_multimodal_embedding(item)
        except Exception as e:
            raise Exception(f"多模态embedding生成失败: {e}") from e

    @staticmethod
    def _image_to_data_uri(image: "Image.Image") -> str:
        """Serialize a PIL image to a base64 ``data:image/png`` URI."""
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        buffer.seek(0)

        # Called on an instance so it works whether the helper is an instance,
        # static, or class method.
        # NOTE(review): the helper is not visible here; it is assumed to
        # return the compressed image as bytes. The MIME label below is kept
        # as "image/png" exactly as before -- confirm the compressed format.
        compressed_bytes = ImageUtil()._compress_image_to_bytes(buffer)

        image_base64 = base64.b64encode(compressed_bytes).decode("utf-8")
        return f"data:image/png;base64,{image_base64}"

    def _request_multimodal_embedding(self, item: list) -> List[float]:
        """Send input items to MultiModalEmbedding and return the first vector."""
        response = MultiModalEmbedding.call(
            model=self.model_name,
            api_key=self.api_key,
            input=item,
        )
        if response.status_code != 200:
            raise Exception(f"Error: {response.message}")
        return response.output["embeddings"][0]["embedding"]
|