import base64
import io
from typing import List, Optional

from dashscope import MultiModalEmbedding
from langchain_openai import OpenAIEmbeddings
from PIL import Image

from conf.config import ModelConfig
from utils.minio.image_util import ImageUtil


class Embedding:
    """Embedding helper for text, image, and combined text+image inputs.

    Text embeddings go through ``OpenAIEmbeddings`` (langchain_openai);
    image and multimodal embeddings go through
    ``dashscope.MultiModalEmbedding``. Both use the same model name and
    API key held on the instance.
    """

    def __init__(
        self,
        model_name: Optional[str] = None,
        api_key: Optional[str] = None,
    ):
        """Initialize the embedding clients.

        Args:
            model_name: Model name; when None (or empty) the value from
                ``ModelConfig.get_model_name()`` is used.
            api_key: API key; when None (or empty) the value from
                ``ModelConfig.get_api_key()`` is used.
        """
        # Resolve model settings, letting explicit arguments win over config.
        self.model_provider = ModelConfig.get_model_provider()
        self.model_name = model_name or ModelConfig.get_model_name()
        self.base_url = ModelConfig.get_base_url()
        self.api_key = api_key or ModelConfig.get_api_key()

        # Text embeddings are served through the langchain_openai client.
        self.embeddings = OpenAIEmbeddings(
            model=self.model_name,
            base_url=self.base_url,
            api_key=self.api_key,
        )

    @staticmethod
    def _image_to_data_uri(image: Image.Image) -> str:
        """Serialize a PIL image to a base64 ``data:image/png`` URI."""
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        buffer.seek(0)
        # Call the helper on an instance: this works whether it is declared
        # as an instance method or a staticmethod.
        # NOTE(review): the format of the compressed bytes is not visible
        # here; the URI keeps the original "image/png" label - confirm.
        compressed_bytes = ImageUtil()._compress_image_to_bytes(buffer)
        image_base64 = base64.b64encode(compressed_bytes).decode("utf-8")
        return f"data:image/png;base64,{image_base64}"

    def _request_multimodal_embedding(self, item: List[dict]) -> List[float]:
        """Send ``item`` to MultiModalEmbedding and return the first vector.

        Raises:
            Exception: If the service responds with a non-200 status code.
        """
        response = MultiModalEmbedding.call(
            model=self.model_name,
            api_key=self.api_key,
            input=item,
        )
        if response.status_code != 200:
            raise Exception(f"Error: {response.message}")
        return response.output["embeddings"][0]["embedding"]

    def get_text_embedding(self, text: str) -> List[float]:
        """Return the embedding vector for a single text.

        Args:
            text: Text to embed.

        Returns:
            The embedding vector produced by ``embed_query``.

        Raises:
            Exception: If the underlying client call fails; the original
                error is attached as ``__cause__``.
        """
        try:
            return self.embeddings.embed_query(text)
        except Exception as e:
            raise Exception(f"文本embedding生成失败: {str(e)}") from e

    def get_texts_embedding(self, texts: List[str]) -> List[List[float]]:
        """Return embedding vectors for a list of texts.

        Args:
            texts: Texts to embed.

        Returns:
            One embedding vector per input text, as produced by
            ``embed_documents``.

        Raises:
            Exception: If the underlying client call fails; the original
                error is attached as ``__cause__``.
        """
        try:
            return self.embeddings.embed_documents(texts)
        except Exception as e:
            raise Exception(f"多个文本embedding生成失败: {str(e)}") from e

    def get_image_embedding(self, image: Image.Image) -> List[float]:
        """Return the embedding vector for an image.

        Args:
            image: PIL image to embed.

        Returns:
            The first embedding vector from the service response.

        Raises:
            Exception: If encoding the image fails, the request fails, or
                the service responds with a non-200 status code.
        """
        try:
            item = [{"image": self._image_to_data_uri(image)}]
            # A non-200 response raises instead of silently returning None.
            return self._request_multimodal_embedding(item)
        except Exception as e:
            raise Exception(f"图像embedding生成失败: {str(e)}") from e

    def get_multimodal_embedding(
        self,
        text: Optional[str],
        image: Optional[Image.Image],
    ) -> List[float]:
        """Return the embedding vector for a text and/or image input.

        Either part may be omitted: ``image`` is skipped when None, and
        ``text`` is skipped when None or whitespace-only. At least one part
        must be present.

        Args:
            text: Text content, or None.
            image: PIL image, or None.

        Returns:
            The first embedding vector from the service response.

        Raises:
            Exception: If both parts are missing, encoding the image fails,
                the request fails, or the service responds with a non-200
                status code.
        """
        try:
            item = []
            # The image entry goes first, matching the established order.
            if image is not None:
                item.append({'image': self._image_to_data_uri(image)})
            # Only include text that is neither None nor blank.
            if text is not None and text.strip() != "":
                item.append({'text': text})
            if not item:
                # Fail locally rather than sending an empty request.
                raise ValueError("no text or image provided to embed")
            return self._request_multimodal_embedding(item)
        except Exception as e:
            raise Exception(f"多模态embedding生成失败: {str(e)}") from e