test_dtos.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. """
  2. 向量搜索 DTOs 单元测试
  3. 测试向量搜索应用层的数据传输对象(DTOs)。
  4. 验证 DTO 与领域实体之间的转换、序列化和反序列化功能。
  5. """
  6. import pytest
  7. from datetime import datetime
  8. from src.application.vector_search.dtos import (
  9. DocumentDTO,
  10. SearchResultDTO,
  11. SearchResponseDTO
  12. )
  13. from src.domain.vector_search.entities import Document, SearchResult
  14. from src.domain.vector_search.value_objects import Vector
  15. from src.domain.shared.value_objects import EntityId, Timestamp
  class TestDocumentDTO:
      """Unit tests for ``DocumentDTO``.

      Exercises conversion between ``Document`` entities and DTOs, dict
      serialization and deserialization, round trips, and isolation of the
      DTO's metadata from the entity it was built from.
      """

      def test_from_entity_basic(self):
          """Build a DTO from an entity that has no embedding and no score."""
          # Arrange
          doc = Document(
              id=EntityId(value="doc_123"),
              content="Test content",
              embedding=None,
              metadata={"source": "test", "author": "John"},
              created_at=Timestamp.from_iso_string("2024-01-01T00:00:00"),
              updated_at=Timestamp.from_iso_string("2024-01-01T12:00:00"),
          )

          # Act
          dto = DocumentDTO.from_entity(doc)

          # Assert
          assert dto.id == "doc_123"
          assert dto.content == "Test content"
          assert dto.metadata == {"source": "test", "author": "John"}
          assert dto.embedding is None
          assert dto.score is None
          assert dto.created_at == "2024-01-01T00:00:00"
          assert dto.updated_at == "2024-01-01T12:00:00"

      def test_from_entity_with_embedding(self):
          """Build a DTO from an entity, requesting the embedding vector."""
          # Arrange
          vector = Vector(dimensions=[1.0, 2.0, 3.0])
          doc = Document(
              id=EntityId(value="doc_456"),
              content="Test content with embedding",
              embedding=vector,
              metadata={},
              created_at=Timestamp.now(),
              updated_at=Timestamp.now(),
          )

          # Act
          dto = DocumentDTO.from_entity(doc, include_embedding=True)

          # Assert
          assert dto.embedding == [1.0, 2.0, 3.0]

      def test_from_entity_without_embedding(self):
          """Build a DTO from an entity with ``include_embedding=False``."""
          # Arrange
          vector = Vector(dimensions=[1.0, 2.0, 3.0])
          doc = Document(
              id=EntityId(value="doc_789"),
              content="Test content",
              embedding=vector,
              metadata={},
              created_at=Timestamp.now(),
              updated_at=Timestamp.now(),
          )

          # Act
          dto = DocumentDTO.from_entity(doc, include_embedding=False)

          # Assert: the entity has an embedding, but the DTO must not carry it.
          assert dto.embedding is None

      def test_from_entity_with_score(self):
          """Build a DTO from an entity while supplying a score."""
          # Arrange
          doc = Document(
              id=EntityId.generate(),
              content="Test content",
              embedding=None,
              metadata={},
              created_at=Timestamp.now(),
              updated_at=Timestamp.now(),
          )

          # Act
          dto = DocumentDTO.from_entity(doc, score=0.95)

          # Assert
          assert dto.score == 0.95

      def test_to_entity_basic(self):
          """Convert a DTO without an embedding into a domain entity."""
          # Arrange
          dto = DocumentDTO(
              id="doc_123",
              content="Test content",
              metadata={"source": "test"},
              embedding=None,
              score=None,
              created_at="2024-01-01T00:00:00",
              updated_at="2024-01-01T12:00:00",
          )

          # Act
          doc = dto.to_entity()

          # Assert
          assert str(doc.id) == "doc_123"
          assert doc.content == "Test content"
          assert doc.metadata == {"source": "test"}
          assert doc.embedding is None
          assert doc.created_at.to_iso_string() == "2024-01-01T00:00:00"
          assert doc.updated_at.to_iso_string() == "2024-01-01T12:00:00"

      def test_to_entity_with_embedding(self):
          """Convert a DTO that carries an embedding into a domain entity."""
          # Arrange
          dto = DocumentDTO(
              id="doc_456",
              content="Test content",
              metadata={},
              embedding=[1.0, 2.0, 3.0],
              created_at="2024-01-01T00:00:00",
              updated_at="2024-01-01T00:00:00",
          )

          # Act
          doc = dto.to_entity()

          # Assert
          assert doc.embedding is not None
          assert doc.embedding.dimensions == [1.0, 2.0, 3.0]

      def test_to_dict_basic(self):
          """Serialize a DTO with no optional fields set."""
          # Arrange
          dto = DocumentDTO(
              id="doc_123",
              content="Test content",
              metadata={"source": "test"},
              created_at="2024-01-01T00:00:00",
              updated_at="2024-01-01T12:00:00",
          )

          # Act
          data = dto.to_dict()

          # Assert: unset optional fields are expected to be absent, not None.
          assert data["id"] == "doc_123"
          assert data["content"] == "Test content"
          assert data["metadata"] == {"source": "test"}
          assert "embedding" not in data
          assert "score" not in data
          assert data["created_at"] == "2024-01-01T00:00:00"
          assert data["updated_at"] == "2024-01-01T12:00:00"

      def test_to_dict_with_optional_fields(self):
          """Serialize a DTO that has both embedding and score set."""
          # Arrange
          dto = DocumentDTO(
              id="doc_456",
              content="Test content",
              metadata={},
              embedding=[1.0, 2.0, 3.0],
              score=0.95,
              created_at="2024-01-01T00:00:00",
              updated_at="2024-01-01T00:00:00",
          )

          # Act
          data = dto.to_dict()

          # Assert
          assert data["embedding"] == [1.0, 2.0, 3.0]
          assert data["score"] == 0.95

      def test_from_dict_basic(self):
          """Deserialize a DTO from a dict that omits the optional fields."""
          # Arrange
          data = {
              "id": "doc_123",
              "content": "Test content",
              "metadata": {"source": "test"},
              "created_at": "2024-01-01T00:00:00",
              "updated_at": "2024-01-01T12:00:00",
          }

          # Act
          dto = DocumentDTO.from_dict(data)

          # Assert
          assert dto.id == "doc_123"
          assert dto.content == "Test content"
          assert dto.metadata == {"source": "test"}
          assert dto.embedding is None
          assert dto.score is None

      def test_from_dict_with_optional_fields(self):
          """Deserialize a DTO from a dict that includes embedding and score."""
          # Arrange
          data = {
              "id": "doc_456",
              "content": "Test content",
              "metadata": {},
              "embedding": [1.0, 2.0, 3.0],
              "score": 0.95,
              "created_at": "2024-01-01T00:00:00",
              "updated_at": "2024-01-01T00:00:00",
          }

          # Act
          dto = DocumentDTO.from_dict(data)

          # Assert
          assert dto.embedding == [1.0, 2.0, 3.0]
          assert dto.score == 0.95

      def test_roundtrip_entity_to_dto_to_entity(self):
          """Round trip entity -> DTO -> entity and compare field by field."""
          # Arrange
          original_doc = Document(
              id=EntityId(value="doc_roundtrip"),
              content="Roundtrip test content",
              embedding=Vector(dimensions=[1.0, 2.0, 3.0]),
              metadata={"test": "roundtrip"},
              created_at=Timestamp.from_iso_string("2024-01-01T00:00:00"),
              updated_at=Timestamp.from_iso_string("2024-01-01T12:00:00"),
          )

          # Act
          dto = DocumentDTO.from_entity(original_doc, include_embedding=True)
          converted_doc = dto.to_entity()

          # Assert
          assert str(converted_doc.id) == str(original_doc.id)
          assert converted_doc.content == original_doc.content
          assert (
              converted_doc.embedding.dimensions
              == original_doc.embedding.dimensions
          )
          assert converted_doc.metadata == original_doc.metadata
          assert (
              converted_doc.created_at.to_iso_string()
              == original_doc.created_at.to_iso_string()
          )
          assert (
              converted_doc.updated_at.to_iso_string()
              == original_doc.updated_at.to_iso_string()
          )

      def test_roundtrip_dict_to_dto_to_dict(self):
          """Round trip dict -> DTO -> dict and expect an equal dict back."""
          # Arrange
          original_data = {
              "id": "doc_dict_roundtrip",
              "content": "Dict roundtrip test",
              "metadata": {"key": "value"},
              "embedding": [1.0, 2.0, 3.0],
              "score": 0.88,
              "created_at": "2024-01-01T00:00:00",
              "updated_at": "2024-01-01T12:00:00",
          }

          # Act
          dto = DocumentDTO.from_dict(original_data)
          converted_data = dto.to_dict()

          # Assert
          assert converted_data == original_data

      def test_metadata_is_copied(self):
          """Mutating the DTO's metadata must not leak back to its source.

          Both the dict handed to ``Document`` and the entity's own
          ``metadata`` are checked. Checking only the caller's dict would pass
          vacuously if ``Document`` copied its input while the DTO still
          shared the entity's mapping.
          """
          # Arrange
          original_metadata = {"key": "value"}
          doc = Document(
              id=EntityId.generate(),
              content="Test",
              embedding=None,
              metadata=original_metadata,
              created_at=Timestamp.now(),
              updated_at=Timestamp.now(),
          )

          # Act
          dto = DocumentDTO.from_entity(doc)
          dto.metadata["new_key"] = "new_value"

          # Assert
          assert "new_key" not in original_metadata
          assert "new_key" not in doc.metadata
  class TestSearchResultDTO:
      """Unit tests for ``SearchResultDTO``."""

      def test_from_entity_basic(self):
          """Build a search-result DTO from a ``SearchResult`` entity."""
          # Arrange
          source_doc = Document(
              id=EntityId(value="doc_123"),
              content="Test content",
              embedding=None,
              metadata={"source": "test"},
              created_at=Timestamp.now(),
              updated_at=Timestamp.now(),
          )
          hit = SearchResult(document=source_doc, score=0.95, rank=0)

          # Act
          result = SearchResultDTO.from_entity(hit)

          # Assert
          assert result.score == 0.95
          assert result.rank == 0
          assert result.document.id == "doc_123"
          assert result.document.content == "Test content"
          # The score is expected to be passed down to the nested document DTO.
          assert result.document.score == 0.95

      def test_from_entity_with_embedding(self):
          """Build a search-result DTO, requesting the embedding vector."""
          # Arrange
          embedding = Vector(dimensions=[1.0, 2.0, 3.0])
          source_doc = Document(
              id=EntityId.generate(),
              content="Test content",
              embedding=embedding,
              metadata={},
              created_at=Timestamp.now(),
              updated_at=Timestamp.now(),
          )
          hit = SearchResult(document=source_doc, score=0.88, rank=1)

          # Act
          result = SearchResultDTO.from_entity(hit, include_embedding=True)

          # Assert
          assert result.document.embedding == [1.0, 2.0, 3.0]

      def test_to_dict(self):
          """Serialize a search-result DTO into a nested dict."""
          # Arrange
          nested_doc = DocumentDTO(
              id="doc_123",
              content="Test content",
              metadata={"source": "test"},
              created_at="2024-01-01T00:00:00",
              updated_at="2024-01-01T00:00:00",
          )
          hit_dto = SearchResultDTO(document=nested_doc, score=0.95, rank=0)

          # Act
          payload = hit_dto.to_dict()

          # Assert
          assert payload["score"] == 0.95
          assert payload["rank"] == 0
          assert payload["document"]["id"] == "doc_123"
          assert payload["document"]["content"] == "Test content"

      def test_from_dict(self):
          """Deserialize a search-result DTO from a nested dict."""
          # Arrange
          document_payload = {
              "id": "doc_123",
              "content": "Test content",
              "metadata": {"source": "test"},
              "created_at": "2024-01-01T00:00:00",
              "updated_at": "2024-01-01T00:00:00",
          }
          payload = {
              "document": document_payload,
              "score": 0.95,
              "rank": 0,
          }

          # Act
          result = SearchResultDTO.from_dict(payload)

          # Assert
          assert result.score == 0.95
          assert result.rank == 0
          assert result.document.id == "doc_123"

      def test_roundtrip_dict_to_dto_to_dict(self):
          """Round trip dict -> DTO -> dict and expect an equal dict back."""
          # Arrange
          document_payload = {
              "id": "doc_roundtrip",
              "content": "Roundtrip test",
              "metadata": {},
              "created_at": "2024-01-01T00:00:00",
              "updated_at": "2024-01-01T00:00:00",
          }
          expected = {
              "document": document_payload,
              "score": 0.88,
              "rank": 2,
          }

          # Act
          actual = SearchResultDTO.from_dict(expected).to_dict()

          # Assert
          assert actual == expected
  class TestSearchResponseDTO:
      """Unit tests for ``SearchResponseDTO``."""

      def test_to_dict_basic(self):
          """Serialize a response holding one result and no timing value."""
          # Arrange
          nested_doc = DocumentDTO(
              id="doc_123",
              content="Test content",
              metadata={},
              created_at="2024-01-01T00:00:00",
              updated_at="2024-01-01T00:00:00",
          )
          single_hit = SearchResultDTO(document=nested_doc, score=0.95, rank=0)
          response_dto = SearchResponseDTO(
              results=[single_hit],
              total=1,
              query_text="test query",
          )

          # Act
          payload = response_dto.to_dict()

          # Assert
          assert payload["total"] == 1
          assert payload["query_text"] == "test query"
          assert len(payload["results"]) == 1
          assert payload["results"][0]["score"] == 0.95
          # No took_ms was supplied, so the key is expected to be absent.
          assert "took_ms" not in payload

      def test_to_dict_with_timing(self):
          """Serialize a response that carries a ``took_ms`` value."""
          # Arrange
          response_dto = SearchResponseDTO(
              results=[],
              total=0,
              query_text="test query",
              took_ms=150,
          )

          # Act
          payload = response_dto.to_dict()

          # Assert
          assert payload["took_ms"] == 150

      def test_from_dict_basic(self):
          """Deserialize a response from a dict without ``took_ms``."""
          # Arrange
          hit_payload = {
              "document": {
                  "id": "doc_123",
                  "content": "Test",
                  "metadata": {},
                  "created_at": "2024-01-01T00:00:00",
                  "updated_at": "2024-01-01T00:00:00",
              },
              "score": 0.95,
              "rank": 0,
          }
          payload = {
              "results": [hit_payload],
              "total": 1,
              "query_text": "test query",
          }

          # Act
          response_dto = SearchResponseDTO.from_dict(payload)

          # Assert
          assert response_dto.total == 1
          assert response_dto.query_text == "test query"
          assert len(response_dto.results) == 1
          assert response_dto.results[0].score == 0.95
          assert response_dto.took_ms is None

      def test_from_dict_with_timing(self):
          """Deserialize a response from a dict that includes ``took_ms``."""
          # Arrange
          payload = {
              "results": [],
              "total": 0,
              "query_text": "test query",
              "took_ms": 150,
          }

          # Act
          response_dto = SearchResponseDTO.from_dict(payload)

          # Assert
          assert response_dto.took_ms == 150

      def test_roundtrip_dict_to_dto_to_dict(self):
          """Round trip dict -> DTO -> dict with two results and timing."""
          # Arrange
          first_hit = {
              "document": {
                  "id": "doc_1",
                  "content": "Content 1",
                  "metadata": {},
                  "created_at": "2024-01-01T00:00:00",
                  "updated_at": "2024-01-01T00:00:00",
              },
              "score": 0.95,
              "rank": 0,
          }
          second_hit = {
              "document": {
                  "id": "doc_2",
                  "content": "Content 2",
                  "metadata": {},
                  "created_at": "2024-01-01T00:00:00",
                  "updated_at": "2024-01-01T00:00:00",
              },
              "score": 0.88,
              "rank": 1,
          }
          expected = {
              "results": [first_hit, second_hit],
              "total": 2,
              "query_text": "test query",
              "took_ms": 200,
          }

          # Act
          actual = SearchResponseDTO.from_dict(expected).to_dict()

          # Assert
          assert actual == expected

      def test_empty_results(self):
          """Serialize a response whose result list is empty."""
          # Arrange
          response_dto = SearchResponseDTO(
              results=[],
              total=0,
              query_text="no results query",
          )

          # Act
          payload = response_dto.to_dict()

          # Assert
          assert payload["results"] == []
          assert payload["total"] == 0