query_routes.py 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164
  1. """
  2. This module contains all query-related routes for the LightRAG API.
  3. """
  4. import json
  5. from typing import Any, Dict, List, Literal, Optional
  6. from fastapi import APIRouter, Depends, HTTPException
  7. from lightrag.base import QueryParam
  8. from lightrag.api.utils_api import get_combined_auth_dependency
  9. from lightrag.utils import logger
  10. from pydantic import BaseModel, Field, field_validator
  11. class QueryRequest(BaseModel):
  12. query: str = Field(
  13. min_length=3,
  14. description="The query text",
  15. )
  16. mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = Field(
  17. default="mix",
  18. description="Query mode",
  19. )
  20. only_need_context: Optional[bool] = Field(
  21. default=None,
  22. description="If True, only returns the retrieved context without generating a response.",
  23. )
  24. only_need_prompt: Optional[bool] = Field(
  25. default=None,
  26. description="If True, only returns the generated prompt without producing a response.",
  27. )
  28. response_type: Optional[str] = Field(
  29. min_length=1,
  30. default=None,
  31. description="Defines the response format. Examples: 'Multiple Paragraphs', 'Single Paragraph', 'Bullet Points'.",
  32. )
  33. top_k: Optional[int] = Field(
  34. ge=1,
  35. default=None,
  36. description="Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.",
  37. )
  38. chunk_top_k: Optional[int] = Field(
  39. ge=1,
  40. default=None,
  41. description="Number of text chunks to retrieve initially from vector search and keep after reranking.",
  42. )
  43. max_entity_tokens: Optional[int] = Field(
  44. default=None,
  45. description="Maximum number of tokens allocated for entity context in unified token control system.",
  46. ge=1,
  47. )
  48. max_relation_tokens: Optional[int] = Field(
  49. default=None,
  50. description="Maximum number of tokens allocated for relationship context in unified token control system.",
  51. ge=1,
  52. )
  53. max_total_tokens: Optional[int] = Field(
  54. default=None,
  55. description="Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).",
  56. ge=1,
  57. )
  58. hl_keywords: list[str] = Field(
  59. default_factory=list,
  60. description="List of high-level keywords to prioritize in retrieval. Leave empty to use the LLM to generate the keywords.",
  61. )
  62. ll_keywords: list[str] = Field(
  63. default_factory=list,
  64. description="List of low-level keywords to refine retrieval focus. Leave empty to use the LLM to generate the keywords.",
  65. )
  66. conversation_history: Optional[List[Dict[str, Any]]] = Field(
  67. default=None,
  68. description="History messages are only sent to LLM for context, not used for retrieval. Format: [{'role': 'user/assistant', 'content': 'message'}].",
  69. )
  70. user_prompt: Optional[str] = Field(
  71. default=None,
  72. description="User-provided prompt for the query. If provided, this will be used instead of the default value from prompt template.",
  73. )
  74. enable_rerank: Optional[bool] = Field(
  75. default=None,
  76. description="Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True.",
  77. )
  78. include_references: Optional[bool] = Field(
  79. default=True,
  80. description="If True, includes reference list in responses. Affects /query and /query/stream endpoints. /query/data always includes references.",
  81. )
  82. include_chunk_content: Optional[bool] = Field(
  83. default=False,
  84. description="If True, includes actual chunk text content in references. Only applies when include_references=True. Useful for evaluation and debugging.",
  85. )
  86. stream: Optional[bool] = Field(
  87. default=True,
  88. description="If True, enables streaming output for real-time responses. Only affects /query/stream endpoint.",
  89. )
  90. @field_validator("query", mode="after")
  91. @classmethod
  92. def query_strip_after(cls, query: str) -> str:
  93. return query.strip()
  94. @field_validator("conversation_history", mode="after")
  95. @classmethod
  96. def conversation_history_role_check(
  97. cls, conversation_history: List[Dict[str, Any]] | None
  98. ) -> List[Dict[str, Any]] | None:
  99. if conversation_history is None:
  100. return None
  101. for msg in conversation_history:
  102. if "role" not in msg:
  103. raise ValueError("Each message must have a 'role' key.")
  104. if not isinstance(msg["role"], str) or not msg["role"].strip():
  105. raise ValueError("Each message 'role' must be a non-empty string.")
  106. return conversation_history
  107. def to_query_params(self, is_stream: bool) -> "QueryParam":
  108. """Converts a QueryRequest instance into a QueryParam instance."""
  109. # Use Pydantic's `.model_dump(exclude_none=True)` to remove None values automatically
  110. # Exclude API-level parameters that don't belong in QueryParam
  111. request_data = self.model_dump(
  112. exclude_none=True, exclude={"query", "include_chunk_content"}
  113. )
  114. # Ensure `mode` and `stream` are set explicitly
  115. param = QueryParam(**request_data)
  116. param.stream = is_stream
  117. return param
  118. class ReferenceItem(BaseModel):
  119. """A single reference item in query responses."""
  120. reference_id: str = Field(description="Unique reference identifier")
  121. file_path: str = Field(description="Path to the source file")
  122. content: Optional[List[str]] = Field(
  123. default=None,
  124. description="List of chunk contents from this file (only present when include_chunk_content=True)",
  125. )
  126. class QueryResponse(BaseModel):
  127. response: str = Field(
  128. description="The generated response",
  129. )
  130. references: Optional[List[ReferenceItem]] = Field(
  131. default=None,
  132. description="Reference list (Disabled when include_references=False, /query/data always includes references.)",
  133. )
  134. class QueryDataResponse(BaseModel):
  135. status: str = Field(description="Query execution status")
  136. message: str = Field(description="Status message")
  137. data: Dict[str, Any] = Field(
  138. description="Query result data containing entities, relationships, chunks, and references"
  139. )
  140. metadata: Dict[str, Any] = Field(
  141. description="Query metadata including mode, keywords, and processing information"
  142. )
  143. class StreamChunkResponse(BaseModel):
  144. """Response model for streaming chunks in NDJSON format"""
  145. references: Optional[List[Dict[str, str]]] = Field(
  146. default=None,
  147. description="Reference list (only in first chunk when include_references=True)",
  148. )
  149. response: Optional[str] = Field(
  150. default=None, description="Response content chunk or complete response"
  151. )
  152. error: Optional[str] = Field(
  153. default=None, description="Error message if processing fails"
  154. )
  155. def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
  156. # Fresh router per call. A module-level instance would accumulate
  157. # duplicate routes when the factory is invoked more than once in the
  158. # same process (e.g. across tests), which triggers FastAPI's
  159. # "Duplicate Operation ID" warnings.
  160. router = APIRouter(tags=["query"])
  161. combined_auth = get_combined_auth_dependency(api_key)
  162. @router.post(
  163. "/query",
  164. response_model=QueryResponse,
  165. dependencies=[Depends(combined_auth)],
  166. responses={
  167. 200: {
  168. "description": "Successful RAG query response",
  169. "content": {
  170. "application/json": {
  171. "schema": {
  172. "type": "object",
  173. "properties": {
  174. "response": {
  175. "type": "string",
  176. "description": "The generated response from the RAG system",
  177. },
  178. "references": {
  179. "type": "array",
  180. "items": {
  181. "type": "object",
  182. "properties": {
  183. "reference_id": {"type": "string"},
  184. "file_path": {"type": "string"},
  185. "content": {
  186. "type": "array",
  187. "items": {"type": "string"},
  188. "description": "List of chunk contents from this file (only included when include_chunk_content=True)",
  189. },
  190. },
  191. },
  192. "description": "Reference list (only included when include_references=True)",
  193. },
  194. },
  195. "required": ["response"],
  196. },
  197. "examples": {
  198. "with_references": {
  199. "summary": "Response with references",
  200. "description": "Example response when include_references=True",
  201. "value": {
  202. "response": "Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines capable of performing tasks that typically require human intelligence, such as learning, reasoning, and problem-solving.",
  203. "references": [
  204. {
  205. "reference_id": "1",
  206. "file_path": "/documents/ai_overview.pdf",
  207. },
  208. {
  209. "reference_id": "2",
  210. "file_path": "/documents/machine_learning.txt",
  211. },
  212. ],
  213. },
  214. },
  215. "with_chunk_content": {
  216. "summary": "Response with chunk content",
  217. "description": "Example response when include_references=True and include_chunk_content=True. Note: content is an array of chunks from the same file.",
  218. "value": {
  219. "response": "Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines capable of performing tasks that typically require human intelligence, such as learning, reasoning, and problem-solving.",
  220. "references": [
  221. {
  222. "reference_id": "1",
  223. "file_path": "/documents/ai_overview.pdf",
  224. "content": [
  225. "Artificial Intelligence (AI) represents a transformative field in computer science focused on creating systems that can perform tasks requiring human-like intelligence. These tasks include learning from experience, understanding natural language, recognizing patterns, and making decisions.",
  226. "AI systems can be categorized into narrow AI, which is designed for specific tasks, and general AI, which aims to match human cognitive abilities across a wide range of domains.",
  227. ],
  228. },
  229. {
  230. "reference_id": "2",
  231. "file_path": "/documents/machine_learning.txt",
  232. "content": [
  233. "Machine learning is a subset of AI that enables computers to learn and improve from experience without being explicitly programmed. It focuses on the development of algorithms that can access data and use it to learn for themselves."
  234. ],
  235. },
  236. ],
  237. },
  238. },
  239. "without_references": {
  240. "summary": "Response without references",
  241. "description": "Example response when include_references=False",
  242. "value": {
  243. "response": "Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines capable of performing tasks that typically require human intelligence, such as learning, reasoning, and problem-solving."
  244. },
  245. },
  246. "different_modes": {
  247. "summary": "Different query modes",
  248. "description": "Examples of responses from different query modes",
  249. "value": {
  250. "local_mode": "Focuses on specific entities and their relationships",
  251. "global_mode": "Provides broader context from relationship patterns",
  252. "hybrid_mode": "Combines local and global approaches",
  253. "naive_mode": "Simple vector similarity search",
  254. "mix_mode": "Integrates knowledge graph and vector retrieval",
  255. },
  256. },
  257. },
  258. }
  259. },
  260. },
  261. 400: {
  262. "description": "Bad Request - Invalid input parameters",
  263. "content": {
  264. "application/json": {
  265. "schema": {
  266. "type": "object",
  267. "properties": {"detail": {"type": "string"}},
  268. },
  269. "example": {
  270. "detail": "Query text must be at least 3 characters long"
  271. },
  272. }
  273. },
  274. },
  275. 500: {
  276. "description": "Internal Server Error - Query processing failed",
  277. "content": {
  278. "application/json": {
  279. "schema": {
  280. "type": "object",
  281. "properties": {"detail": {"type": "string"}},
  282. },
  283. "example": {
  284. "detail": "Failed to process query: LLM service unavailable"
  285. },
  286. }
  287. },
  288. },
  289. },
  290. )
  291. async def query_text(request: QueryRequest):
  292. """
  293. Comprehensive RAG query endpoint with non-streaming response. Parameter "stream" is ignored.
  294. This endpoint performs Retrieval-Augmented Generation (RAG) queries using various modes
  295. to provide intelligent responses based on your knowledge base.
  296. **Query Modes:**
  297. - **local**: Focuses on specific entities and their direct relationships
  298. - **global**: Analyzes broader patterns and relationships across the knowledge graph
  299. - **hybrid**: Combines local and global approaches for comprehensive results
  300. - **naive**: Simple vector similarity search without knowledge graph
  301. - **mix**: Integrates knowledge graph retrieval with vector search (recommended)
  302. - **bypass**: Direct LLM query without knowledge retrieval
  303. conversation_history parameteris sent to LLM only, does not affect retrieval results.
  304. **Usage Examples:**
  305. Basic query:
  306. ```json
  307. {
  308. "query": "What is machine learning?",
  309. "mode": "mix"
  310. }
  311. ```
  312. Bypass initial LLM call by providing high-level and low-level keywords:
  313. ```json
  314. {
  315. "query": "What is Retrieval-Augmented-Generation?",
  316. "hl_keywords": ["machine learning", "information retrieval", "natural language processing"],
  317. "ll_keywords": ["retrieval augmented generation", "RAG", "knowledge base"],
  318. "mode": "mix"
  319. }
  320. ```
  321. Advanced query with references:
  322. ```json
  323. {
  324. "query": "Explain neural networks",
  325. "mode": "hybrid",
  326. "include_references": true,
  327. "response_type": "Multiple Paragraphs",
  328. "top_k": 10
  329. }
  330. ```
  331. Conversation with history:
  332. ```json
  333. {
  334. "query": "Can you give me more details?",
  335. "conversation_history": [
  336. {"role": "user", "content": "What is AI?"},
  337. {"role": "assistant", "content": "AI is artificial intelligence..."}
  338. ]
  339. }
  340. ```
  341. Args:
  342. request (QueryRequest): The request object containing query parameters:
  343. - **query**: The question or prompt to process (min 3 characters)
  344. - **mode**: Query strategy - "mix" recommended for best results
  345. - **include_references**: Whether to include source citations
  346. - **response_type**: Format preference (e.g., "Multiple Paragraphs")
  347. - **top_k**: Number of top entities/relations to retrieve
  348. - **conversation_history**: Previous dialogue context
  349. - **max_total_tokens**: Token budget for the entire response
  350. Returns:
  351. QueryResponse: JSON response containing:
  352. - **response**: The generated answer to your query
  353. - **references**: Source citations (if include_references=True)
  354. Raises:
  355. HTTPException:
  356. - 400: Invalid input parameters (e.g., query too short)
  357. - 500: Internal processing error (e.g., LLM service unavailable)
  358. """
  359. try:
  360. param = request.to_query_params(
  361. False
  362. ) # Ensure stream=False for non-streaming endpoint
  363. # Force stream=False for /query endpoint regardless of include_references setting
  364. param.stream = False
  365. # Unified approach: always use aquery_llm for both cases
  366. result = await rag.aquery_llm(request.query, param=param)
  367. # Extract LLM response and references from unified result
  368. llm_response = result.get("llm_response", {})
  369. data = result.get("data", {})
  370. references = data.get("references", [])
  371. # Get the non-streaming response content
  372. response_content = llm_response.get("content", "")
  373. if not response_content:
  374. response_content = "No relevant context found for the query."
  375. # Enrich references with chunk content if requested
  376. if request.include_references and request.include_chunk_content:
  377. chunks = data.get("chunks", [])
  378. # Create a mapping from reference_id to chunk content
  379. ref_id_to_content = {}
  380. for chunk in chunks:
  381. ref_id = chunk.get("reference_id", "")
  382. content = chunk.get("content", "")
  383. if ref_id and content:
  384. # Collect chunk content; join later to avoid quadratic string concatenation
  385. ref_id_to_content.setdefault(ref_id, []).append(content)
  386. # Add content to references
  387. enriched_references = []
  388. for ref in references:
  389. ref_copy = ref.copy()
  390. ref_id = ref.get("reference_id", "")
  391. if ref_id in ref_id_to_content:
  392. # Keep content as a list of chunks (one file may have multiple chunks)
  393. ref_copy["content"] = ref_id_to_content[ref_id]
  394. enriched_references.append(ref_copy)
  395. references = enriched_references
  396. # Return response with or without references based on request
  397. if request.include_references:
  398. return QueryResponse(response=response_content, references=references)
  399. else:
  400. return QueryResponse(response=response_content, references=None)
  401. except Exception as e:
  402. logger.error(f"Error processing query: {str(e)}", exc_info=True)
  403. raise HTTPException(status_code=500, detail=str(e))
  404. @router.post(
  405. "/query/stream",
  406. dependencies=[Depends(combined_auth)],
  407. responses={
  408. 200: {
  409. "description": "Flexible RAG query response - format depends on stream parameter",
  410. "content": {
  411. "application/x-ndjson": {
  412. "schema": {
  413. "type": "string",
  414. "format": "ndjson",
  415. "description": "Newline-delimited JSON (NDJSON) format used for both streaming and non-streaming responses. For streaming: multiple lines with separate JSON objects. For non-streaming: single line with complete JSON object.",
  416. "example": '{"references": [{"reference_id": "1", "file_path": "/documents/ai.pdf"}]}\n{"response": "Artificial Intelligence is"}\n{"response": " a field of computer science"}\n{"response": " that focuses on creating intelligent machines."}',
  417. },
  418. "examples": {
  419. "streaming_with_references": {
  420. "summary": "Streaming mode with references (stream=true)",
  421. "description": "Multiple NDJSON lines when stream=True and include_references=True. First line contains references, subsequent lines contain response chunks.",
  422. "value": '{"references": [{"reference_id": "1", "file_path": "/documents/ai_overview.pdf"}, {"reference_id": "2", "file_path": "/documents/ml_basics.txt"}]}\n{"response": "Artificial Intelligence (AI) is a branch of computer science"}\n{"response": " that aims to create intelligent machines capable of performing"}\n{"response": " tasks that typically require human intelligence, such as learning,"}\n{"response": " reasoning, and problem-solving."}',
  423. },
  424. "streaming_with_chunk_content": {
  425. "summary": "Streaming mode with chunk content (stream=true, include_chunk_content=true)",
  426. "description": "Multiple NDJSON lines when stream=True, include_references=True, and include_chunk_content=True. First line contains references with content arrays (one file may have multiple chunks), subsequent lines contain response chunks.",
  427. "value": '{"references": [{"reference_id": "1", "file_path": "/documents/ai_overview.pdf", "content": ["Artificial Intelligence (AI) represents a transformative field...", "AI systems can be categorized into narrow AI and general AI..."]}, {"reference_id": "2", "file_path": "/documents/ml_basics.txt", "content": ["Machine learning is a subset of AI that enables computers to learn..."]}]}\n{"response": "Artificial Intelligence (AI) is a branch of computer science"}\n{"response": " that aims to create intelligent machines capable of performing"}\n{"response": " tasks that typically require human intelligence."}',
  428. },
  429. "streaming_without_references": {
  430. "summary": "Streaming mode without references (stream=true)",
  431. "description": "Multiple NDJSON lines when stream=True and include_references=False. Only response chunks are sent.",
  432. "value": '{"response": "Machine learning is a subset of artificial intelligence"}\n{"response": " that enables computers to learn and improve from experience"}\n{"response": " without being explicitly programmed for every task."}',
  433. },
  434. "non_streaming_with_references": {
  435. "summary": "Non-streaming mode with references (stream=false)",
  436. "description": "Single NDJSON line when stream=False and include_references=True. Complete response with references in one message.",
  437. "value": '{"references": [{"reference_id": "1", "file_path": "/documents/neural_networks.pdf"}], "response": "Neural networks are computational models inspired by biological neural networks that consist of interconnected nodes (neurons) organized in layers. They are fundamental to deep learning and can learn complex patterns from data through training processes."}',
  438. },
  439. "non_streaming_without_references": {
  440. "summary": "Non-streaming mode without references (stream=false)",
  441. "description": "Single NDJSON line when stream=False and include_references=False. Complete response only.",
  442. "value": '{"response": "Deep learning is a subset of machine learning that uses neural networks with multiple layers (hence deep) to model and understand complex patterns in data. It has revolutionized fields like computer vision, natural language processing, and speech recognition."}',
  443. },
  444. "error_response": {
  445. "summary": "Error during streaming",
  446. "description": "Error handling in NDJSON format when an error occurs during processing.",
  447. "value": '{"references": [{"reference_id": "1", "file_path": "/documents/ai.pdf"}]}\n{"response": "Artificial Intelligence is"}\n{"error": "LLM service temporarily unavailable"}',
  448. },
  449. },
  450. }
  451. },
  452. },
  453. 400: {
  454. "description": "Bad Request - Invalid input parameters",
  455. "content": {
  456. "application/json": {
  457. "schema": {
  458. "type": "object",
  459. "properties": {"detail": {"type": "string"}},
  460. },
  461. "example": {
  462. "detail": "Query text must be at least 3 characters long"
  463. },
  464. }
  465. },
  466. },
  467. 500: {
  468. "description": "Internal Server Error - Query processing failed",
  469. "content": {
  470. "application/json": {
  471. "schema": {
  472. "type": "object",
  473. "properties": {"detail": {"type": "string"}},
  474. },
  475. "example": {
  476. "detail": "Failed to process streaming query: Knowledge graph unavailable"
  477. },
  478. }
  479. },
  480. },
  481. },
  482. )
  483. async def query_text_stream(request: QueryRequest):
  484. """
  485. Advanced RAG query endpoint with flexible streaming response.
  486. This endpoint provides the most flexible querying experience, supporting both real-time streaming
  487. and complete response delivery based on your integration needs.
  488. **Response Modes:**
  489. - Real-time response delivery as content is generated
  490. - NDJSON format: each line is a separate JSON object
  491. - First line: `{"references": [...]}` (if include_references=True)
  492. - Subsequent lines: `{"response": "content chunk"}`
  493. - Error handling: `{"error": "error message"}`
  494. > If stream parameter is False, or the query hit LLM cache, complete response delivered in a single streaming message.
  495. **Response Format Details**
  496. - **Content-Type**: `application/x-ndjson` (Newline-Delimited JSON)
  497. - **Structure**: Each line is an independent, valid JSON object
  498. - **Parsing**: Process line-by-line, each line is self-contained
  499. - **Headers**: Includes cache control and connection management
  500. **Query Modes (same as /query endpoint)**
  501. - **local**: Entity-focused retrieval with direct relationships
  502. - **global**: Pattern analysis across the knowledge graph
  503. - **hybrid**: Combined local and global strategies
  504. - **naive**: Vector similarity search only
  505. - **mix**: Integrated knowledge graph + vector retrieval (recommended)
  506. - **bypass**: Direct LLM query without knowledge retrieval
  507. conversation_history parameteris sent to LLM only, does not affect retrieval results.
  508. **Usage Examples**
  509. Real-time streaming query:
  510. ```json
  511. {
  512. "query": "Explain machine learning algorithms",
  513. "mode": "mix",
  514. "stream": true,
  515. "include_references": true
  516. }
  517. ```
  518. Bypass initial LLM call by providing high-level and low-level keywords:
  519. ```json
  520. {
  521. "query": "What is Retrieval-Augmented-Generation?",
  522. "hl_keywords": ["machine learning", "information retrieval", "natural language processing"],
  523. "ll_keywords": ["retrieval augmented generation", "RAG", "knowledge base"],
  524. "mode": "mix"
  525. }
  526. ```
  527. Complete response query:
  528. ```json
  529. {
  530. "query": "What is deep learning?",
  531. "mode": "hybrid",
  532. "stream": false,
  533. "response_type": "Multiple Paragraphs"
  534. }
  535. ```
  536. Conversation with context:
  537. ```json
  538. {
  539. "query": "Can you elaborate on that?",
  540. "stream": true,
  541. "conversation_history": [
  542. {"role": "user", "content": "What is neural network?"},
  543. {"role": "assistant", "content": "A neural network is..."}
  544. ]
  545. }
  546. ```
  547. **Response Processing:**
  548. ```python
  549. async for line in response.iter_lines():
  550. data = json.loads(line)
  551. if "references" in data:
  552. # Handle references (first message)
  553. references = data["references"]
  554. if "response" in data:
  555. # Handle content chunk
  556. content_chunk = data["response"]
  557. if "error" in data:
  558. # Handle error
  559. error_message = data["error"]
  560. ```
  561. **Error Handling:**
  562. - Streaming errors are delivered as `{"error": "message"}` lines
  563. - Non-streaming errors raise HTTP exceptions
  564. - Partial responses may be delivered before errors in streaming mode
  565. - Always check for error objects when processing streaming responses
  566. Args:
  567. request (QueryRequest): The request object containing query parameters:
  568. - **query**: The question or prompt to process (min 3 characters)
  569. - **mode**: Query strategy - "mix" recommended for best results
  570. - **stream**: Enable streaming (True) or complete response (False)
  571. - **include_references**: Whether to include source citations
  572. - **response_type**: Format preference (e.g., "Multiple Paragraphs")
  573. - **top_k**: Number of top entities/relations to retrieve
  574. - **conversation_history**: Previous dialogue context for multi-turn conversations
  575. - **max_total_tokens**: Token budget for the entire response
  576. Returns:
  577. StreamingResponse: NDJSON streaming response containing:
  578. - **Streaming mode**: Multiple JSON objects, one per line
  579. - References object (if requested): `{"references": [...]}`
  580. - Content chunks: `{"response": "chunk content"}`
  581. - Error objects: `{"error": "error message"}`
  582. - **Non-streaming mode**: Single JSON object
  583. - Complete response: `{"references": [...], "response": "complete content"}`
  584. Raises:
  585. HTTPException:
  586. - 400: Invalid input parameters (e.g., query too short, invalid mode)
  587. - 500: Internal processing error (e.g., LLM service unavailable)
  588. Note:
  589. This endpoint is ideal for applications requiring flexible response delivery.
  590. Use streaming mode for real-time interfaces and non-streaming for batch processing.
  591. """
  592. try:
  593. # Use the stream parameter from the request, defaulting to True if not specified
  594. stream_mode = request.stream if request.stream is not None else True
  595. param = request.to_query_params(stream_mode)
  596. from fastapi.responses import StreamingResponse
  597. # Unified approach: always use aquery_llm for all cases
  598. result = await rag.aquery_llm(request.query, param=param)
  599. async def stream_generator():
  600. # Extract references and LLM response from unified result
  601. references = result.get("data", {}).get("references", [])
  602. llm_response = result.get("llm_response", {})
  603. # Enrich references with chunk content if requested
  604. if request.include_references and request.include_chunk_content:
  605. data = result.get("data", {})
  606. chunks = data.get("chunks", [])
  607. # Create a mapping from reference_id to chunk content
  608. ref_id_to_content = {}
  609. for chunk in chunks:
  610. ref_id = chunk.get("reference_id", "")
  611. content = chunk.get("content", "")
  612. if ref_id and content:
  613. # Collect chunk content
  614. ref_id_to_content.setdefault(ref_id, []).append(content)
  615. # Add content to references
  616. enriched_references = []
  617. for ref in references:
  618. ref_copy = ref.copy()
  619. ref_id = ref.get("reference_id", "")
  620. if ref_id in ref_id_to_content:
  621. # Keep content as a list of chunks (one file may have multiple chunks)
  622. ref_copy["content"] = ref_id_to_content[ref_id]
  623. enriched_references.append(ref_copy)
  624. references = enriched_references
  625. if llm_response.get("is_streaming"):
  626. # Streaming mode: send references first, then stream response chunks
  627. if request.include_references:
  628. yield f"{json.dumps({'references': references})}\n"
  629. response_stream = llm_response.get("response_iterator")
  630. if response_stream:
  631. try:
  632. async for chunk in response_stream:
  633. if chunk: # Only send non-empty content
  634. yield f"{json.dumps({'response': chunk})}\n"
  635. except Exception as e:
  636. logger.error(f"Streaming error: {str(e)}")
  637. yield f"{json.dumps({'error': str(e)})}\n"
  638. else:
  639. # Non-streaming mode: send complete response in one message
  640. response_content = llm_response.get("content", "")
  641. if not response_content:
  642. response_content = "No relevant context found for the query."
  643. # Create complete response object
  644. complete_response = {"response": response_content}
  645. if request.include_references:
  646. complete_response["references"] = references
  647. yield f"{json.dumps(complete_response)}\n"
  648. return StreamingResponse(
  649. stream_generator(),
  650. media_type="application/x-ndjson",
  651. headers={
  652. "Cache-Control": "no-cache",
  653. "Connection": "keep-alive",
  654. "Content-Type": "application/x-ndjson",
  655. "X-Accel-Buffering": "no", # Ensure proper handling of streaming response when proxied by Nginx
  656. },
  657. )
  658. except Exception as e:
  659. logger.error(f"Error processing streaming query: {str(e)}", exc_info=True)
  660. raise HTTPException(status_code=500, detail=str(e))
  @router.post(
      "/query/data",
      response_model=QueryDataResponse,
      dependencies=[Depends(combined_auth)],
      responses={
          200: {
              "description": "Successful data retrieval response with structured RAG data",
              "content": {
                  "application/json": {
                      "schema": {
                          "type": "object",
                          "properties": {
                              "status": {
                                  "type": "string",
                                  "enum": ["success", "failure"],
                                  "description": "Query execution status",
                              },
                              "message": {
                                  "type": "string",
                                  "description": "Status message describing the result",
                              },
                              "data": {
                                  "type": "object",
                                  "properties": {
                                      "entities": {
                                          "type": "array",
                                          "items": {
                                              "type": "object",
                                              "properties": {
                                                  "entity_name": {"type": "string"},
                                                  "entity_type": {"type": "string"},
                                                  "description": {"type": "string"},
                                                  "source_id": {"type": "string"},
                                                  "file_path": {"type": "string"},
                                                  "reference_id": {"type": "string"},
                                              },
                                          },
                                          "description": "Retrieved entities from knowledge graph",
                                      },
                                      "relationships": {
                                          "type": "array",
                                          "items": {
                                              "type": "object",
                                              "properties": {
                                                  "src_id": {"type": "string"},
                                                  "tgt_id": {"type": "string"},
                                                  "description": {"type": "string"},
                                                  "keywords": {"type": "string"},
                                                  "weight": {"type": "number"},
                                                  "source_id": {"type": "string"},
                                                  "file_path": {"type": "string"},
                                                  "reference_id": {"type": "string"},
                                              },
                                          },
                                          "description": "Retrieved relationships from knowledge graph",
                                      },
                                      "chunks": {
                                          "type": "array",
                                          "items": {
                                              "type": "object",
                                              "properties": {
                                                  "content": {"type": "string"},
                                                  "file_path": {"type": "string"},
                                                  "chunk_id": {"type": "string"},
                                                  "reference_id": {"type": "string"},
                                              },
                                          },
                                          "description": "Retrieved text chunks from vector database",
                                      },
                                      "references": {
                                          "type": "array",
                                          "items": {
                                              "type": "object",
                                              "properties": {
                                                  "reference_id": {"type": "string"},
                                                  "file_path": {"type": "string"},
                                              },
                                          },
                                          "description": "Reference list for citation purposes",
                                      },
                                  },
                                  "description": "Structured retrieval data containing entities, relationships, chunks, and references",
                              },
                              "metadata": {
                                  "type": "object",
                                  "properties": {
                                      "query_mode": {"type": "string"},
                                      "keywords": {
                                          "type": "object",
                                          "properties": {
                                              "high_level": {
                                                  "type": "array",
                                                  "items": {"type": "string"},
                                              },
                                              "low_level": {
                                                  "type": "array",
                                                  "items": {"type": "string"},
                                              },
                                          },
                                      },
                                      "processing_info": {
                                          "type": "object",
                                          "properties": {
                                              "total_entities_found": {"type": "integer"},
                                              "total_relations_found": {"type": "integer"},
                                              "entities_after_truncation": {"type": "integer"},
                                              "relations_after_truncation": {"type": "integer"},
                                              "final_chunks_count": {"type": "integer"},
                                          },
                                      },
                                  },
                                  "description": "Query metadata including mode, keywords, and processing information",
                              },
                          },
                          "required": ["status", "message", "data", "metadata"],
                      },
                      "examples": {
                          "successful_local_mode": {
                              "summary": "Local mode data retrieval",
                              "description": "Example of structured data from local mode query focusing on specific entities",
                              "value": {
                                  "status": "success",
                                  "message": "Query executed successfully",
                                  "data": {
                                      "entities": [
                                          {
                                              "entity_name": "Neural Networks",
                                              "entity_type": "CONCEPT",
                                              "description": "Computational models inspired by biological neural networks",
                                              "source_id": "chunk-123",
                                              "file_path": "/documents/ai_basics.pdf",
                                              "reference_id": "1",
                                          }
                                      ],
                                      "relationships": [
                                          {
                                              "src_id": "Neural Networks",
                                              "tgt_id": "Machine Learning",
                                              "description": "Neural networks are a subset of machine learning algorithms",
                                              "keywords": "subset, algorithm, learning",
                                              "weight": 0.85,
                                              "source_id": "chunk-123",
                                              "file_path": "/documents/ai_basics.pdf",
                                              "reference_id": "1",
                                          }
                                      ],
                                      "chunks": [
                                          {
                                              "content": "Neural networks are computational models that mimic the way biological neural networks work...",
                                              "file_path": "/documents/ai_basics.pdf",
                                              "chunk_id": "chunk-123",
                                              "reference_id": "1",
                                          }
                                      ],
                                      "references": [
                                          {
                                              "reference_id": "1",
                                              "file_path": "/documents/ai_basics.pdf",
                                          }
                                      ],
                                  },
                                  "metadata": {
                                      "query_mode": "local",
                                      "keywords": {
                                          "high_level": ["neural", "networks"],
                                          "low_level": [
                                              "computation",
                                              "model",
                                              "algorithm",
                                          ],
                                      },
                                      "processing_info": {
                                          "total_entities_found": 5,
                                          "total_relations_found": 3,
                                          "entities_after_truncation": 1,
                                          "relations_after_truncation": 1,
                                          "final_chunks_count": 1,
                                      },
                                  },
                              },
                          },
                          "global_mode": {
                              "summary": "Global mode data retrieval",
                              "description": "Example of structured data from global mode query analyzing broader patterns",
                              "value": {
                                  "status": "success",
                                  "message": "Query executed successfully",
                                  "data": {
                                      "entities": [],
                                      "relationships": [
                                          {
                                              "src_id": "Artificial Intelligence",
                                              "tgt_id": "Machine Learning",
                                              "description": "AI encompasses machine learning as a core component",
                                              "keywords": "encompasses, component, field",
                                              "weight": 0.92,
                                              "source_id": "chunk-456",
                                              "file_path": "/documents/ai_overview.pdf",
                                              "reference_id": "2",
                                          }
                                      ],
                                      "chunks": [],
                                      "references": [
                                          {
                                              "reference_id": "2",
                                              "file_path": "/documents/ai_overview.pdf",
                                          }
                                      ],
                                  },
                                  "metadata": {
                                      "query_mode": "global",
                                      "keywords": {
                                          "high_level": [
                                              "artificial",
                                              "intelligence",
                                              "overview",
                                          ],
                                          "low_level": [],
                                      },
                                  },
                              },
                          },
                          "naive_mode": {
                              "summary": "Naive mode data retrieval",
                              "description": "Example of structured data from naive mode using only vector search",
                              "value": {
                                  "status": "success",
                                  "message": "Query executed successfully",
                                  "data": {
                                      "entities": [],
                                      "relationships": [],
                                      "chunks": [
                                          {
                                              "content": "Deep learning is a subset of machine learning that uses neural networks with multiple layers...",
                                              "file_path": "/documents/deep_learning.pdf",
                                              "chunk_id": "chunk-789",
                                              "reference_id": "3",
                                          }
                                      ],
                                      "references": [
                                          {
                                              "reference_id": "3",
                                              "file_path": "/documents/deep_learning.pdf",
                                          }
                                      ],
                                  },
                                  "metadata": {
                                      "query_mode": "naive",
                                      "keywords": {"high_level": [], "low_level": []},
                                  },
                              },
                          },
                      },
                  }
              },
          },
          400: {
              "description": "Bad Request - Invalid input parameters",
              "content": {
                  "application/json": {
                      "schema": {
                          "type": "object",
                          "properties": {"detail": {"type": "string"}},
                      },
                      "example": {
                          "detail": "Query text must be at least 3 characters long"
                      },
                  }
              },
          },
          500: {
              "description": "Internal Server Error - Data retrieval failed",
              "content": {
                  "application/json": {
                      "schema": {
                          "type": "object",
                          "properties": {"detail": {"type": "string"}},
                      },
                      "example": {
                          "detail": "Failed to retrieve data: Knowledge graph unavailable"
                      },
                  }
              },
          },
      },
  )
  async def query_data(request: QueryRequest):
      """
      Advanced data retrieval endpoint for structured RAG analysis.

      This endpoint provides raw retrieval results without LLM generation, perfect for:
      - **Data Analysis**: Examine what information would be used for RAG
      - **System Integration**: Get structured data for custom processing
      - **Debugging**: Understand retrieval behavior and quality
      - **Research**: Analyze knowledge graph structure and relationships

      **Key Features:**
      - No LLM generation - pure data retrieval
      - Complete structured output with entities, relationships, and chunks
      - Always includes references for citation
      - Detailed metadata about processing and keywords
      - Compatible with all query modes and parameters

      **Query Mode Behaviors:**
      - **local**: Returns entities and their direct relationships + related chunks
      - **global**: Returns relationship patterns across the knowledge graph
      - **hybrid**: Combines local and global retrieval strategies
      - **naive**: Returns only vector-retrieved text chunks (no knowledge graph)
      - **mix**: Integrates knowledge graph data with vector-retrieved chunks
      - **bypass**: Returns empty data arrays (used for direct LLM queries)

      **Data Structure:**
      - **entities**: Knowledge graph entities with descriptions and metadata
      - **relationships**: Connections between entities with weights and descriptions
      - **chunks**: Text segments from documents with source information
      - **references**: Citation information mapping reference IDs to file paths
      - **metadata**: Processing information, keywords, and query statistics

      **Usage Examples:**

      Analyze entity relationships:
      ```json
      {
          "query": "machine learning algorithms",
          "mode": "local",
          "top_k": 10
      }
      ```

      Explore global patterns:
      ```json
      {
          "query": "artificial intelligence trends",
          "mode": "global",
          "max_relation_tokens": 2000
      }
      ```

      Vector similarity search:
      ```json
      {
          "query": "neural network architectures",
          "mode": "naive",
          "chunk_top_k": 5
      }
      ```

      Bypass initial LLM call by providing high-level and low-level keywords:
      ```json
      {
          "query": "What is Retrieval-Augmented-Generation?",
          "hl_keywords": ["machine learning", "information retrieval", "natural language processing"],
          "ll_keywords": ["retrieval augmented generation", "RAG", "knowledge base"],
          "mode": "mix"
      }
      ```

      **Response Analysis:**
      - **Empty arrays**: Normal for certain modes (e.g., naive mode has no entities/relationships)
      - **Processing info**: Shows retrieval statistics and token usage
      - **Keywords**: High-level and low-level keywords extracted from query
      - **Reference mapping**: Links all data back to source documents

      Args:
          request (QueryRequest): The request object containing query parameters:
              - **query**: The search query to analyze (min 3 characters)
              - **mode**: Retrieval strategy affecting data types returned
              - **top_k**: Number of top entities/relationships to retrieve
              - **chunk_top_k**: Number of text chunks to retrieve
              - **max_entity_tokens**: Token limit for entity context
              - **max_relation_tokens**: Token limit for relationship context
              - **max_total_tokens**: Overall token budget for retrieval

      Returns:
          QueryDataResponse: Structured JSON response containing:
              - **status**: "success" or "failure"
              - **message**: Human-readable status description
              - **data**: Complete retrieval results with entities, relationships, chunks, references
              - **metadata**: Query processing information and statistics

      Raises:
          HTTPException:
              - 400: Invalid input parameters (e.g., query too short, invalid mode)
              - 500: Internal processing error (e.g., knowledge graph unavailable)

      Note:
          This endpoint always includes references regardless of the include_references parameter,
          as structured data analysis typically requires source attribution.
      """
      try:
          # The data endpoint never streams, so streaming is forced off.
          param = request.to_query_params(False)
          response = await rag.aquery_data(request.query, param=param)

          if not isinstance(response, dict):
              # Unexpected payload type: report it inside the regular response
              # envelope instead of raising.
              return QueryDataResponse(
                  status="failure",
                  message="Invalid response type",
                  data={},
                  metadata={},
              )

          # aquery_data returns the new format with status, message, data, and metadata
          return QueryDataResponse(**response)
      except HTTPException:
          # Let deliberate HTTP errors (e.g. a 400) keep their own status code
          # rather than being re-wrapped as a 500 by the handler below.
          raise
      except Exception as e:
          logger.error(f"Error processing data query: {str(e)}", exc_info=True)
          # Chain the original exception so the root cause stays on the traceback.
          raise HTTPException(status_code=500, detail=str(e)) from e
  1063. return router