env.docker-compose-full 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033
  1. ### All configurable environment variables must appear in this sample file, either active or commented out
  2. ### Setup tool `make env-*` uses this file to generate final .env file
  3. ### Target environment of this env file: host/compose (compose is for Docker or Kubernetes)
  4. LIGHTRAG_RUNTIME_TARGET=compose
  5. ###########################
  6. ### Server Configuration
  7. ###########################
  8. HOST=0.0.0.0
  9. PORT=9621
  10. WEBUI_TITLE='My Graph KB'
  11. WEBUI_DESCRIPTION='Simple and Fast Graph Based RAG System'
  12. # WORKERS=2
  13. ### gunicorn worker timeout(as default LLM request timeout if LLM_TIMEOUT is not set)
  14. # TIMEOUT=150
  15. # CORS_ORIGINS=http://localhost:3000,http://localhost:8080
  16. ### Path Prefix Configuration (Optional)
  17. ### Used to host multiple LightRAG instances on one host behind a reverse
  18. ### proxy that routes by site prefix. Leave unset (or empty) for a
  19. ### single-instance deployment.
  20. ###
  21. ### - LIGHTRAG_API_PREFIX : reverse-proxy prefix the upstream proxy strips
  22. ### before forwarding (passed to FastAPI as root_path).
  23. ###
  24. ### See docs/MultiSiteDeployment.md for end-to-end examples.
  25. # LIGHTRAG_API_PREFIX=/site01
  26. ### Optional SSL Configuration
  27. ### Docker note: generated compose files mount staged certs at /app/data/certs/ inside the container
  28. # SSL=true
  29. # SSL_CERTFILE=/path/to/cert.pem
  30. # SSL_KEYFILE=/path/to/key.pem
  31. ### Directory Configuration (defaults to current working directory)
  32. ### Default value is: ./inputs ./rag_storage
  33. # INPUT_DIR=<absolute_path_for_doc_input_dir>
  34. # WORKING_DIR=<absolute_path_for_working_dir>
  35. ### Tiktoken cache directory (Store cached files in this folder for offline deployment)
  36. # TIKTOKEN_CACHE_DIR=/app/data/tiktoken
  37. ### Ollama Emulating Model and Tag
  38. # OLLAMA_EMULATING_MODEL_NAME=lightrag
  39. OLLAMA_EMULATING_MODEL_TAG=latest
  40. ### Max nodes for graph retrieval (Ensure WebUI local settings are also updated, which is limited to this value)
  41. # MAX_GRAPH_NODES=1000
  42. ### Logging level
  43. # LOG_LEVEL=INFO
  44. # VERBOSE=False
  45. # LOG_MAX_BYTES=10485760
  46. # LOG_BACKUP_COUNT=5
  47. ### Logfile location (defaults to current working directory)
  48. # LOG_DIR=/path/to/log/directory
  49. # LIGHTRAG_PERFORMANCE_TIMING_LOGS=false
  50. #####################################
  51. ### Login and API-Key Configuration
  52. #####################################
  53. # AUTH_ACCOUNTS='admin:admin123,user1:{bcrypt}$2b$12$S8Yu.gCbuAbNTJFB.231gegTwr5pgrFxc8H9kXQ4/sduFBHkhM8Ka'
  54. # TOKEN_SECRET=lightrag-jwt-default-secret-key!
  55. # JWT_ALGORITHM=HS256
  56. # TOKEN_EXPIRE_HOURS=48
  57. # GUEST_TOKEN_EXPIRE_HOURS=24
  58. ### Token Auto-Renewal Configuration (Sliding Window Expiration)
  59. ### Enable automatic token renewal to prevent active users from being logged out
  60. ### When enabled, tokens will be automatically renewed when remaining time < threshold
  61. # TOKEN_AUTO_RENEW=true
  62. ### Token renewal threshold (0.0 - 1.0)
  63. ### Renew token when remaining time < (total time * threshold)
  64. ### Default: 0.5 (renew when 50% time remaining)
  65. ### Examples:
  66. ### 0.5 = renew when 24h token has 12h left
  67. ### 0.25 = renew when 24h token has 6h left
  68. # TOKEN_RENEW_THRESHOLD=0.5
  69. ### Note: Token renewal is automatically skipped for certain endpoints:
  70. ### - /health: Health check endpoint (no authentication required)
  71. ### - /documents/paginated: Frequently polled by client (5-30s interval)
  72. ### - /documents/pipeline_status: Very frequently polled by client (2s interval)
  73. ### - Rate limit: Minimum 60 seconds between renewals for same user
  74. ### API-Key to access LightRAG Server API
  75. ### Use this key in HTTP requests with the 'X-API-Key' header
  76. ### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
  77. # LIGHTRAG_API_KEY=your-secure-api-key-here
  78. # WHITELIST_PATHS=/health,/api/*
  79. ######################################################################################
  80. ### Query Configuration
  81. ###
  82. ### How to control the context length sent to LLM:
  83. ### MAX_ENTITY_TOKENS + MAX_RELATION_TOKENS < MAX_TOTAL_TOKENS
  84. ### Chunk_Tokens = MAX_TOTAL_TOKENS - Actual_Entity_Tokens - Actual_Relation_Tokens
  85. ######################################################################################
  86. # LLM response cache for query (Not valid for streaming response)
  87. ENABLE_LLM_CACHE=true
  88. # COSINE_THRESHOLD=0.2
  89. ### Number of entities or relations retrieved from KG
  90. # TOP_K=40
  91. ### Maximum number of chunks for naive vector search
  92. # CHUNK_TOP_K=20
  93. ### control the actual entities sent to the LLM
  94. # MAX_ENTITY_TOKENS=6000
  95. ### control the actual relations sent to the LLM
  96. # MAX_RELATION_TOKENS=8000
  97. ### control the maximum tokens sent to the LLM (including entities, relations and chunks)
  98. # MAX_TOTAL_TOKENS=30000
  99. ### chunk selection strategies
  100. ### VECTOR: Pick KG chunks by vector similarity, delivered chunks to the LLM aligning more closely with naive retrieval
  101. ### WEIGHT: Pick KG chunks by entity and chunk weight, delivered more solely KG related chunks to the LLM
  102. ### If reranking is enabled, the impact of chunk selection strategies will be diminished.
  103. # KG_CHUNK_PICK_METHOD=VECTOR
  104. ### maximum number of related chunks per source entity or relation
  105. ### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
  106. ### Higher values increase re-ranking time
  107. # RELATED_CHUNK_NUMBER=5
  108. #########################################################
  109. ### Reranking configuration
  110. ### RERANK_BINDING type: null, cohere, jina, aliyun
  111. ### For rerank model deployed by vLLM use cohere binding
  112. ### If LightRAG deployed in Docker:
  113. ### uses host.docker.internal instead of localhost in RERANK_BINDING_HOST
  114. #########################################################
  115. RERANK_BINDING=cohere
  116. # RERANK_BINDING=null
  117. RERANK_MODEL=BAAI/bge-reranker-v2-m3
  118. RERANK_BINDING_HOST=http://localhost:8000/rerank
  119. RERANK_BINDING_API_KEY=3f5abc937e4263cdefc4f77df4cb0c37
  120. ### rerank score chunk filter(set to 0.0 to keep all chunks, 0.6 or above if LLM is not strong enough)
  121. # MIN_RERANK_SCORE=0.0
  122. ### Enable rerank by default in query params when RERANK_BINDING is not null
  123. # RERANK_BY_DEFAULT=True
  124. ### Rerank concurrency and timeout (independent from base LLM settings)
  125. ### MAX_ASYNC_RERANK falls back to MAX_ASYNC when unset.
  126. ### RERANK_TIMEOUT has its own default (30s) since reranker calls are
  127. ### typically much shorter than full LLM generation.
  128. # MAX_ASYNC_RERANK=4
  129. # RERANK_TIMEOUT=30
  130. ### Cohere AI
  131. # # RERANK_MODEL=rerank-v3.5
  132. # # RERANK_BINDING_HOST=https://api.cohere.com/v2/rerank
  133. # # RERANK_BINDING_API_KEY=your_rerank_api_key_here
  134. ### Cohere rerank chunking configuration (useful for models with token limits like ColBERT)
  135. # RERANK_ENABLE_CHUNKING=true
  136. # RERANK_MAX_TOKENS_PER_DOC=480
  137. ### Aliyun Dashscope
  138. # # RERANK_MODEL=gte-rerank-v2
  139. # # RERANK_BINDING_HOST=https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank
  140. # # RERANK_BINDING_API_KEY=your_rerank_api_key_here
  141. ### Jina AI
  142. # # RERANK_MODEL=jina-reranker-v2-base-multilingual
  143. # # RERANK_BINDING_HOST=https://api.jina.ai/v1/rerank
  144. # # RERANK_BINDING_API_KEY=your_rerank_api_key_here
  145. ### For local deployment Embedding and Reranker with vLLM (OpenAI-compatible API)
  146. ### Wizard metadata used to preserve the chosen deployment provider across setup reruns
  147. LIGHTRAG_SETUP_EMBEDDING_PROVIDER=vllm
  148. LIGHTRAG_SETUP_RERANK_PROVIDER=vllm
  149. VLLM_EMBED_MODEL=BAAI/bge-m3
  150. VLLM_EMBED_PORT=8001
  151. VLLM_EMBED_DEVICE=cuda
  152. ### VLLM_EMBED_API_KEY is passed as --api-key to vLLM; synced to EMBEDDING_BINDING_API_KEY; auto-generated if blank
  153. VLLM_EMBED_API_KEY=7f6904c8185e908a1e0bdf9f69cd3ccc
  154. # VLLM_EMBED_EXTRA_ARGS=
  155. VLLM_RERANK_MODEL=BAAI/bge-reranker-v2-m3
  156. VLLM_RERANK_PORT=8000
  157. VLLM_RERANK_DEVICE=cuda
  158. ### VLLM_RERANK_API_KEY is passed as --api-key to vLLM; synced to RERANK_BINDING_API_KEY; auto-generated if blank
  159. VLLM_RERANK_API_KEY=3f5abc937e4263cdefc4f77df4cb0c37
  160. ### Use float16 for GPU mode. CPU mode uses the official vLLM CPU image.
  161. # VLLM_USE_CPU=1
  162. ### Set to 1 for CPU mode, unset for GPU mode
  163. # CUDA_VISIBLE_DEVICES=-1
  164. ### Set to -1 to disable CUDA (CPU mode), or specific GPU IDs for GPU mode
  165. # NVIDIA_VISIBLE_DEVICES=0
  166. ### Optional Docker runtime equivalent; generated GPU compose honors either variable.
  167. # VLLM_RERANK_EXTRA_ARGS=
  168. ########################################
  169. ### Document processing configuration
  170. ########################################
  171. ### Document processing output language: English, Chinese, French, German ...
  172. SUMMARY_LANGUAGE=Chinese
  173. # SUMMARY_LANGUAGE=English
  174. ### Enable JSON-structured output for entity extraction
  175. ### Default behavior: JSON output is disabled when ENTITY_EXTRACTION_USE_JSON is unset
  176. ### JSON output incurs higher latency but delivers improved reliability
  177. ENTITY_EXTRACTION_USE_JSON=true
  178. ### Optional external YAML profile for entity type guidance and extraction examples
  179. ### Profiles are loaded from PROMPT_DIR/entity_type (PROMPT_DIR defaults to ./prompts).
  180. ### A reference template is shipped at prompts/samples/entity_type_prompt.sample.yml;
  181. # ENTITY_TYPE_PROMPT_FILE=entity_type_prompt.yml
  182. # PROMPT_DIR=<absolute_path_for_prompt_dir>
  183. ### Multimodal parsing/analyze integration
  184. ### Optional parser routing rules. Example for VLM & MinerU enabled configuration:
  185. ### LIGHTRAG_PARSER=*:native-iteP,*:mineru-iteP,*:legacy-R
  186. ### Rules may be separated with commas or semicolons. Rules match file suffixes
  187. ### (pdf), not full names (*.pdf), and are checked left-to-right.
  188. ### If mineru/docling appears in LIGHTRAG_PARSER, the corresponding endpoint
  189. ### below must be configured before server startup.
  190. ### See docs/FileProcessingPipeline.md for detail
  191. LIGHTRAG_PARSER=*:native-teP,*:legacy-R
  192. ### Async parser service protocol (optional)
  193. ### Configure these when using remote MinerU/Docling async services
  194. ### ---- MinerU shared parameters (both local and official modes) ----
  195. ### MinerU API protocol. Choose one active mode.
  196. ### - official: MinerU precision API v4. Requires MINERU_API_TOKEN.
  197. ### - local: self-hosted mineru-api / mineru-router base URL.
  198. MINERU_API_MODE=local
  199. # MINERU_POLL_INTERVAL_SECONDS=2
  200. # MINERU_MAX_POLLS=180
  201. # MINERU_LANGUAGE=ch
  202. # MINERU_ENABLE_TABLE=true
  203. # MINERU_ENABLE_FORMULA=true
  204. # MINERU_PAGE_RANGES=
  205. ### MINERU_PAGE_RANGES semantics differ by mode:
  206. ### - official: forwarded verbatim, supports e.g. "1-3,5,7-9".
  207. ### - local: only a single page ("3") or simple range ("1-10"); comma
  208. ### lists are rejected at startup.
  209. ### When switching modes, double-check this constraint.
  210. ### ---- MinerU local-only (MINERU_API_MODE=local) ----
  211. MINERU_LOCAL_ENDPOINT=http://127.0.0.1:8000
  212. ### MINERU_LOCAL_BACKEND: which mineru-api backend handles the parse.
  213. ### Accepted values (per mineru-api POST /tasks form parameter `backend`):
  214. ### hybrid-auto-engine - pipeline + VLM combo with auto-selected local
  215. ### engine (mineru-api's default). GPU required.
  216. ### pipeline - CPU-friendly traditional pipeline; no VLM step.
  217. ### vlm-auto-engine - VLM with auto-selected local inference engine
  218. ### (sglang-engine / vllm-engine if GPU is available);
  219. ### requires the matching engine extra preinstalled
  220. ### on the mineru-api side, plus model weights.
  221. ### We ship `hybrid-auto-engine` -- requires the target mineru-api
  222. ### deployment to have a GPU plus the matching inference engine
  223. ### (sglang / vllm) and model weights installed. Switch to `pipeline`
  224. ### for CPU-only deployments without those dependencies.
  225. MINERU_LOCAL_BACKEND=hybrid-auto-engine
  226. ### MINERU_LOCAL_PARSE_METHOD: parsing strategy for the pipeline component.
  227. ### Accepted values:
  228. ### auto - auto-detect embedded text-layer vs OCR per page (default).
  229. ### txt - extract text from the embedded text layer only; fastest,
  230. ### but yields empty output on scanned PDFs without a text layer.
  231. ### ocr - force OCR on every page regardless of text-layer quality;
  232. ### slowest, reliable on scanned or low-quality PDFs.
  233. ### Only consumed when MINERU_LOCAL_BACKEND is `pipeline` or
  234. ### `hybrid-auto-engine` (the pipeline arm of the hybrid pipeline).
  235. ### Pure VLM backends (`vlm-auto-engine`, `vlm-http-client`) ignore this
  236. ### parameter -- the VLM model handles layout/OCR natively.
  237. MINERU_LOCAL_PARSE_METHOD=auto
  238. ### MINERU_LOCAL_IMAGE_ANALYSIS: enable VLM image/chart analysis pass for
  239. ### better caption and footnote recognition.
  240. ### Only consumed by `vlm-auto-engine`, `vlm-http-client`,
  241. ### `hybrid-auto-engine`, `hybrid-http-client`. The `pipeline` backend
  242. ### silently drops this flag -- its `_process_pipeline` does not accept
  243. ### the kwarg, so setting `false` under pipeline does NOT speed parsing
  244. ### up; pipeline never invokes the VLM image pass to begin with.
  245. ### Disable (`false`) on VLM / hybrid backends to skip the extra VLM
  246. ### round, trading image / chart semantic descriptions for faster parsing
  247. ### and lower GPU cost.
  248. MINERU_LOCAL_IMAGE_ANALYSIS=true
  249. # MINERU_LOCAL_START_PAGE_ID=0
  250. # MINERU_LOCAL_END_PAGE_ID=99999
  251. ### ---- MinerU official-only (MINERU_API_MODE=official) ----
  252. # MINERU_API_TOKEN=your-api-key
  253. # MINERU_OFFICIAL_ENDPOINT=https://mineru.net
  254. # MINERU_MODEL_VERSION=vlm
  255. # MINERU_IS_OCR=false
  256. ### Force re-upload of file to MinerU on every retry after failure
  257. ### Disables caching of result outcomes
  258. # LIGHTRAG_FORCE_REPARSE_MINERU=false
  259. ### Docling parser (docling-serve v1 / async API).
  260. ###
  261. ### Endpoint: base URL only — the client appends /v1/convert/file/async,
  262. ### /v1/status/poll/{task_id}?wait=<DOCLING_POLL_INTERVAL_SECONDS>,
  263. ### /v1/result/{task_id} itself.
  264. ### Pipeline shape (pipeline=standard, target_type=zip,
  265. ### to_formats=[json,md], image_export_mode=referenced) is fixed in
  266. ### code so the sidecar flow stays self-consistent — flipping any of
  267. ### these would break the adapter and is therefore not exposed as env.
  268. ###
  269. ### OCR tunables:
  270. ### - DOCLING_DO_OCR: master switch; when false the engine relies only on
  271. ### text-layer extraction.
  272. ### - DOCLING_FORCE_OCR: when true, OCR every page regardless of text-layer
  273. ### quality (slower, useful for scanned PDFs with bad text layers).
  274. ### - DOCLING_OCR_ENGINE: explicit engine selection (DEPRECATED in the
  275. ### docling-serve OpenAPI but still honored for older deployments).
  276. ### - DOCLING_OCR_PRESET: recommended replacement for DOCLING_OCR_ENGINE.
  277. ### - DOCLING_OCR_LANG: JSON array (e.g. ["en","zh"]) or comma-separated
  278. ### list. Empty (default) lets the OCR engine pick its default.
  279. ### - DOCLING_DO_FORMULA_ENRICHMENT: when true, the code-formula model runs
  280. ### and `texts[*].label="formula"` items carry LaTeX in `text`. Default
  281. ### false because the model may not be present on every deployment;
  282. ### adapter falls back to plain-text formulas when disabled.
  283. ###
  284. ### Polling budget (server-side long-poll; client does NOT add extra sleep):
  285. ### - DOCLING_POLL_INTERVAL_SECONDS: ``?wait=N`` value sent to
  286. ### /v1/status/poll/{task_id}. Larger N = fewer round trips per parse;
  287. ### bound by your reverse-proxy idle timeout. Default 5.
  288. ### - DOCLING_MAX_POLLS: max polling rounds before raising TimeoutError.
  289. ### Worst-case wall-clock budget ≈
  290. ### DOCLING_POLL_INTERVAL_SECONDS × DOCLING_MAX_POLLS. Default 240
  291. ### (≈ 20 minutes at wait=5s); raise for very large PDFs.
  292. ###
  293. ### Bundle cache controls:
  294. ### - DOCLING_ENGINE_VERSION: recorded in <base>.docling_raw/_manifest.json.
  295. ### Mismatch with the recorded value forces a cache miss → re-download.
  296. ### Leave empty to skip this check.
  297. ### - LIGHTRAG_FORCE_REPARSE_DOCLING: when truthy ("1"/"true"), bypass the
  298. ### docling raw cache and re-upload on every parse_docling call.
  299. ### - DOCLING_BBOX_ATTRIBUTES: override the doc-level bbox_attributes
  300. ### written into <base>.blocks.jsonl meta. Default
  301. ### {"origin":"LEFTBOTTOM"} matches docling's default coordinate system.
  302. DOCLING_ENDPOINT=http://localhost:5001
  303. DOCLING_DO_OCR=true
  304. DOCLING_FORCE_OCR=true
  305. DOCLING_DO_FORMULA_ENRICHMENT=false
  306. # DOCLING_OCR_ENGINE=auto
  307. # DOCLING_OCR_PRESET=auto
  308. # DOCLING_OCR_LANG=
  309. # DOCLING_POLL_INTERVAL_SECONDS=5
  310. # DOCLING_MAX_POLLS=240
  311. # DOCLING_BBOX_ATTRIBUTES={"origin":"LEFTBOTTOM"}
  312. ### Force re-upload of file to Docling on every retry after failure
  313. ### Disables caching of result outcomes
  314. # LIGHTRAG_FORCE_REPARSE_DOCLING=false
  315. ### File upload size limit (in bytes)
  316. ### Default: 104857600 (100MB)
  317. ### Set to 0 or None for unlimited upload size
  318. ### Examples:
  319. ### 52428800 = 50MB
  320. ### 104857600 = 100MB (default)
  321. ### 209715200 = 200MB
  322. ### Note: If using Nginx as reverse proxy, also configure client_max_body_size
  323. # MAX_UPLOAD_SIZE=104857600
  324. ### Chunk size for document splitting, 500~1500 is recommended
  325. # CHUNK_SIZE=1200
  326. # CHUNK_OVERLAP_SIZE=100
  327. ### Fixed-token chunker (process_options=F, default) settings
  328. ### CHUNK_F_OVERLAP_SIZE: token overlap; falls back to CHUNK_OVERLAP_SIZE when unset
  329. ### CHUNK_F_SPLIT_BY_CHARACTER: optional separator string; pre-segment before token windowing
  330. ### CHUNK_F_SPLIT_BY_CHARACTER_ONLY: when true, raise on oversize segment instead of token re-split
  331. # CHUNK_F_OVERLAP_SIZE=100
  332. # CHUNK_F_SPLIT_BY_CHARACTER=
  333. # CHUNK_F_SPLIT_BY_CHARACTER_ONLY=false
  334. ### Recursive character chunker (process_options=R) settings
  335. ### CHUNK_R_SIZE: per-strategy chunk_token_size override; falls back to CHUNK_SIZE when unset
  336. ### CHUNK_R_OVERLAP_SIZE: token overlap between adjacent chunks; falls back to CHUNK_OVERLAP_SIZE when unset
  337. ### CHUNK_R_SEPARATORS: JSON array of cascaded separators tried by RecursiveCharacterTextSplitter.
  338. ### Default includes CJK sentence-ending punctuation so Chinese / mixed-language
  339. ### documents split at semantic boundaries. Order: paragraph (\n\n) > line (\n) >
  340. ### Chinese sentence-end (。!?) > Chinese semi-clause (;,) > space > char.
  341. ### English ".?!" are intentionally omitted (literal match would split "0.95" /
  342. ### "e.g."); the English path falls through space / char as before.
  343. # CHUNK_R_SIZE=1200
  344. # CHUNK_R_OVERLAP_SIZE=100
  345. # CHUNK_R_SEPARATORS=["\n\n","\n","。","!","?",";",","," ",""]
  346. ### Semantic vector chunker (process_options=V) settings
  347. ### CHUNK_V_SIZE: per-strategy chunk_token_size hard cap (oversized pieces are
  348. ### re-split via R before being emitted); falls back to CHUNK_SIZE when unset
  349. ### CHUNK_V_BREAKPOINT_THRESHOLD_TYPE: percentile | standard_deviation | interquartile | gradient
  350. ### CHUNK_V_BREAKPOINT_THRESHOLD_AMOUNT: leave empty to use the LangChain per-type default (e.g. 95 for percentile)
  351. ### CHUNK_V_BUFFER_SIZE: number of adjacent sentences combined when computing distances
  352. ### CHUNK_V_SENTENCE_SPLIT_REGEX: regex fed to LangChain SemanticChunker for the
  353. ### initial sentence split. Default extends the upstream English-only pattern
  354. ### with CJK sentence-end punctuation (。?!). Override if you need a
  355. ### different language mix. Note: env value is the raw regex string, no JSON
  356. ### quoting.
  357. # CHUNK_V_SIZE=1200
  358. # CHUNK_V_BREAKPOINT_THRESHOLD_TYPE=percentile
  359. # CHUNK_V_BREAKPOINT_THRESHOLD_AMOUNT=
  360. # CHUNK_V_BUFFER_SIZE=1
  361. # CHUNK_V_SENTENCE_SPLIT_REGEX=(?<=[.?!])\s+|(?<=[。?!])
  362. ### Paragraph semantic chunker (process_options=P) settings
  363. ### CHUNK_P_SIZE: per-strategy chunk_token_size override; defaults to 2000 when unset
  364. ### (does NOT fall back to CHUNK_SIZE — paragraph-semantic merging needs more
  365. ### headroom than the global default to keep related paragraphs together).
  366. ### CHUNK_P_OVERLAP_SIZE: overlap for prose fallback and table-bridge context;
  367. ### falls back to CHUNK_OVERLAP_SIZE when unset
  368. # CHUNK_P_SIZE=2000
  369. # CHUNK_P_OVERLAP_SIZE=100
  370. ### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
  371. # FORCE_LLM_SUMMARY_ON_MERGE=8
  372. ### Max description token size to trigger LLM summary
  373. # SUMMARY_MAX_TOKENS = 1200
  374. ### Recommended LLM summary output length in tokens
  375. # SUMMARY_LENGTH_RECOMMENDED=600
  376. ### Maximum context size sent to LLM for description summary
  377. # SUMMARY_CONTEXT_SIZE=12000
  378. ### Maximum token size allowed for entity extraction input context
  379. # MAX_EXTRACT_INPUT_TOKENS=20480
  380. ### Multimodal surrounding-context budget (per-half token cap for the
  381. ### `leading` / `trailing` text injected into VLM and extract prompts).
  382. ### Computed at analyze_multimodal entry; the two halves are independent
  383. ### so deployments can bias context forward or backward as needed.
  384. # SURROUNDING_LEADING_MAX_TOKENS=2000
  385. # SURROUNDING_TRAILING_MAX_TOKENS=2000
  386. ### Per-response cap on total entity+relationship rows/records emitted by the LLM
  387. # MAX_EXTRACTION_RECORDS=100
  388. ### Per-response cap on entity rows/objects emitted by the LLM
  389. # MAX_EXTRACTION_ENTITIES=40
  390. ### control the maximum chunk_ids stored in vector and graph db
  391. # MAX_SOURCE_IDS_PER_ENTITY=300
  392. # MAX_SOURCE_IDS_PER_RELATION=300
  393. ### control chunk_ids limitation method: FIFO, KEEP
  394. ### FIFO: First in first out
  395. ### KEEP: Keep oldest (less merge action and faster)
  396. # SOURCE_IDS_LIMIT_METHOD=FIFO
  397. ### Maximum number of file paths stored in entity/relation file_path field
  398. ### For display only; does not affect query performance
  399. # MAX_FILE_PATHS=100
  400. ### PDF decryption password for protected PDF files
  401. # PDF_DECRYPT_PASSWORD=your_pdf_password_here
  402. ### LLM cache for entity/relation extraction is enabled by default
  403. ### Disabling it will prevent graph reconstruction after document deletion
  404. ENABLE_LLM_CACHE_FOR_EXTRACT=true
  405. ########################################
  406. ### Pipeline Concurrency Configuration
  407. ########################################
  408. ### Number of parallel processing documents(between 2~10, MAX_ASYNC/3 is recommended)
  409. MAX_PARALLEL_INSERT=2
  410. ### Optional per-stage document pipeline concurrency
  411. # MAX_PARALLEL_PARSE_NATIVE=5
  412. # MAX_PARALLEL_PARSE_MINERU=1
  413. # MAX_PARALLEL_PARSE_DOCLING=1
  414. # MAX_PARALLEL_ANALYZE=5
  415. ### Optional queue sizes for staged pipeline workers
  416. # QUEUE_SIZE_DEFAULT=100
  417. # QUEUE_SIZE_INSERT=4
  418. ### Max concurrency requests for Embedding
  419. # EMBEDDING_FUNC_MAX_ASYNC=8
  420. ### Number of chunks sent to Embedding in a single request
  421. # EMBEDDING_BATCH_NUM=10
  422. ###########################################################################
  423. ### Global LLM Configuration
  424. ### LLM_BINDING type: openai, ollama, lollms, azure_openai, bedrock, gemini
  425. ### LLM_BINDING_HOST: Service endpoint (left empty if using the provider SDK default endpoint)
  426. ### LLM_BINDING_API_KEY: api key
  427. ### If LightRAG deployed in Docker:
  428. ### uses host.docker.internal instead of localhost in LLM_BINDING_HOST
  429. ###########################################################################
  430. ### LLM request timeout setting for all LLMs (0 means no timeout for Ollama)
  431. # LLM_TIMEOUT=180
  432. LLM_BINDING=openai
  433. LLM_BINDING_HOST=https://ai.znipower.com:5017
  434. # LLM_BINDING_HOST=https://api.openai.com/v1
  435. LLM_BINDING_API_KEY=sk-ffbkKu61NfLLCsXLzx2MRg
  436. # LLM_BINDING_API_KEY=your_api_key
  437. LLM_MODEL=gemini-3-flash-preview
  438. # LLM_MODEL=gpt-5.4-mini
  439. ### Max concurrency requests of LLM
  440. MAX_ASYNC=4
  441. ###########################################################################
  442. ### Role-specific LLM/VLM overrides
  443. ### Available roles: EXTRACT, KEYWORD, QUERY, VLM
  444. ### If unset, each role falls back to the global LLM configuration above.
  445. ### For detail information, refer to:
  446. ### docs/RoleSpecificLLMConfiguration.md
  447. ### docs/RoleSpecificLLMConfiguration-zh.md
  448. ###########################################################################
  449. KEYWORD_LLM_MODEL=gemini-3-flash-preview
  450. # KEYWORD_LLM_MODEL=gpt-5.4-nano
  451. # KEYWORD_MAX_ASYNC_LLM
  452. # KEYWORD_LLM_TIMEOUT=180
  453. # KEYWORD_LLM_BINDING=openai
  454. # KEYWORD_LLM_BINDING_HOST=https://api.openai.com/v1
  455. # KEYWORD_LLM_BINDING_API_KEY=your_api_key
  456. QUERY_LLM_MODEL=gemini-3-flash-preview
  457. # QUERY_LLM_MODEL=gpt-5.4
  458. # QUERY_MAX_ASYNC_LLM
  459. # QUERY_LLM_TIMEOUT=180
  460. # QUERY_LLM_BINDING=openai
  461. # QUERY_LLM_BINDING_HOST=https://api.openai.com/v1
  462. # QUERY_LLM_BINDING_API_KEY=your_api_key
  463. VLM_LLM_MODEL=gpt-5.4-mini
  464. # VLM_MAX_ASYNC_LLM=4
  465. # VLM_LLM_TIMEOUT=180
  466. # VLM_LLM_BINDING=openai
  467. # VLM_LLM_BINDING_HOST=https://api.example.com/v1
  468. # VLM_LLM_BINDING_API_KEY=your_vlm_api_key
  469. ### Master switch for VLM multimodal analysis (i/t/e items).
  470. ### When false, multimodal item is skipped regardless of document process_options
  471. ### When true, VLM_LLM_BINDING (or the base LLM_BINDING) must be vision-capable
  472. ### lollms is rejected at startup
  473. VLM_PROCESS_ENABLE=false
  474. ### Maximum image bytes sent to VLM (5242880=5MB)
  475. VLM_MAX_IMAGE_BYTES=5242880
  476. ###########################################################################
  477. ### Provider-specific LLM options
  478. ### Increasing the temperature setting may help mitigate infinite inference
  479. ### loops during entity/relation extraction, particularly when using
  480. ### models with more limited capabilities, such as Qwen3-30B
  481. ### Set a max output token limit to prevent endless output from certain LLMs,
  482. ### which may trigger timeout errors during entity and relation extraction.
  483. ### max_output_token < LLM_TIMEOUT * llm_tokens_per_second
  484. ### i.e. max_output_token = 9000 = 180s * 50 tokens/s
  485. ### Sample commands to list all supported options for a specific LLM_BINDING:
  486. ### lightrag-server --llm-binding openai --help
  487. ### lightrag-server --llm-binding bedrock --help
  488. ### lightrag-server --llm-binding gemini --help
  489. ###########################################################################
  490. ### OpenAI Specific Parameters (OpenRouter or other OpenAI-compatible APIs):
  491. ### LLM_BINDING=openai
  492. ### LLM_BINDING_HOST=https://openrouter.ai/api/v1
  493. ### LLM_MODEL=google/gemini-2.5-flash
  494. # OPENAI_LLM_TEMPERATURE=0.9
  495. ### For vLLM/SGLang and most of OpenAI compatible API provider
  496. # OPENAI_LLM_MAX_TOKENS=9000
  497. ### For OpenAI o1-mini or newer models, which use max_completion_tokens instead of max_tokens
  498. OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
  499. ### For OpenAI reason control
  500. # OPENAI_LLM_REASONING_EFFORT=minimal
  501. ### For OpenRouter reasoning control
  502. # OPENAI_LLM_EXTRA_BODY='{"reasoning": {"enabled": false}}'
  503. ### For Qwen3 reasoning control deploy by vLLM
  504. # OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
  505. ### Azure OpenAI Specific Parameters:
  506. ### LLM_BINDING=azure_openai
  507. ### LLM_BINDING_HOST=https://xxxx.openai.azure.com/
  508. ### LLM_BINDING_API_KEY=your_api_key
  509. ### LLM_MODEL=my-gpt-mini-deployment
  510. ### You may use deployment name for LLM_MODEL or set AZURE_OPENAI_DEPLOYMENT instead
  511. # AZURE_OPENAI_DEPLOYMENT=my-deployment-name
  512. # AZURE_OPENAI_API_VERSION=2024-08-01-preview
  513. ### Google AI Studio Gemini Specific Parameters:
  514. ### DEFAULT_GEMINI_ENDPOINT means the SDK selects the endpoint automatically
  515. ### LLM_BINDING=gemini
  516. ### LLM_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
  517. ### LLM_BINDING_API_KEY=your_gemini_api_key
  518. ### LLM_MODEL=gemini-flash-latest
  519. # GEMINI_LLM_TEMPERATURE=0.7
  520. # GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
  521. ### Enable or disable thinking
  522. ### GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
  523. ### GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
  524. # GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
  525. ### Google Vertex AI Gemini Specific Parameters:
  526. ### Vertex AI uses GOOGLE_APPLICATION_CREDENTIALS instead of an API key for authentication
  527. # GOOGLE_GENAI_USE_VERTEXAI=true
  528. # GOOGLE_CLOUD_PROJECT='your-project-id'
  529. # GOOGLE_CLOUD_LOCATION='us-central1'
  530. # GOOGLE_APPLICATION_CREDENTIALS='/Users/xxxxx/your-service-account-credentials-file.json'
  531. ### Bedrock Specific Parameters:
  532. ### LLM_BINDING=bedrock
  533. ### LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
  534. ### LLM_MODEL=us.amazon.nova-lite-v1:0
  535. ### Region is required for all three modes (Bedrock endpoints are regional).
  536. # AWS_REGION=us-west-1
  537. ### Bedrock Authentication (choose ONE of the following three approaches):
  538. ### Bedrock API key (bearer token). Bedrock ignores LLM_BINDING_API_KEY;
  539. ### set AWS_BEARER_TOKEN_BEDROCK directly before startup. This is a
  540. ### process-level AWS SDK setting and cannot be overridden per role.
  541. # AWS_BEARER_TOKEN_BEDROCK=your_bedrock_api_key
  542. ### SigV4 credentials (classic IAM user / STS / instance profile).
  543. # AWS_ACCESS_KEY_ID=your_aws_access_key_id
  544. # AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
  545. # AWS_SESSION_TOKEN=your_optional_aws_session_token
  546. ### Ambient credentials (AWS SDK default credential chain).
  547. ### To use this mode, leave AWS_BEARER_TOKEN_BEDROCK, AWS_ACCESS_KEY_ID,
  548. ### AWS_SECRET_ACCESS_KEY, and AWS_SESSION_TOKEN above commented out — the
  549. ### AWS SDK will then resolve credentials from ~/.aws/credentials, IAM role,
  550. ### instance profile, SSO, or environment variables outside .env.
  551. ### Activating any of the lines above forces that explicit mode and bypasses
  552. ### the credential chain.
  553. # BEDROCK_LLM_TEMPERATURE=1.0
  554. # BEDROCK_LLM_MAX_TOKENS=9000
  555. # BEDROCK_LLM_TOP_P=1.0
  556. # BEDROCK_LLM_STOP_SEQUENCES='["</s>"]'
  557. ### Bedrock model reasoning control
  558. # BEDROCK_LLM_EXTRA_FIELDS='{"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "low"}}'
  559. ### Ollama Specific Parameters:
  560. ### LLM_BINDING=ollama
  561. ### LLM_BINDING_HOST=http://localhost:11434
  562. ### LLM_MODEL=qwen3.5:9b
  563. ### OLLAMA_LLM_NUM_CTX must be provided, and should be at least MAX_TOTAL_TOKENS + 2000
  564. OLLAMA_LLM_NUM_CTX=32768
  565. # OLLAMA_LLM_NUM_PREDICT=9000
  566. # OLLAMA_LLM_TEMPERATURE=0.85
  567. # OLLAMA_LLM_STOP='["</s>", "<|EOT|>"]'
  568. #######################################################################################
  569. ### Embedding Configuration (Should not be changed after the first file processed)
  570. ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, bedrock
  571. ### EMBEDDING_BINDING_HOST: Service endpoint (leave empty if using the default endpoint provided by the openai or gemini SDK)
  572. ### EMBEDDING_BINDING_API_KEY: api key
  573. ### If LightRAG is deployed in Docker:
  574. ### use host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
  575. ### Control whether to send embedding_dim parameter to embedding API
  576. ### For OpenAI: Set EMBEDDING_SEND_DIM=true to enable dynamic dimension adjustment
  577. ### For OpenAI: Set EMBEDDING_SEND_DIM=false (default) to disable sending dimension parameter
  578. ### For Gemini: Always set EMBEDDING_SEND_DIM=true
  579. ### Control whether to use base64 encoding format for embeddings (improves performance for OpenAI)
  580. ### For OpenAI: Set EMBEDDING_USE_BASE64=true (default) to use base64 encoding
  581. ### For Yandex Cloud and other providers that don't support it: Set EMBEDDING_USE_BASE64=false
  582. #######################################################################################
  583. # EMBEDDING_TIMEOUT=30
  584. ### OpenAI compatible embedding
  585. EMBEDDING_BINDING=openai
  586. EMBEDDING_BINDING_HOST=http://localhost:8001/v1
  587. # EMBEDDING_BINDING_HOST=https://api.openai.com/v1
  588. EMBEDDING_BINDING_API_KEY=7f6904c8185e908a1e0bdf9f69cd3ccc
  589. # EMBEDDING_BINDING_API_KEY=your_api_key
  590. EMBEDDING_MODEL=BAAI/bge-m3
  591. # EMBEDDING_MODEL=text-embedding-3-large
  592. EMBEDDING_DIM=1024
  593. # EMBEDDING_DIM=3072
  594. EMBEDDING_TOKEN_LIMIT=8192
  595. EMBEDDING_SEND_DIM=false
  596. EMBEDDING_USE_BASE64=true
  597. ### Optional: asymmetric embeddings (query/document behavior split)
  598. ### Leave EMBEDDING_ASYMMETRIC unset or set false to keep symmetric behavior.
  599. ### Set true only when the selected embedding backend supports asymmetric mode.
  600. # EMBEDDING_ASYMMETRIC=true
  601. ### Provider-task bindings such as Jina/Gemini/VoyageAI use provider parameters
  602. ### and should not configure the prefix variables below.
  603. ### Prefix-based models such as BGE/E5/GTE require both prefix variables.
  604. ### Wrap non-empty values with quotes if there are trailing spaces.
  605. # EMBEDDING_DOCUMENT_PREFIX="search_document: "
  606. ### Use NO_PREFIX for a side that should intentionally have no prefix.
  607. ### EMBEDDING_DOCUMENT_PREFIX=NO_PREFIX
  608. # EMBEDDING_QUERY_PREFIX="search_query: "
  609. ###########################################################################
  610. ### Provider specific Embedding options
  611. ### Increasing the temperature setting may help mitigate infinite inference
  612. ### loops during entity/relation extraction, particularly when using
  613. ### models with more limited capabilities, such as Qwen3-30B
  614. ### Set a max output token limit to prevent endless output from certain LLMs,
  615. ### which may trigger timeout errors during entity and relation extraction.
  616. ### max_output_token < LLM_TIMEOUT * llm_tokens_per_second
  617. ### e.g. max_output_token = 9000 = 180s * 50 tokens/s
  618. ### Sample commands to list all supported options for a specific EMBEDDING_BINDING:
  619. ### lightrag-server --embedding-binding openai --help
  620. ### lightrag-server --embedding-binding ollama --help
  621. ### lightrag-server --embedding-binding bedrock --help
  622. ###########################################################################
  623. ### Azure Embedding Specific Parameters:
  624. ### Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead
  625. ### EMBEDDING_BINDING=azure_openai
  626. ### EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
  627. ### EMBEDDING_BINDING_API_KEY=your_api_key
  628. ### EMBEDDING_MODEL=my-text-embedding-3-large-deployment
  629. ### EMBEDDING_DIM=3072
  630. # AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
  631. ### Ollama Embedding Specific Parameters:
  632. ### EMBEDDING_BINDING=ollama
  633. ### EMBEDDING_BINDING_HOST=http://localhost:11434
  634. ### EMBEDDING_BINDING_API_KEY=your_api_key
  635. ### EMBEDDING_MODEL=qwen3-embedding:4b
  636. ### EMBEDDING_DIM=2560
  637. ### Ollama should set the num_ctx option in addition to EMBEDDING_TOKEN_LIMIT
  638. OLLAMA_EMBEDDING_NUM_CTX=8192
  639. ### Gemini Embedding Specific Parameters:
  640. ### DEFAULT_GEMINI_ENDPOINT means the SDK selects the endpoint automatically
  641. ### Gemini embedding requires sending dimension to server
  642. ### EMBEDDING_BINDING=gemini
  643. ### EMBEDDING_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
  644. ### EMBEDDING_BINDING_API_KEY=your_api_key
  645. ### EMBEDDING_MODEL=gemini-embedding-001
  646. ### EMBEDDING_DIM=1536
  647. ### EMBEDDING_TOKEN_LIMIT=2048
  648. ### EMBEDDING_SEND_DIM=true
  649. ### Bedrock Embedding Specific Parameters:
  650. ### EMBEDDING_BINDING=bedrock
  651. ### EMBEDDING_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
  652. ### EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
  653. ### EMBEDDING_DIM=1024
  654. ### Shares the same region and authentication settings as the Bedrock LLM; no need to configure them again here
  655. ### AWS_REGION=us-west-1
  656. ### AWS_BEARER_TOKEN_BEDROCK=your_bedrock_api_key
  657. ### AWS_ACCESS_KEY_ID=your_aws_access_key_id
  658. ### AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
  659. ### AWS_SESSION_TOKEN=your_optional_aws_session_token
  660. ### Jina AI Embedding Specific Parameters:
  661. ### EMBEDDING_BINDING=jina
  662. ### EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
  663. ### EMBEDDING_MODEL=jina-embeddings-v4
  664. ### EMBEDDING_DIM=2048
  665. ### EMBEDDING_BINDING_API_KEY=your_api_key
  666. ####################################################################
  667. ### WORKSPACE sets workspace name for all storage types
  668. ### for the purpose of isolating data from LightRAG instances.
  669. ### Valid workspace name constraints: a-z, A-Z, 0-9, and _
  670. ####################################################################
  671. # WORKSPACE=
  672. ############################
  673. ### Data storage selection
  674. ############################
  675. ### Default storage: JSON/Nano/NetworkX (Recommended for test deployment)
  676. LIGHTRAG_KV_STORAGE=PGKVStorage
  677. # LIGHTRAG_KV_STORAGE=JsonKVStorage
  678. LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
  679. # LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
  680. LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
  681. # LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
  682. LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
  683. # LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
  684. ### Wizard metadata used to preserve env-storage Docker deployment defaults across setup reruns
  685. LIGHTRAG_SETUP_POSTGRES_DEPLOYMENT=docker
  686. LIGHTRAG_SETUP_NEO4J_DEPLOYMENT=docker
  687. # LIGHTRAG_SETUP_MONGODB_DEPLOYMENT=docker
  688. # LIGHTRAG_SETUP_MONGODB_DEPLOYMENT=atlas-capable
  689. # LIGHTRAG_SETUP_REDIS_DEPLOYMENT=docker
  690. LIGHTRAG_SETUP_MILVUS_DEPLOYMENT=docker
  691. # LIGHTRAG_SETUP_QDRANT_DEPLOYMENT=docker
  692. # LIGHTRAG_SETUP_MEMGRAPH_DEPLOYMENT=docker
  693. # LIGHTRAG_SETUP_OPENSEARCH_DEPLOYMENT=docker
  694. ### PostgreSQL Configuration
  695. POSTGRES_HOST=localhost
  696. POSTGRES_PORT=5432
  697. POSTGRES_USER=rag
  698. # POSTGRES_USER=your_username
  699. POSTGRES_PASSWORD=rag
  700. # POSTGRES_PASSWORD='your_password'
  701. POSTGRES_DATABASE=rag
  702. POSTGRES_MAX_CONNECTIONS=12
  703. # POSTGRES_MAX_CONNECTIONS=25
  704. ### DB specific workspace should not be set; kept for compatibility only
  705. # POSTGRES_WORKSPACE=forced_workspace_name
  706. ### Use HNSW_HALFVEC for large embeddings (2000+ dim).
  707. ### Requires pgvector extension >= 0.7.0.
  708. ### Vector storage type: HNSW, HNSW_HALFVEC, IVFFlat, VCHORDRQ
  709. POSTGRES_VECTOR_INDEX_TYPE=HNSW
  710. POSTGRES_HNSW_M=16
  711. POSTGRES_HNSW_EF=200
  712. POSTGRES_IVFFLAT_LISTS=100
  713. POSTGRES_VCHORDRQ_BUILD_OPTIONS=
  714. POSTGRES_VCHORDRQ_PROBES=
  715. POSTGRES_VCHORDRQ_EPSILON=1.9
  716. ### PostgreSQL Connection Retry Configuration (Network Robustness)
  717. ### NEW DEFAULTS (v1.4.10+): Optimized for HA deployments with ~30s switchover time
  718. ### These defaults provide out-of-the-box support for PostgreSQL High Availability setups
  719. ###
  720. ### Number of retry attempts (1-100, default: 10)
  721. ### - Default 10 attempts allows ~225s total retry time (sufficient for most HA scenarios)
  722. ### - For extreme cases: increase up to 20-50
  723. ### Initial retry backoff in seconds (0.1-300.0, default: 3.0)
  724. ### - Default 3.0s provides reasonable initial delay for switchover detection
  725. ### - For faster recovery: decrease to 1.0-2.0
  726. ### Maximum retry backoff in seconds (must be >= backoff, max: 600.0, default: 30.0)
  727. ### - Default 30.0s matches typical switchover completion time
  728. ### - For longer switchovers: increase to 60-90
  729. ### Connection pool close timeout in seconds (1.0-30.0, default: 5.0)
  730. # POSTGRES_CONNECTION_RETRIES=10
  731. # POSTGRES_CONNECTION_RETRY_BACKOFF=3.0
  732. # POSTGRES_CONNECTION_RETRY_BACKOFF_MAX=30.0
  733. # POSTGRES_POOL_CLOSE_TIMEOUT=5.0
  734. ### PostgreSQL SSL Configuration (Optional)
  735. # POSTGRES_SSL_MODE=require
  736. # POSTGRES_SSL_CERT=/path/to/client-cert.pem
  737. # POSTGRES_SSL_KEY=/path/to/client-key.pem
  738. # POSTGRES_SSL_ROOT_CERT=/path/to/ca-cert.pem
  739. # POSTGRES_SSL_CRL=/path/to/crl.pem
  740. ### PostgreSQL Server Settings (for Supabase Supavisor)
  741. # Use this to pass extra options to the PostgreSQL connection string.
  742. # For Supabase, you might need to set it like this:
  743. # POSTGRES_SERVER_SETTINGS='options=reference%3D[project-ref]'
  744. # Default is 100; set to 0 to disable
  745. # POSTGRES_STATEMENT_CACHE_SIZE=100
  746. ### Neo4j Configuration
  747. NEO4J_URI=neo4j://localhost:7687
  748. # NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
  749. NEO4J_USERNAME=neo4j
  750. NEO4J_PASSWORD=Daniel2026
  751. # NEO4J_PASSWORD='your_password'
  752. NEO4J_DATABASE=neo4j
  753. NEO4J_MAX_CONNECTION_POOL_SIZE=100
  754. NEO4J_CONNECTION_TIMEOUT=30
  755. NEO4J_CONNECTION_ACQUISITION_TIMEOUT=30
  756. NEO4J_MAX_TRANSACTION_RETRY_TIME=30
  757. NEO4J_MAX_CONNECTION_LIFETIME=300
  758. NEO4J_LIVENESS_CHECK_TIMEOUT=30
  759. NEO4J_KEEP_ALIVE=true
  760. ### DB specific workspace should not be set; kept for compatibility only
  761. # NEO4J_WORKSPACE=forced_workspace_name
  762. ### MongoDB Configuration
  763. # For MongoVectorDBStorage, MONGO_URI must point to a MongoDB endpoint with
  764. # Atlas Search / Vector Search support, such as MongoDB Atlas or Atlas local.
  765. MONGO_URI=mongodb://root:root@localhost:27017/
  766. # MONGO_URI=mongodb://localhost:27017/
  767. MONGO_DATABASE=LightRAG
  768. ### DB specific workspace should not be set; kept for compatibility only
  769. # MONGODB_WORKSPACE=forced_workspace_name
  770. # Community/local Docker MongoDB example for KV, graph, or doc-status storage only:
  771. # MONGO_URI=mongodb://localhost:27017/
  772. ### OpenSearch Configuration
  773. ### OpenSearch can be used for all storage types: KV, Vector, Graph, DocStatus
  774. ### Connection settings (comma-separated host:port entries; do not include http:// or https://)
  775. ### This setup wizard supports authenticated OpenSearch clusters only.
  776. ### OPENSEARCH_USE_SSL controls whether those hosts are reached over TLS.
  777. OPENSEARCH_HOSTS=localhost:9200
  778. OPENSEARCH_USER=admin
  779. OPENSEARCH_PASSWORD=LightRAG2026_!@
  780. OPENSEARCH_USE_SSL=true
  781. OPENSEARCH_VERIFY_CERTS=false
  782. # OPENSEARCH_TIMEOUT=30
  783. # OPENSEARCH_MAX_RETRIES=3
  784. ### Index Settings (for 3-AZ Amazon OpenSearch Service, set replicas to 2)
  785. # OPENSEARCH_NUMBER_OF_SHARDS=1
  786. # OPENSEARCH_NUMBER_OF_REPLICAS=0
  787. ### k-NN Settings for Vector Storage (HNSW algorithm)
  788. # OPENSEARCH_KNN_EF_CONSTRUCTION=200
  789. # OPENSEARCH_KNN_M=16
  790. # OPENSEARCH_KNN_EF_SEARCH=100
  791. ### PPL graphlookup for server-side graph traversal (auto-detected if not set)
  792. # OPENSEARCH_USE_PPL_GRAPHLOOKUP=true
  793. ### DB specific workspace should not be set; kept for compatibility only
  794. # OPENSEARCH_WORKSPACE=forced_workspace_name
  795. ### Milvus Configuration
  796. MILVUS_URI=http://localhost:19530
  797. MILVUS_DB_NAME=lightrag
  798. MILVUS_DEVICE=cuda
  799. # MILVUS_USER=root
  800. # MILVUS_PASSWORD=your_password
  801. # MILVUS_TOKEN=your_token
  802. # Required for the bundled Docker Milvus stack; may come from .env or exported shell variables.
  803. MINIO_ACCESS_KEY_ID=minioadmin
  804. MINIO_SECRET_ACCESS_KEY=minioadmin
  805. ### DB specific workspace should not be set; kept for compatibility only
  806. # MILVUS_WORKSPACE=forced_workspace_name
  807. ### Milvus Vector Index Configuration
  808. ### Index type: AUTOINDEX (default), HNSW, HNSW_SQ, HNSW_PQ, IVF_FLAT, IVF_SQ8, DISKANN
  809. # MILVUS_INDEX_TYPE=AUTOINDEX
  810. ### Metric type: COSINE (default), L2, IP
  811. # MILVUS_METRIC_TYPE=COSINE
  812. ### HNSW / HNSW_SQ / HNSW_PQ Parameters (aligned with Milvus 2.4+ defaults)
  813. ### M: Maximum number of connections per node [2-2048], default 16
  814. # MILVUS_HNSW_M=16
  815. ### efConstruction: Size of dynamic candidate list during build [8-512], default 360
  816. # MILVUS_HNSW_EF_CONSTRUCTION=360
  817. ### ef: Size of dynamic candidate list during search, default 200
  818. # MILVUS_HNSW_EF=200
  819. ### HNSW_SQ Specific Parameters (requires Milvus 2.6.8+)
  820. ### sq_type: Scalar quantization type - SQ4U, SQ6, SQ8 (default), BF16, FP16
  821. # MILVUS_HNSW_SQ_TYPE=SQ8
  822. ### refine: Enable refinement step for higher precision, default false
  823. # MILVUS_HNSW_SQ_REFINE=false
  824. ### refine_type: Refinement precision (must be higher than sq_type) - SQ6, SQ8, BF16, FP16, FP32
  825. # MILVUS_HNSW_SQ_REFINE_TYPE=FP32
  826. ### refine_k: Refinement expansion factor, default 10
  827. # MILVUS_HNSW_SQ_REFINE_K=10
  828. ### IVF_FLAT / IVF_SQ8 Parameters
  829. ### nlist: Number of cluster units [1-65536], recommended sqrt(n) for n>1M, default 1024
  830. # MILVUS_IVF_NLIST=1024
  831. ### nprobe: Number of units to query [1-nlist], default 16
  832. # MILVUS_IVF_NPROBE=16
  833. ### Qdrant
  834. QDRANT_URL=http://localhost:6333
  835. # QDRANT_DEVICE=cpu
  836. # QDRANT_API_KEY=your-api-key
  837. ### Qdrant upsert batching (enabled by default)
  838. ### Split large upserts by estimated JSON payload size and point count
  839. ### Default 16MB keeps safe headroom below common 32MB gateway/request limits
  840. # QDRANT_UPSERT_MAX_PAYLOAD_BYTES=16777216
  841. # QDRANT_UPSERT_MAX_POINTS_PER_BATCH=128
  842. ### DB specific workspace should not be set; kept for compatibility only
  843. # QDRANT_WORKSPACE=forced_workspace_name
  844. ### Redis
  845. REDIS_URI=redis://localhost:6379
  846. REDIS_SOCKET_TIMEOUT=30
  847. REDIS_CONNECT_TIMEOUT=10
  848. REDIS_MAX_CONNECTIONS=100
  849. REDIS_RETRY_ATTEMPTS=3
  850. ### DB specific workspace should not be set; kept for compatibility only
  851. # REDIS_WORKSPACE=forced_workspace_name
  852. ### Memgraph Configuration
  853. MEMGRAPH_URI=bolt://localhost:7687
  854. MEMGRAPH_USERNAME=
  855. MEMGRAPH_PASSWORD=
  856. MEMGRAPH_DATABASE=memgraph
  857. ### DB specific workspace should not be set; kept for compatibility only
  858. # MEMGRAPH_WORKSPACE=forced_workspace_name
  859. ###########################################################
  860. ### Langfuse Observability Configuration
  861. ### Only works with LLM provided by OpenAI compatible API
  862. ### Install with: pip install lightrag-hku[observability]
  863. ### Sign up at: https://cloud.langfuse.com or self-host
  864. ###########################################################
  865. # LANGFUSE_SECRET_KEY=''
  866. # LANGFUSE_PUBLIC_KEY=''
  867. # LANGFUSE_HOST='https://cloud.langfuse.com'
  868. # LANGFUSE_ENABLE_TRACE=true
  869. ############################
  870. ### Evaluation Configuration
  871. ############################
  872. ### RAGAS evaluation models (used for RAG quality assessment)
  873. ### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
  874. ### Default uses OpenAI models for evaluation
  875. ### LLM Configuration for Evaluation
  876. # EVAL_LLM_MODEL=gpt-4o-mini
  877. ### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
  878. # EVAL_LLM_BINDING_API_KEY=your_api_key
  879. ### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
  880. # EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
  881. ### Embedding Configuration for Evaluation
  882. # EVAL_EMBEDDING_MODEL=text-embedding-3-large
  883. ### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
  884. # EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
  885. ### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
  886. # EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
  887. ### Performance Tuning
  888. ### Number of concurrent test case evaluations
  889. ### Lower values reduce API rate limit issues but increase evaluation time
  890. # EVAL_MAX_CONCURRENT=2
  891. ### TOP_K query parameter of LightRAG (default: 10)
  892. ### Number of entities or relations retrieved from KG
  893. # EVAL_QUERY_TOP_K=10
  894. ### LLM request retry and timeout settings for evaluation
  895. # EVAL_LLM_MAX_RETRIES=5
  896. # EVAL_LLM_TIMEOUT=180
  897. ##########################################################################
  898. ### ----- Preserved custom environment variables from previous .env -----
  899. ### ----- Comments in this session will persist across regenerations -----
  900. ### (This must be the final session; ensure the preceding lines unchanged)
  901. ##########################################################################
  902. ### The "make env*" wizard will leave the following lines unchanged
  903. ### You may add additional env vars or comments here for your own purpose
  904. ##########################################################################
  905. ### Default Storage (Recommended for test deployment)
  906. # LIGHTRAG_KV_STORAGE=JsonKVStorage
  907. # LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
  908. # LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
  909. # LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
  910. ### Production Storage
  911. # LIGHTRAG_KV_STORAGE=RedisKVStorage
  912. # LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
  913. # LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage
  914. # LIGHTRAG_GRAPH_STORAGE=MemgraphStorage
  915. ### Select OpenSearch for all storages
  916. # LIGHTRAG_KV_STORAGE=OpenSearchKVStorage
  917. # LIGHTRAG_DOC_STATUS_STORAGE=OpenSearchDocStatusStorage
  918. # LIGHTRAG_GRAPH_STORAGE=OpenSearchGraphStorage
  919. # LIGHTRAG_VECTOR_STORAGE=OpenSearchVectorDBStorage
  920. ### Select PostgreSQL for all storages
  921. # LIGHTRAG_KV_STORAGE=PGKVStorage
  922. # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
  923. # LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
  924. # LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
  925. ### Select MongoDB for all storage (Vector storage requires an Atlas-capable deployment)
  926. # LIGHTRAG_KV_STORAGE=MongoKVStorage
  927. # LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
  928. # LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
  929. # LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
  930. ### ----- Extra setting from previous .env -----
  931. # ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'
  932. POSTGRES_ENABLE_VECTOR=true