| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989 |
- ### All configurable environment variables must appear in this sample file, either active or commented out
- ### Setup tool `make env-*` uses this file to generate final .env file
- ### Target environment of this env file: host/compose (compose is for Docker or Kubernetes)
- # LIGHTRAG_RUNTIME_TARGET=host
- ###########################
- ### Server Configuration
- ###########################
- HOST=0.0.0.0
- PORT=9621
- WEBUI_TITLE='My Graph KB'
- WEBUI_DESCRIPTION='Simple and Fast Graph Based RAG System'
- # WORKERS=2
- ### gunicorn worker timeout(as default LLM request timeout if LLM_TIMEOUT is not set)
- # TIMEOUT=150
- # CORS_ORIGINS=http://localhost:3000,http://localhost:8080
- ### Path Prefix Configuration (Optional)
- ### Used to host multiple LightRAG instances on one host behind a reverse
- ### proxy that routes by site prefix. Leave unset (or empty) for a
- ### single-instance deployment.
- ###
- ### - LIGHTRAG_API_PREFIX : reverse-proxy prefix the upstream proxy strips
- ### before forwarding (passed to FastAPI as root_path).
- ###
- ### See docs/MultiSiteDeployment.md for end-to-end examples.
- # LIGHTRAG_API_PREFIX=/site01
- ### Optional SSL Configuration
- ### Docker note: generated compose files mount staged certs at /app/data/certs/ inside the container
- # SSL=true
- # SSL_CERTFILE=/path/to/cert.pem
- # SSL_KEYFILE=/path/to/key.pem
- ### Directory Configuration (defaults to current working directory)
- ### Default value is: ./inputs ./rag_storage
- # INPUT_DIR=<absolute_path_for_doc_input_dir>
- # WORKING_DIR=<absolute_path_for_working_dir>
- ### Tiktoken cache directory (Store cached files in this folder for offline deployment)
- # TIKTOKEN_CACHE_DIR=/app/data/tiktoken
- ### Ollama Emulating Model and Tag
- # OLLAMA_EMULATING_MODEL_NAME=lightrag
- OLLAMA_EMULATING_MODEL_TAG=latest
- ### Max nodes for graph retrieval (Ensure WebUI local settings are also updated, which is limited to this value)
- # MAX_GRAPH_NODES=1000
- ### Logging level
- # LOG_LEVEL=INFO
- # VERBOSE=False
- # LOG_MAX_BYTES=10485760
- # LOG_BACKUP_COUNT=5
- ### Logfile location (defaults to current working directory)
- # LOG_DIR=/path/to/log/directory
- # LIGHTRAG_PERFORMANCE_TIMING_LOGS=false
- #####################################
- ### Login and API-Key Configuration
- #####################################
- # AUTH_ACCOUNTS='admin:admin123,user1:{bcrypt}$2b$12$S8Yu.gCbuAbNTJFB.231gegTwr5pgrFxc8H9kXQ4/sduFBHkhM8Ka'
- # TOKEN_SECRET=lightrag-jwt-default-secret-key!
- # JWT_ALGORITHM=HS256
- # TOKEN_EXPIRE_HOURS=48
- # GUEST_TOKEN_EXPIRE_HOURS=24
- ### Token Auto-Renewal Configuration (Sliding Window Expiration)
- ### Enable automatic token renewal to prevent active users from being logged out
- ### When enabled, tokens will be automatically renewed when remaining time < threshold
- # TOKEN_AUTO_RENEW=true
- ### Token renewal threshold (0.0 - 1.0)
- ### Renew token when remaining time < (total time * threshold)
- ### Default: 0.5 (renew when 50% time remaining)
- ### Examples:
- ### 0.5 = renew when 24h token has 12h left
- ### 0.25 = renew when 24h token has 6h left
- # TOKEN_RENEW_THRESHOLD=0.5
- ### Note: Token renewal is automatically skipped for certain endpoints:
- ### - /health: Health check endpoint (no authentication required)
- ### - /documents/paginated: Frequently polled by client (5-30s interval)
- ### - /documents/pipeline_status: Very frequently polled by client (2s interval)
- ### - Rate limit: Minimum 60 seconds between renewals for same user
- ### API-Key to access LightRAG Server API
- ### Use this key in HTTP requests with the 'X-API-Key' header
- ### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
- # LIGHTRAG_API_KEY=your-secure-api-key-here
- # WHITELIST_PATHS=/health,/api/*
- ######################################################################################
- ### Query Configuration
- ###
- ### How to control the context length sent to LLM:
- ### MAX_ENTITY_TOKENS + MAX_RELATION_TOKENS < MAX_TOTAL_TOKENS
- ### Chunk_Tokens = MAX_TOTAL_TOKENS - Actual_Entity_Tokens - Actual_Relation_Tokens
- ######################################################################################
- ### LLM response cache for query (Not valid for streaming response)
- # ENABLE_LLM_CACHE=true
- # COSINE_THRESHOLD=0.2
- ### Number of entities or relations retrieved from KG
- # TOP_K=40
- ### Maximum number of chunks for naive vector search
- # CHUNK_TOP_K=20
- ### control the actual entities sent to LLM
- # MAX_ENTITY_TOKENS=6000
- ### control the actual relations sent to LLM
- # MAX_RELATION_TOKENS=8000
- ### control the maximum tokens sent to LLM (include entities, relations and chunks)
- # MAX_TOTAL_TOKENS=30000
- ### chunk selection strategies
- ### VECTOR: Pick KG chunks by vector similarity; the chunks delivered to the LLM align more closely with naive retrieval
- ### WEIGHT: Pick KG chunks by entity and chunk weight; the chunks delivered to the LLM are more purely KG-related
- ### If reranking is enabled, the impact of chunk selection strategies will be diminished.
- # KG_CHUNK_PICK_METHOD=VECTOR
- ### maximum number of related chunks per source entity or relation
- ### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
- ### Higher values increase re-ranking time
- # RELATED_CHUNK_NUMBER=5
- #########################################################
- ### Reranking configuration
- ### RERANK_BINDING type: null, cohere, jina, aliyun
- ### For rerank model deployed by vLLM use cohere binding
- ### If LightRAG deployed in Docker:
- ### uses host.docker.internal instead of localhost in RERANK_BINDING_HOST
- #########################################################
- RERANK_BINDING=null
- # RERANK_MODEL=BAAI/bge-reranker-v2-m3
- # RERANK_BINDING_HOST=http://localhost:8000/rerank
- # RERANK_BINDING_API_KEY=your_rerank_api_key_here
- ### rerank score chunk filter(set to 0.0 to keep all chunks, 0.6 or above if LLM is not strong enough)
- # MIN_RERANK_SCORE=0.0
- ### Enable rerank by default in query params when RERANK_BINDING is not null
- # RERANK_BY_DEFAULT=True
- ### Rerank concurrency and timeout (independent from base LLM settings)
- ### MAX_ASYNC_RERANK falls back to MAX_ASYNC when unset.
- ### RERANK_TIMEOUT has its own default (30s) since reranker calls are
- ### typically much shorter than full LLM generation.
- # MAX_ASYNC_RERANK=4
- # RERANK_TIMEOUT=30
- ### Cohere AI
- # # RERANK_MODEL=rerank-v3.5
- # # RERANK_BINDING_HOST=https://api.cohere.com/v2/rerank
- # # RERANK_BINDING_API_KEY=your_rerank_api_key_here
- ### Cohere rerank chunking configuration (useful for models with token limits like ColBERT)
- # RERANK_ENABLE_CHUNKING=true
- # RERANK_MAX_TOKENS_PER_DOC=480
- ### Aliyun Dashscope
- # # RERANK_MODEL=gte-rerank-v2
- # # RERANK_BINDING_HOST=https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank
- # # RERANK_BINDING_API_KEY=your_rerank_api_key_here
- ### Jina AI
- # # RERANK_MODEL=jina-reranker-v2-base-multilingual
- # # RERANK_BINDING_HOST=https://api.jina.ai/v1/rerank
- # # RERANK_BINDING_API_KEY=your_rerank_api_key_here
- ### For local deployment Embedding and Reranker with vLLM (OpenAI-compatible API)
- ### Wizard metadata used to preserve the chosen deployment provider across setup reruns
- # LIGHTRAG_SETUP_EMBEDDING_PROVIDER=vllm
- # LIGHTRAG_SETUP_RERANK_PROVIDER=vllm
- # VLLM_EMBED_MODEL=BAAI/bge-m3
- # VLLM_EMBED_PORT=8001
- # VLLM_EMBED_DEVICE=cpu
- ### VLLM_EMBED_API_KEY is passed as --api-key to vLLM; synced to EMBEDDING_BINDING_API_KEY; auto-generated if blank
- # VLLM_EMBED_API_KEY=
- # VLLM_EMBED_EXTRA_ARGS=
- # VLLM_RERANK_MODEL=BAAI/bge-reranker-v2-m3
- # VLLM_RERANK_PORT=8000
- # VLLM_RERANK_DEVICE=cuda
- ### VLLM_RERANK_API_KEY is passed as --api-key to vLLM; synced to RERANK_BINDING_API_KEY; auto-generated if blank
- # VLLM_RERANK_API_KEY=
- ### Use float16 for GPU mode. CPU mode uses the official vLLM CPU image.
- # VLLM_USE_CPU=1
- ### Set to 1 for CPU mode, unset for GPU mode
- # CUDA_VISIBLE_DEVICES=-1
- ### Set to -1 to disable CUDA (CPU mode), or specific GPU IDs for GPU mode
- # NVIDIA_VISIBLE_DEVICES=0
- ### Optional Docker runtime equivalent; generated GPU compose honors either variable.
- # VLLM_RERANK_EXTRA_ARGS=
- ########################################
- ### Document processing configuration
- ########################################
- ### Document processing output language: English, Chinese, French, German ...
- SUMMARY_LANGUAGE=English
- ### Enable JSON-structured output for entity extraction
- ### Default behavior: JSON output is disabled when ENTITY_EXTRACTION_USE_JSON is unset
- ### JSON output incurs higher latency but delivers improved reliability
- ENTITY_EXTRACTION_USE_JSON=true
- ### Optional external YAML profile for entity type guidance and extraction examples
- ### Profiles are loaded from PROMPT_DIR/entity_type (PROMPT_DIR defaults to ./prompts).
- ### A reference template is shipped at prompts/samples/entity_type_prompt.sample.yml;
- # ENTITY_TYPE_PROMPT_FILE=entity_type_prompt.yml
- # PROMPT_DIR=<absolute_path_for_prompt_dir>
- ### Multimodal parsing/analyze integration
- ### Optional parser routing rules. Example for VLM & MinerU enabled configuration:
- ### LIGHTRAG_PARSER=*:native-iteP,*:mineru-iteP,*:legacy-R
- ### Rules may be separated with commas or semicolons. Rules match file suffixes
- ### (pdf), not full names (*.pdf), and are checked left-to-right.
- ### If mineru/docling appears in LIGHTRAG_PARSER, the corresponding endpoint
- ### below must be configured before server startup.
- ### See docs/FileProcessingPipeline.md for detail
- LIGHTRAG_PARSER=*:native-teP,*:legacy-R
- ### Async parser service protocol (optional)
- ### Configure these when using remote MinerU/Docling async services
- ### ---- MinerU shared parameters (both local and official modes) ----
- ### MinerU API protocol. Choose one active mode.
- ### - official: MinerU precision API v4. Requires MINERU_API_TOKEN.
- ### - local: self-hosted mineru-api / mineru-router base URL.
- MINERU_API_MODE=local
- # MINERU_POLL_INTERVAL_SECONDS=2
- # MINERU_MAX_POLLS=180
- # MINERU_LANGUAGE=ch
- # MINERU_ENABLE_TABLE=true
- # MINERU_ENABLE_FORMULA=true
- # MINERU_PAGE_RANGES=
- ### MINERU_PAGE_RANGES semantics differ by mode:
- ### - official: forwarded verbatim, supports e.g. "1-3,5,7-9".
- ### - local: only a single page ("3") or simple range ("1-10"); comma
- ### lists are rejected at startup.
- ### When switching modes, double-check this constraint.
- ### ---- MinerU local-only (MINERU_API_MODE=local) ----
- MINERU_LOCAL_ENDPOINT=http://127.0.0.1:8000
- ### MINERU_LOCAL_BACKEND: which mineru-api backend handles the parse.
- ### Accepted values (per mineru-api POST /tasks form parameter `backend`):
- ### hybrid-auto-engine - pipeline + VLM combo with auto-selected local
- ### engine (mineru-api's default). GPU required.
- ### pipeline - CPU-friendly traditional pipeline; no VLM step.
- ### vlm-auto-engine - VLM with auto-selected local inference engine
- ### (sglang-engine / vllm-engine if GPU is available);
- ### requires the matching engine extra preinstalled
- ### on the mineru-api side, plus model weights.
- ### We ship `hybrid-auto-engine` -- requires the target mineru-api
- ### deployment to have a GPU plus the matching inference engine
- ### (sglang / vllm) and model weights installed. Switch to `pipeline`
- ### for CPU-only deployments without those dependencies.
- MINERU_LOCAL_BACKEND=hybrid-auto-engine
- ### MINERU_LOCAL_PARSE_METHOD: parsing strategy for the pipeline component.
- ### Accepted values:
- ### auto - auto-detect embedded text-layer vs OCR per page (default).
- ### txt - extract text from the embedded text layer only; fastest,
- ### but yields empty output on scanned PDFs without a text layer.
- ### ocr - force OCR on every page regardless of text-layer quality;
- ### slowest, reliable on scanned or low-quality PDFs.
- ### Only consumed when MINERU_LOCAL_BACKEND is `pipeline` or
- ### `hybrid-auto-engine` (the pipeline arm of the hybrid pipeline).
- ### Pure VLM backends (`vlm-auto-engine`, `vlm-http-client`) ignore this
- ### parameter -- the VLM model handles layout/OCR natively.
- MINERU_LOCAL_PARSE_METHOD=auto
- ### MINERU_LOCAL_IMAGE_ANALYSIS: enable VLM image/chart analysis pass for
- ### better caption and footnote recognition.
- ### Only consumed by `vlm-auto-engine`, `vlm-http-client`,
- ### `hybrid-auto-engine`, `hybrid-http-client`. The `pipeline` backend
- ### silently drops this flag -- its `_process_pipeline` does not accept
- ### the kwarg, so setting `false` under pipeline does NOT speed parsing
- ### up; pipeline never invokes the VLM image pass to begin with.
- ### Disable (`false`) on VLM / hybrid backends to skip the extra VLM
- ### round, trading image / chart semantic descriptions for faster parsing
- ### and lower GPU cost.
- MINERU_LOCAL_IMAGE_ANALYSIS=true
- # MINERU_LOCAL_START_PAGE_ID=0
- # MINERU_LOCAL_END_PAGE_ID=99999
- ### ---- MinerU official-only (MINERU_API_MODE=official) ----
- # MINERU_API_TOKEN=your-api-key
- # MINERU_OFFICIAL_ENDPOINT=https://mineru.net
- # MINERU_MODEL_VERSION=vlm
- # MINERU_IS_OCR=false
- ### Force re-upload of file to MinerU on every retry after failure
- ### Disables caching of result outcomes
- # LIGHTRAG_FORCE_REPARSE_MINERU=false
- ### Docling parser (docling-serve v1 / async API).
- ###
- ### Endpoint: base URL only — the client appends /v1/convert/file/async,
- ### /v1/status/poll/{task_id}?wait=<DOCLING_POLL_INTERVAL_SECONDS>,
- ### /v1/result/{task_id} itself.
- ### Pipeline shape (pipeline=standard, target_type=zip,
- ### to_formats=[json,md], image_export_mode=referenced) is fixed in
- ### code so the sidecar flow stays self-consistent — flipping any of
- ### these would break the adapter and is therefore not exposed as env.
- ###
- ### OCR tunables:
- ### - DOCLING_DO_OCR: master switch; when false the engine relies only on
- ### text-layer extraction.
- ### - DOCLING_FORCE_OCR: when true, OCR every page regardless of text-layer
- ### quality (slower, useful for scanned PDFs with bad text layers).
- ### - DOCLING_OCR_ENGINE: explicit engine selection (DEPRECATED in the
- ### docling-serve OpenAPI but still honored for older deployments).
- ### - DOCLING_OCR_PRESET: recommended replacement for DOCLING_OCR_ENGINE.
- ### - DOCLING_OCR_LANG: JSON array (e.g. ["en","zh"]) or comma-separated
- ### list. Empty (default) lets the OCR engine pick its default.
- ### - DOCLING_DO_FORMULA_ENRICHMENT: when true, the code-formula model runs
- ### and `texts[*].label="formula"` items carry LaTeX in `text`. Default
- ### false because the model may not be present on every deployment;
- ### adapter falls back to plain-text formulas when disabled.
- ###
- ### Polling budget (server-side long-poll; client does NOT add extra sleep):
- ### - DOCLING_POLL_INTERVAL_SECONDS: ``?wait=N`` value sent to
- ### /v1/status/poll/{task_id}. Larger N = fewer round trips per parse;
- ### bound by your reverse-proxy idle timeout. Default 5.
- ### - DOCLING_MAX_POLLS: max polling rounds before raising TimeoutError.
- ### Worst-case wall-clock budget ≈
- ### DOCLING_POLL_INTERVAL_SECONDS × DOCLING_MAX_POLLS. Default 240
- ### (≈ 20 minutes at wait=5s); raise for very large PDFs.
- ###
- ### Bundle cache controls:
- ### - DOCLING_ENGINE_VERSION: recorded in <base>.docling_raw/_manifest.json.
- ### Mismatch with the recorded value forces a cache miss → re-download.
- ### Leave empty to skip this check.
- ### - LIGHTRAG_FORCE_REPARSE_DOCLING: when truthy ("1"/"true"), bypass the
- ### docling raw cache and re-upload on every parse_docling call.
- ### - DOCLING_BBOX_ATTRIBUTES: override the doc-level bbox_attributes
- ### written into <base>.blocks.jsonl meta. Default
- ### {"origin":"LEFTBOTTOM"} matches docling's default coordinate system.
- DOCLING_ENDPOINT=http://localhost:5001
- DOCLING_DO_OCR=true
- DOCLING_FORCE_OCR=true
- DOCLING_DO_FORMULA_ENRICHMENT=false
- # DOCLING_OCR_ENGINE=auto
- # DOCLING_OCR_PRESET=auto
- # DOCLING_OCR_LANG=
- # DOCLING_POLL_INTERVAL_SECONDS=5
- # DOCLING_MAX_POLLS=240
- # DOCLING_BBOX_ATTRIBUTES={"origin":"LEFTBOTTOM"}
- ### Force re-upload of file to Docling on every retry after failure
- ### Disables caching of result outcomes
- # LIGHTRAG_FORCE_REPARSE_DOCLING=false
- ### File upload size limit (in bytes)
- ### Default: 104857600 (100MB)
- ### Set to 0 or None for unlimited upload size
- ### Examples:
- ### 52428800 = 50MB
- ### 104857600 = 100MB (default)
- ### 209715200 = 200MB
- ### Note: If using Nginx as reverse proxy, also configure client_max_body_size
- # MAX_UPLOAD_SIZE=104857600
- ### Global chunk size, 500~1500 is recommended.
- ### Chunker inherits the global value here only when its own var is unset.
- ### Exception: P never inherits CHUNK_SIZE — it uses CHUNK_P_SIZE (default 2000).
- # CHUNK_SIZE=1200
- # CHUNK_OVERLAP_SIZE=100
- ### Fixed-token chunker (process_options=F, default) settings
- ### CHUNK_F_SIZE: per-strategy chunk_token_size override; falls back to CHUNK_SIZE when unset
- ### CHUNK_F_OVERLAP_SIZE: token overlap; falls back to CHUNK_OVERLAP_SIZE when unset
- ### CHUNK_F_SPLIT_BY_CHARACTER: optional separator string; pre-segment before token windowing
- ### CHUNK_F_SPLIT_BY_CHARACTER_ONLY: when true, raise on oversize segment instead of token re-split
- # CHUNK_F_SIZE=1200
- # CHUNK_F_OVERLAP_SIZE=100
- # CHUNK_F_SPLIT_BY_CHARACTER=
- # CHUNK_F_SPLIT_BY_CHARACTER_ONLY=false
- ### Recursive character chunker (process_options=R) settings
- ### CHUNK_R_SIZE: per-strategy chunk_token_size override; falls back to CHUNK_SIZE when unset
- ### CHUNK_R_OVERLAP_SIZE: token overlap between adjacent chunks; falls back to CHUNK_OVERLAP_SIZE when unset
- ### CHUNK_R_SEPARATORS: JSON array of cascaded separators tried by RecursiveCharacterTextSplitter.
- ### Default includes CJK sentence-ending punctuation so Chinese / mixed-language
- ### documents split at semantic boundaries. Order: paragraph (\n\n) > line (\n) >
- ### Chinese sentence-end (。!?) > Chinese semi-clause (;,) > space > char.
- ### English ".?!" are intentionally omitted (literal match would split "0.95" /
- ### "e.g."); the English path falls through space / char as before.
- # CHUNK_R_SIZE=1200
- # CHUNK_R_OVERLAP_SIZE=100
- # CHUNK_R_SEPARATORS=["\n\n","\n","。","!","?",";",","," ",""]
- ### Semantic vector chunker (process_options=V) settings
- ### CHUNK_V_SIZE: per-strategy chunk_token_size hard cap (oversized pieces are
- ### re-split via R before being emitted); falls back to CHUNK_SIZE when unset
- ### CHUNK_V_BREAKPOINT_THRESHOLD_TYPE: percentile | standard_deviation | interquartile | gradient
- ### CHUNK_V_BREAKPOINT_THRESHOLD_AMOUNT: leave empty to use the LangChain per-type default (e.g. 95 for percentile)
- ### CHUNK_V_BUFFER_SIZE: number of adjacent sentences combined when computing distances
- ### CHUNK_V_SENTENCE_SPLIT_REGEX: regex fed to LangChain SemanticChunker for the
- ### initial sentence split. Default extends the upstream English-only pattern
- ### with CJK sentence-end punctuation (。?!). Override if you need a
- ### different language mix. Note: env value is the raw regex string, no JSON
- ### quoting.
- # CHUNK_V_SIZE=1200
- # CHUNK_V_BREAKPOINT_THRESHOLD_TYPE=percentile
- # CHUNK_V_BREAKPOINT_THRESHOLD_AMOUNT=
- # CHUNK_V_BUFFER_SIZE=1
- # CHUNK_V_SENTENCE_SPLIT_REGEX=(?<=[.?!])\s+|(?<=[。?!])
- ### Paragraph semantic chunker (process_options=P) settings
- ### CHUNK_P_SIZE: per-strategy chunk_token_size override; defaults to 2000 when unset
- ### (does NOT fall back to CHUNK_SIZE — paragraph-semantic merging needs more
- ### headroom than the global default to keep related paragraphs together).
- ### CHUNK_P_OVERLAP_SIZE: overlap for prose fallback and table-bridge context;
- ### falls back to CHUNK_OVERLAP_SIZE when unset
- # CHUNK_P_SIZE=2000
- # CHUNK_P_OVERLAP_SIZE=100
- ### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
- # FORCE_LLM_SUMMARY_ON_MERGE=8
- ### Max description token size to trigger LLM summary
- # SUMMARY_MAX_TOKENS=1200
- ### Recommended LLM summary output length in tokens
- # SUMMARY_LENGTH_RECOMMENDED=600
- ### Maximum context size sent to LLM for description summary
- # SUMMARY_CONTEXT_SIZE=12000
- ### Maximum token size allowed for entity extraction input context
- # MAX_EXTRACT_INPUT_TOKENS=20480
- ### Multimodal surrounding-context budget (per-half token cap for the
- ### `leading` / `trailing` text injected into VLM and extract prompts).
- ### Computed at analyze_multimodal entry; the two halves are independent
- ### so deployments can bias context forward or backward as needed.
- # SURROUNDING_LEADING_MAX_TOKENS=2000
- # SURROUNDING_TRAILING_MAX_TOKENS=2000
- ### Per-response cap on total entity+relationship rows/records emitted by the LLM
- # MAX_EXTRACTION_RECORDS=100
- ### Per-response cap on entity rows/objects emitted by the LLM
- # MAX_EXTRACTION_ENTITIES=40
- ### control the maximum chunk_ids stored in vector and graph db
- # MAX_SOURCE_IDS_PER_ENTITY=300
- # MAX_SOURCE_IDS_PER_RELATION=300
- ### control chunk_ids limitation method: FIFO, KEEP
- ### FIFO: First in first out
- ### KEEP: Keep oldest (less merge action and faster)
- # SOURCE_IDS_LIMIT_METHOD=FIFO
- ### Maximum number of file paths stored in entity/relation file_path field
- ### For display only, does not affect query performance
- # MAX_FILE_PATHS=100
- ### PDF decryption password for protected PDF files
- # PDF_DECRYPT_PASSWORD=your_pdf_password_here
- ### LLM cache for entity/relation extraction is enabled by default
- ### Disabling it will prevent graph reconstruction after document deletion
- # ENABLE_LLM_CACHE_FOR_EXTRACT=true
- ########################################
- ### Pipeline Concurrency Configuration
- ########################################
- ### Number of parallel processing documents(between 2~10, MAX_ASYNC/3 is recommended)
- MAX_PARALLEL_INSERT=2
- ### Optional per-stage document pipeline concurrency
- # MAX_PARALLEL_PARSE_NATIVE=5
- # MAX_PARALLEL_PARSE_MINERU=1
- # MAX_PARALLEL_PARSE_DOCLING=1
- # MAX_PARALLEL_ANALYZE=5
- ### Optional queue sizes for staged pipeline workers
- # QUEUE_SIZE_DEFAULT=100
- # QUEUE_SIZE_INSERT=4
- ### Max concurrency requests for Embedding
- # EMBEDDING_FUNC_MAX_ASYNC=8
- ### Number of chunks sent to Embedding in a single request (default is 10)
- EMBEDDING_BATCH_NUM=32
- ###########################################################################
- ### Global LLM Configuration
- ### LLM_BINDING type: openai, ollama, lollms, azure_openai, bedrock, gemini
- ### LLM_BINDING_HOST: Service endpoint (left empty if using the provider SDK default endpoint)
- ### LLM_BINDING_API_KEY: api key
- ### If LightRAG deployed in Docker:
- ### uses host.docker.internal instead of localhost in LLM_BINDING_HOST
- ###########################################################################
- ### LLM request timeout setting for all LLMs (0 means no timeout for Ollama)
- # LLM_TIMEOUT=180
- LLM_BINDING=openai
- LLM_BINDING_HOST=https://api.openai.com/v1
- LLM_BINDING_API_KEY=your_api_key
- LLM_MODEL=gpt-5.4-mini
- ### Max concurrency requests of LLM
- MAX_ASYNC=4
- ###########################################################################
- ### Role-specific LLM/VLM overrides
- ### Available roles: EXTRACT, KEYWORD, QUERY, VLM
- ### If unset, each role falls back to the global LLM configuration above.
- ### For detail information, refer to:
- ### docs/RoleSpecificLLMConfiguration.md
- ### docs/RoleSpecificLLMConfiguration-zh.md
- ###########################################################################
- # KEYWORD_LLM_MODEL=gpt-5.4-nano
- # KEYWORD_MAX_ASYNC_LLM=4
- # KEYWORD_LLM_TIMEOUT=180
- # KEYWORD_LLM_BINDING=openai
- # KEYWORD_LLM_BINDING_HOST=https://api.openai.com/v1
- # KEYWORD_LLM_BINDING_API_KEY=your_api_key
- # QUERY_LLM_MODEL=gpt-5.4
- # QUERY_MAX_ASYNC_LLM=4
- # QUERY_LLM_TIMEOUT=180
- # QUERY_LLM_BINDING=openai
- # QUERY_LLM_BINDING_HOST=https://api.openai.com/v1
- # QUERY_LLM_BINDING_API_KEY=your_api_key
- # VLM_LLM_MODEL=gpt-5.4-mini
- # VLM_MAX_ASYNC_LLM=4
- # VLM_LLM_TIMEOUT=180
- # VLM_LLM_BINDING=openai
- # VLM_LLM_BINDING_HOST=https://api.example.com/v1
- # VLM_LLM_BINDING_API_KEY=your_vlm_api_key
- ### Master switch for VLM multimodal analysis (i/t/e items).
- ### When false, multimodal item is skipped regardless of document process_options
- ### When true, VLM_LLM_BINDING (or the base LLM_BINDING) must be vision-capable
- ### lollms is rejected at startup
- VLM_PROCESS_ENABLE=false
- ### Maximum image bytes sent to VLM (5242880=5MB)
- VLM_MAX_IMAGE_BYTES=5242880
- ###########################################################################
- ### Provider specific LLM options
- ### Increasing the temperature setting may help mitigate infinite inference
- ### loops during entity/relation extraction, particularly when using
- ### models with more limited capabilities, such as Qwen3-30B
- ### Set a max output token limit to prevent endless output from certain LLMs,
- ### which may trigger timeout errors during entity and relation extraction.
- ### max_output_token < LLM_TIMEOUT * llm_tokens_per_second
- ### i.e. max_output_token = 9000 = 180s * 50 tokens/s
- ### Sample commands to list all supported options specific LLM_BINDING:
- ### lightrag-server --llm-binding openai --help
- ### lightrag-server --llm-binding bedrock --help
- ### lightrag-server --llm-binding gemini --help
- ###########################################################################
- ### OpenAI Specific Parameters (OpenRouter or other OpenAI-compatible APIs):
- ### LLM_BINDING=openai
- ### LLM_BINDING_HOST=https://openrouter.ai/api/v1
- ### LLM_MODEL=google/gemini-2.5-flash
- # OPENAI_LLM_TEMPERATURE=0.9
- ### For vLLM/SGLang and most of OpenAI compatible API provider
- # OPENAI_LLM_MAX_TOKENS=9000
- ### OpenAI o1-mini and newer models use max_completion_tokens instead of max_tokens
- # OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
- ### For OpenAI reasoning control
- # OPENAI_LLM_REASONING_EFFORT=minimal
- ### For OpenRouter reasoning control
- # OPENAI_LLM_EXTRA_BODY='{"reasoning": {"enabled": false}}'
- ### For Qwen3 reasoning control deploy by vLLM
- # OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
- ### Azure OpenAI Specific Parameters:
- ### LLM_BINDING=azure_openai
- ### LLM_BINDING_HOST=https://xxxx.openai.azure.com/
- ### LLM_BINDING_API_KEY=your_api_key
- ### LLM_MODEL=my-gpt-mini-deployment
- ### You may use deployment name for LLM_MODEL or set AZURE_OPENAI_DEPLOYMENT instead
- # AZURE_OPENAI_DEPLOYMENT=my-deployment-name
- # AZURE_OPENAI_API_VERSION=2024-08-01-preview
- ### Google AI Studio Gemini Specific Parameters:
- ### DEFAULT_GEMINI_ENDPOINT means the SDK selects the endpoint automatically
- ### LLM_BINDING=gemini
- ### LLM_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
- ### LLM_BINDING_API_KEY=your_gemini_api_key
- ### LLM_MODEL=gemini-flash-latest
- # GEMINI_LLM_TEMPERATURE=0.7
- # GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
- ### Enable or disable thinking
- ### GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
- ### GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
- # GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
- ### Google Vertex AI Gemini Specific Parameters:
- ### Vertex AI use GOOGLE_APPLICATION_CREDENTIALS instead of API-KEY for authentication
- # GOOGLE_GENAI_USE_VERTEXAI=true
- # GOOGLE_CLOUD_PROJECT='your-project-id'
- # GOOGLE_CLOUD_LOCATION='us-central1'
- # GOOGLE_APPLICATION_CREDENTIALS='/Users/xxxxx/your-service-account-credentials-file.json'
- ### Bedrock Specific Parameters:
- ### LLM_BINDING=bedrock
- ### LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
- ### LLM_MODEL=us.amazon.nova-lite-v1:0
- ### Region is required for all three modes (Bedrock endpoints are regional).
- # AWS_REGION=us-west-1
- ### Bedrock Authentication (choose ONE of the following three approaches):
- ### Bedrock API key (bearer token). Bedrock ignores LLM_BINDING_API_KEY;
- ### set AWS_BEARER_TOKEN_BEDROCK directly before startup. This is a
- ### process-level AWS SDK setting and cannot be overridden per role.
- # AWS_BEARER_TOKEN_BEDROCK=your_bedrock_api_key
- ### SigV4 credentials (classic IAM user / STS / instance profile).
- # AWS_ACCESS_KEY_ID=your_aws_access_key_id
- # AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
- # AWS_SESSION_TOKEN=your_optional_aws_session_token
- ### Ambient credentials (AWS SDK default credential chain).
- ### To use this mode, leave AWS_BEARER_TOKEN_BEDROCK, AWS_ACCESS_KEY_ID,
- ### AWS_SECRET_ACCESS_KEY, and AWS_SESSION_TOKEN above commented out — the
- ### AWS SDK will then resolve credentials from ~/.aws/credentials, IAM role,
- ### instance profile, SSO, or environment variables outside .env.
- ### Activating any of the lines above forces that explicit mode and bypasses
- ### the credential chain.
- # BEDROCK_LLM_TEMPERATURE=1.0
- # BEDROCK_LLM_MAX_TOKENS=9000
- # BEDROCK_LLM_TOP_P=1.0
- # BEDROCK_LLM_STOP_SEQUENCES='["</s>"]'
- ### Bedrock model reasoning control
- # BEDROCK_LLM_EXTRA_FIELDS='{"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "low"}}'
- ### Ollama Specific Parameters:
- ### LLM_BINDING=ollama
- ### LLM_BINDING_HOST=http://localhost:11434
- ### LLM_MODEL=qwen3.5:9b
- ### OLLAMA_LLM_NUM_CTX must be provided, and should be larger than MAX_TOTAL_TOKENS + 2000
- OLLAMA_LLM_NUM_CTX=32768
- # OLLAMA_LLM_NUM_PREDICT=9000
- # OLLAMA_LLM_TEMPERATURE=0.85
- # OLLAMA_LLM_STOP='["</s>", "<|EOT|>"]'
- #######################################################################################
- ### Embedding Configuration (Should not be changed after the first file processed)
- ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, bedrock, gemini
- ### EMBEDDING_BINDING_HOST: Service endpoint (leave empty to use the default endpoint provided by the openai or gemini SDK)
- ### EMBEDDING_BINDING_API_KEY: api key
- ### If LightRAG is deployed in Docker:
- ### use host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
- ### Control whether to send embedding_dim parameter to embedding API
- ### For OpenAI: Set EMBEDDING_SEND_DIM=true to enable dynamic dimension adjustment
- ### For OpenAI: Set EMBEDDING_SEND_DIM=false (default) to disable sending dimension parameter
- ### For Gemini: Always set EMBEDDING_SEND_DIM=true
- ### Control whether to use base64 encoding format for embeddings (improves performance for OpenAI)
- ### For OpenAI: Set EMBEDDING_USE_BASE64=true (default) to use base64 encoding
- ### For Yandex Cloud and other providers that don't support it: Set EMBEDDING_USE_BASE64=false
- #######################################################################################
- # EMBEDDING_TIMEOUT=30
- ### OpenAI compatible embedding
- EMBEDDING_BINDING=openai
- EMBEDDING_BINDING_HOST=https://api.openai.com/v1
- EMBEDDING_BINDING_API_KEY=your_api_key
- EMBEDDING_MODEL=text-embedding-3-large
- EMBEDDING_DIM=3072
- EMBEDDING_TOKEN_LIMIT=8192
- EMBEDDING_SEND_DIM=false
- EMBEDDING_USE_BASE64=true
- ### Optional: asymmetric embeddings (query/document behavior split)
- ### Leave EMBEDDING_ASYMMETRIC unset or set false to keep symmetric behavior.
- ### Set true only when the selected embedding backend supports asymmetric mode.
- # EMBEDDING_ASYMMETRIC=true
- ### Provider-task bindings such as Jina/Gemini/VoyageAI use provider parameters
- ### and should not configure the prefix variables below.
- ### Prefix-based models such as BGE/E5/GTE require both prefix variables.
- ### Wrap non-empty values with quotes if there are trailing spaces.
- # EMBEDDING_DOCUMENT_PREFIX="search_document: "
- ### Use NO_PREFIX for a side that should intentionally have no prefix.
- ### EMBEDDING_DOCUMENT_PREFIX=NO_PREFIX
- # EMBEDDING_QUERY_PREFIX="search_query: "
- ###########################################################################
- ### Provider specific Embedding options
- ### Increasing the temperature setting may help mitigate infinite inference
- ### loops during entity/relation extraction, particularly when using
- ### models with more limited capabilities, such as Qwen3-30B
- ### Set a max output token limit to prevent endless output from certain LLMs,
- ### which may trigger timeout errors during entity and relation extraction.
- ### max_output_token < LLM_TIMEOUT * llm_tokens_per_second
- ### i.e. max_output_token = 9000 = 180s * 50 tokens/s
- ### Sample commands to list all supported options specific to an EMBEDDING_BINDING:
- ### lightrag-server --embedding-binding openai --help
- ### lightrag-server --embedding-binding ollama --help
- ### lightrag-server --embedding-binding bedrock --help
- ###########################################################################
- ### Azure Embedding Specific Parameters:
- ### Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead
- ### EMBEDDING_BINDING=azure_openai
- ### EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
- ### EMBEDDING_BINDING_API_KEY=your_api_key
- ### EMBEDDING_MODEL=my-text-embedding-3-large-deployment
- ### EMBEDDING_DIM=3072
- # AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
- ### Ollama Embedding Specific Parameters:
- ### EMBEDDING_BINDING=ollama
- ### EMBEDDING_BINDING_HOST=http://localhost:11434
- ### EMBEDDING_BINDING_API_KEY=your_api_key
- ### EMBEDDING_MODEL=qwen3-embedding:4b
- ### EMBEDDING_DIM=2560
- ### Ollama should set the num_ctx option in addition to EMBEDDING_TOKEN_LIMIT
- OLLAMA_EMBEDDING_NUM_CTX=8192
- ### Gemini Embedding Specific Parameters:
- ### DEFAULT_GEMINI_ENDPOINT means the SDK selects the endpoint automatically
- ### Gemini embedding requires sending dimension to server
- ### EMBEDDING_BINDING=gemini
- ### EMBEDDING_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
- ### EMBEDDING_BINDING_API_KEY=your_api_key
- ### EMBEDDING_MODEL=gemini-embedding-001
- ### EMBEDDING_DIM=1536
- ### EMBEDDING_TOKEN_LIMIT=2048
- ### EMBEDDING_SEND_DIM=true
- ### Bedrock Embedding Specific Parameters:
- ### EMBEDDING_BINDING=bedrock
- ### EMBEDDING_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
- ### EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
- ### EMBEDDING_DIM=1024
- ### Shares the same region and authentication settings as the Bedrock LLM; no need to configure them again here
- ### AWS_REGION=us-west-1
- ### AWS_BEARER_TOKEN_BEDROCK=your_bedrock_api_key
- ### AWS_ACCESS_KEY_ID=your_aws_access_key_id
- ### AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
- ### AWS_SESSION_TOKEN=your_optional_aws_session_token
- ### Jina AI Embedding Specific Parameters:
- ### EMBEDDING_BINDING=jina
- ### EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
- ### EMBEDDING_MODEL=jina-embeddings-v4
- ### EMBEDDING_DIM=2048
- ### EMBEDDING_BINDING_API_KEY=your_api_key
- ####################################################################
- ### WORKSPACE sets workspace name for all storage types
- ### for the purpose of isolating data from LightRAG instances.
- ### Valid workspace name constraints: a-z, A-Z, 0-9, and _
- ####################################################################
- # WORKSPACE=
- ############################
- ### Data storage selection
- ############################
- ### Default storage: JSON/Nano/NetworkX (Recommended for test deployment)
- LIGHTRAG_KV_STORAGE=JsonKVStorage
- LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
- LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
- LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
- ### Wizard metadata used to preserve env-storage Docker deployment defaults across setup reruns
- # LIGHTRAG_SETUP_POSTGRES_DEPLOYMENT=docker
- # LIGHTRAG_SETUP_NEO4J_DEPLOYMENT=docker
- # LIGHTRAG_SETUP_MONGODB_DEPLOYMENT=docker
- # LIGHTRAG_SETUP_MONGODB_DEPLOYMENT=atlas-capable
- # LIGHTRAG_SETUP_REDIS_DEPLOYMENT=docker
- # LIGHTRAG_SETUP_MILVUS_DEPLOYMENT=docker
- # LIGHTRAG_SETUP_QDRANT_DEPLOYMENT=docker
- # LIGHTRAG_SETUP_MEMGRAPH_DEPLOYMENT=docker
- # LIGHTRAG_SETUP_OPENSEARCH_DEPLOYMENT=docker
- ### PostgreSQL Configuration
- POSTGRES_HOST=localhost
- POSTGRES_PORT=5432
- POSTGRES_USER=your_username
- POSTGRES_PASSWORD='your_password'
- POSTGRES_DATABASE=rag
- POSTGRES_MAX_CONNECTIONS=25
- ### DB specific workspace should not be set; kept for compatibility only
- # POSTGRES_WORKSPACE=forced_workspace_name
- ### Use HNSW_HALFVEC for large embeddings (2000+ dim).
- ### Requires pgvector extension >= 0.7.0.
- ### Vector storage type: HNSW, HNSW_HALFVEC, IVFFlat, VCHORDRQ
- POSTGRES_VECTOR_INDEX_TYPE=HNSW
- POSTGRES_HNSW_M=16
- POSTGRES_HNSW_EF=200
- POSTGRES_IVFFLAT_LISTS=100
- POSTGRES_VCHORDRQ_BUILD_OPTIONS=
- POSTGRES_VCHORDRQ_PROBES=
- POSTGRES_VCHORDRQ_EPSILON=1.9
- ### PostgreSQL Connection Retry Configuration (Network Robustness)
- ### NEW DEFAULTS (v1.4.10+): Optimized for HA deployments with ~30s switchover time
- ### These defaults provide out-of-the-box support for PostgreSQL High Availability setups
- ###
- ### Number of retry attempts (1-100, default: 10)
- ### - Default 10 attempts allows ~225s total retry time (sufficient for most HA scenarios)
- ### - For extreme cases: increase up to 20-50
- ### Initial retry backoff in seconds (0.1-300.0, default: 3.0)
- ### - Default 3.0s provides reasonable initial delay for switchover detection
- ### - For faster recovery: decrease to 1.0-2.0
- ### Maximum retry backoff in seconds (must be >= backoff, max: 600.0, default: 30.0)
- ### - Default 30.0s matches typical switchover completion time
- ### - For longer switchovers: increase to 60-90
- ### Connection pool close timeout in seconds (1.0-30.0, default: 5.0)
- # POSTGRES_CONNECTION_RETRIES=10
- # POSTGRES_CONNECTION_RETRY_BACKOFF=3.0
- # POSTGRES_CONNECTION_RETRY_BACKOFF_MAX=30.0
- # POSTGRES_POOL_CLOSE_TIMEOUT=5.0
- ### PostgreSQL SSL Configuration (Optional)
- # POSTGRES_SSL_MODE=require
- # POSTGRES_SSL_CERT=/path/to/client-cert.pem
- # POSTGRES_SSL_KEY=/path/to/client-key.pem
- # POSTGRES_SSL_ROOT_CERT=/path/to/ca-cert.pem
- # POSTGRES_SSL_CRL=/path/to/crl.pem
- ### PostgreSQL Server Settings (for Supabase Supavisor)
- # Use this to pass extra options to the PostgreSQL connection string.
- # For Supabase, you might need to set it like this:
- # POSTGRES_SERVER_SETTINGS='options=reference%3D[project-ref]'
- # Default is 100; set to 0 to disable
- # POSTGRES_STATEMENT_CACHE_SIZE=100
- ### Neo4j Configuration
- NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD='your_password'
- NEO4J_DATABASE=neo4j
- NEO4J_MAX_CONNECTION_POOL_SIZE=100
- NEO4J_CONNECTION_TIMEOUT=30
- NEO4J_CONNECTION_ACQUISITION_TIMEOUT=30
- NEO4J_MAX_TRANSACTION_RETRY_TIME=30
- NEO4J_MAX_CONNECTION_LIFETIME=300
- NEO4J_LIVENESS_CHECK_TIMEOUT=30
- NEO4J_KEEP_ALIVE=true
- ### DB specific workspace should not be set; kept for compatibility only
- # NEO4J_WORKSPACE=forced_workspace_name
- ### MongoDB Configuration
- # For MongoVectorDBStorage, MONGO_URI must point to a MongoDB endpoint with
- # Atlas Search / Vector Search support, such as MongoDB Atlas or Atlas local.
- MONGO_URI=mongodb://localhost:27017/
- MONGO_DATABASE=LightRAG
- ### DB specific workspace should not be set; kept for compatibility only
- # MONGODB_WORKSPACE=forced_workspace_name
- # Community/local Docker MongoDB example for KV, graph, or doc-status storage only:
- # MONGO_URI=mongodb://localhost:27017/
- ### OpenSearch Configuration
- ### OpenSearch can be used for all storage types: KV, Vector, Graph, DocStatus
- ### Connection settings (comma-separated host:port entries; do not include http:// or https://)
- ### This setup wizard supports authenticated OpenSearch clusters only.
- ### OPENSEARCH_USE_SSL controls whether those hosts are reached over TLS.
- OPENSEARCH_HOSTS=localhost:9200
- OPENSEARCH_USER=admin
- OPENSEARCH_PASSWORD=LightRAG2026_!@
- OPENSEARCH_USE_SSL=true
- OPENSEARCH_VERIFY_CERTS=false
- # OPENSEARCH_TIMEOUT=30
- # OPENSEARCH_MAX_RETRIES=3
- ### Index Settings (for 3-AZ Amazon OpenSearch Service, set replicas to 2)
- # OPENSEARCH_NUMBER_OF_SHARDS=1
- # OPENSEARCH_NUMBER_OF_REPLICAS=0
- ### k-NN Settings for Vector Storage (HNSW algorithm)
- # OPENSEARCH_KNN_EF_CONSTRUCTION=200
- # OPENSEARCH_KNN_M=16
- # OPENSEARCH_KNN_EF_SEARCH=100
- ### PPL graphlookup for server-side graph traversal (auto-detected if not set)
- # OPENSEARCH_USE_PPL_GRAPHLOOKUP=true
- ### DB specific workspace should not be set; kept for compatibility only
- # OPENSEARCH_WORKSPACE=forced_workspace_name
- ### Milvus Configuration
- MILVUS_URI=http://localhost:19530
- MILVUS_DB_NAME=lightrag
- # MILVUS_DEVICE=cpu
- # MILVUS_USER=root
- # MILVUS_PASSWORD=your_password
- # MILVUS_TOKEN=your_token
- # Required for the bundled Docker Milvus stack; may come from .env or exported shell variables.
- # MINIO_ACCESS_KEY_ID=minioadmin
- # MINIO_SECRET_ACCESS_KEY=minioadmin
- ### DB specific workspace should not be set; kept for compatibility only
- # MILVUS_WORKSPACE=forced_workspace_name
- ### Milvus Vector Index Configuration
- ### Index type: AUTOINDEX (default), HNSW, HNSW_SQ, HNSW_PQ, IVF_FLAT, IVF_SQ8, DISKANN
- # MILVUS_INDEX_TYPE=AUTOINDEX
- ### Metric type: COSINE (default), L2, IP
- # MILVUS_METRIC_TYPE=COSINE
- ### HNSW / HNSW_SQ / HNSW_PQ Parameters (aligned with Milvus 2.4+ defaults)
- ### M: Maximum number of connections per node [2-2048], default 16
- # MILVUS_HNSW_M=16
- ### efConstruction: Size of dynamic candidate list during build [8-512], default 360
- # MILVUS_HNSW_EF_CONSTRUCTION=360
- ### ef: Size of dynamic candidate list during search, default 200
- # MILVUS_HNSW_EF=200
- ### HNSW_SQ Specific Parameters (requires Milvus 2.6.8+)
- ### sq_type: Scalar quantization type - SQ4U, SQ6, SQ8 (default), BF16, FP16
- # MILVUS_HNSW_SQ_TYPE=SQ8
- ### refine: Enable refinement step for higher precision, default false
- # MILVUS_HNSW_SQ_REFINE=false
- ### refine_type: Refinement precision (must be higher than sq_type) - SQ6, SQ8, BF16, FP16, FP32
- # MILVUS_HNSW_SQ_REFINE_TYPE=FP32
- ### refine_k: Refinement expansion factor, default 10
- # MILVUS_HNSW_SQ_REFINE_K=10
- ### IVF_FLAT / IVF_SQ8 Parameters
- ### nlist: Number of cluster units [1-65536], recommended sqrt(n) for n>1M, default 1024
- # MILVUS_IVF_NLIST=1024
- ### nprobe: Number of units to query [1-nlist], default 16
- # MILVUS_IVF_NPROBE=16
- ### Qdrant
- QDRANT_URL=http://localhost:6333
- # QDRANT_DEVICE=cpu
- # QDRANT_API_KEY=your-api-key
- ### Qdrant upsert batching (enabled by default)
- ### Split large upserts by estimated JSON payload size and point count
- ### Default 16MB keeps safe headroom below common 32MB gateway/request limits
- # QDRANT_UPSERT_MAX_PAYLOAD_BYTES=16777216
- # QDRANT_UPSERT_MAX_POINTS_PER_BATCH=128
- ### DB specific workspace should not be set; kept for compatibility only
- # QDRANT_WORKSPACE=forced_workspace_name
- ### Redis
- REDIS_URI=redis://localhost:6379
- REDIS_SOCKET_TIMEOUT=30
- REDIS_CONNECT_TIMEOUT=10
- REDIS_MAX_CONNECTIONS=100
- REDIS_RETRY_ATTEMPTS=3
- ### DB specific workspace should not be set; kept for compatibility only
- # REDIS_WORKSPACE=forced_workspace_name
- ### Memgraph Configuration
- MEMGRAPH_URI=bolt://localhost:7687
- MEMGRAPH_USERNAME=
- MEMGRAPH_PASSWORD=
- MEMGRAPH_DATABASE=memgraph
- ### DB specific workspace should not be set; kept for compatibility only
- # MEMGRAPH_WORKSPACE=forced_workspace_name
- ###########################################################
- ### Langfuse Observability Configuration
- ### Only works with LLM provided by OpenAI compatible API
- ### Install with: pip install lightrag-hku[observability]
- ### Sign up at: https://cloud.langfuse.com or self-host
- ###########################################################
- # LANGFUSE_SECRET_KEY=''
- # LANGFUSE_PUBLIC_KEY=''
- # LANGFUSE_HOST='https://cloud.langfuse.com'
- # LANGFUSE_ENABLE_TRACE=true
- ############################
- ### Evaluation Configuration
- ############################
- ### RAGAS evaluation models (used for RAG quality assessment)
- ### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
- ### Default uses OpenAI models for evaluation
- ### LLM Configuration for Evaluation
- # EVAL_LLM_MODEL=gpt-4o-mini
- ### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
- # EVAL_LLM_BINDING_API_KEY=your_api_key
- ### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
- # EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
- ### Embedding Configuration for Evaluation
- # EVAL_EMBEDDING_MODEL=text-embedding-3-large
- ### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
- # EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
- ### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
- # EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
- ### Performance Tuning
- ### Number of concurrent test case evaluations
- ### Lower values reduce API rate limit issues but increase evaluation time
- # EVAL_MAX_CONCURRENT=2
- ### TOP_K query parameter of LightRAG (default: 10)
- ### Number of entities or relations retrieved from KG
- # EVAL_QUERY_TOP_K=10
- ### LLM request retry and timeout settings for evaluation
- # EVAL_LLM_MAX_RETRIES=5
- # EVAL_LLM_TIMEOUT=180
- ##########################################################################
- ### ----- Preserved custom environment variables from previous .env -----
- ### ----- Comments in this session will persist across regenerations -----
- ### (This must be the final session; ensure the preceding lines unchanged)
- ##########################################################################
- ### The "make env*" wizard will leave the following lines unchanged
- ### You may add additional env vars or comments here for your own purpose
- ##########################################################################
- ### AWS Bedrock
- # LLM_BINDING=bedrock
- # LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
- # LLM_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
- ### ----- Extra setting from previous .env -----
|