Complete documentation of EVERY configuration option, file, environment variable, and setting in Capcat.
Source: Application/core/config.py, Application/docs/configuration.md
Settings are applied in this order (highest to lowest priority):
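1. Command-line arguments (e.g. --count 10)
2. Environment variables (e.g. export CAPCAT_PROCESSING_MAX_WORKERS=16)
3. Configuration files (capcat.yml, capcat.json)
Example:
# capcat.yml has: default_count: 20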
# Environment: export CAPCAT_DEFAULT_COUNT=40
# Command: ./capcat fetch hn --count 10
# Result: Uses 10 (CLI wins)
Configuration file locations:
- --config FILE flag
- ./capcat.yml (current directory)
- ./capcat.json (current directory)
- ~/.capcat/capcat.yml (user home)
- ~/.capcat/capcat.json (user home)
Supported file formats:
- .yml or .yaml - YAML format (recommended)
- .json - JSON format
Complete YAML configuration with ALL options:
# Application/capcat.yml - Complete configuration
# ============================================================
# NETWORK CONFIGURATION
# ============================================================
network:
  # HTTP Timeouts (seconds)
  connect_timeout: 10 # Connection establishment timeout
  read_timeout: 30 # Response read timeout (increased for complex articles)
  media_download_timeout: 60 # Media file download timeout
  head_request_timeout: 10 # HEAD request timeout for size checks
  # Connection Pooling
  pool_connections: 20 # Number of connection pools to cache
  pool_maxsize: 20 # Maximum connections per pool
  # User Agent
  user_agent: "Capcat/2.0 (Personal news archiver)"
  # Request Retries
  max_retries: 3 # Maximum retry attempts for failed requests
  retry_delay: 1.0 # Base delay between retries (seconds)
# ============================================================
# PROCESSING CONFIGURATION
# ============================================================
processing:
  # Concurrency Settings
  max_workers: 8 # Parallel ThreadPoolExecutor workers (1-32)
  # File Handling
  max_filename_length: 100 # Maximum filename length before truncation
  # Content Processing
  remove_script_tags: true # Remove <script> tags from content
  remove_style_tags: true # Remove <style> tags from content
  remove_nav_tags: true # Remove <nav> tags from content
  # Media Processing
  download_images: true # Always download images (embedded in articles)
  download_videos: false # Download video files (requires --media flag)
  download_audio: false # Download audio files (requires --media flag)
  download_documents: false # Download PDF/document files (requires --media flag)
  # Output Settings
  create_comments_file: true # Create separate comments.md file
  markdown_line_breaks: true # Preserve line breaks in markdown
# ============================================================
# LOGGING CONFIGURATION
# ============================================================
logging:
  # Log Levels (DEBUG, INFO, WARNING, ERROR, CRITICAL)
  default_level: "INFO" # Default log level for all loggers
  file_level: "DEBUG" # Log level for file output
  console_level: "INFO" # Log level for console output
  # Log Formatting
  use_colors: true # Colored console output
  include_timestamps: true # Include timestamps in logs
  include_module_names: true # Include module/function names in logs
  # File Logging
  auto_create_log_dir: true # Automatically create log directories
  max_log_file_size: 10485760 # Max log file size in bytes (10MB)
  log_file_backup_count: 5 # Number of backup log files to keep
# ============================================================
# UI CONFIGURATION
# ============================================================
ui:
  # Progress Animations
  progress_spinner_style: "dots" # Spinner style: dots, wave, loading, pulse, bounce, modern
  batch_spinner_style: "activity" # Batch spinner: activity, progress, pulse, wave, dots, scan
  progress_bar_width: 25 # Width of progress bars in characters
  show_progress_animations: true # Enable/disable progress animations
  # Visual Feedback
  use_emojis: true # Use emojis in output (disable per CLAUDE.md)
  use_colors: true # Colored terminal output
  show_detailed_progress: false # Show detailed article-level progress
Complete JSON configuration (equivalent to YAML above):
{
"network": {
"connect_timeout": 10,
"read_timeout": 30,
"media_download_timeout": 60,
"head_request_timeout": 10,
"pool_connections": 20,
"pool_maxsize": 20,
"user_agent": "Capcat/2.0 (Personal news archiver)",
"max_retries": 3,
"retry_delay": 1.0
},
"processing": {
"max_workers": 8,
"max_filename_length": 100,
"remove_script_tags": true,
"remove_style_tags": true,
"remove_nav_tags": true,
"download_images": true,
"download_videos": false,
"download_audio": false,
"download_documents": false,
"create_comments_file": true,
"markdown_line_breaks": true
},
"logging": {
"default_level": "INFO",
"file_level": "DEBUG",
"console_level": "INFO",
"use_colors": true,
"include_timestamps": true,
"include_module_names": true,
"auto_create_log_dir": true,
"max_log_file_size": 10485760,
"log_file_backup_count": 5
},
"ui": {
"progress_spinner_style": "dots",
"batch_spinner_style": "activity",
"progress_bar_width": 25,
"show_progress_animations": true,
"use_emojis": true,
"use_colors": true,
"show_detailed_progress": false
}
}
Source: Application/core/config.py
Location: Application/core/config.py:18
from core.config import NetworkConfig
network = NetworkConfig(
connect_timeout=15,
read_timeout=45,
max_retries=5
)
Location: Application/core/config.py:42
from core.config import ProcessingConfig
processing = ProcessingConfig(
max_workers=16,
download_videos=True,
max_filename_length=150
)
Location: Application/core/config.py:68
- dots: Simple rotating dots
- wave: Wave animation
- loading: Loading bar
- pulse: Pulsing indicator
- bounce: Bouncing animation
- modern: Modern spinner
- activity: Activity indicator
- progress: Progress indicator
- scan: Scanning animation
from core.config import UIConfig
ui = UIConfig(
progress_spinner_style="wave",
use_emojis=False, # Per CLAUDE.md requirement
show_detailed_progress=True
)
Location: Application/core/config.py:88
- DEBUG: Detailed debugging information (most verbose)
- INFO: General information messages (default)
- WARNING: Warning messages
- ERROR: Error messages only
- CRITICAL: Critical errors only (least verbose)
from core.config import LoggingConfig
logging = LoggingConfig(
default_level="DEBUG",
use_colors=False,
max_log_file_size=20971520 # 20MB
)
Location: Application/core/config.py:108
@dataclass
class FetchNewsConfig:
    """Top-level configuration grouping the four section configs.

    Every section defaults to ``None`` and is replaced by a
    default-constructed section object in ``__post_init__``, so a bare
    ``FetchNewsConfig()`` always has all four sections populated.
    """

    network: NetworkConfig = None
    processing: ProcessingConfig = None
    logging: LoggingConfig = None
    ui: UIConfig = None

    def __post_init__(self):
        """Initialize sub-configs if not provided."""
        if self.network is None:
            self.network = NetworkConfig()
        if self.processing is None:
            self.processing = ProcessingConfig()
        if self.logging is None:
            self.logging = LoggingConfig()
        if self.ui is None:
            self.ui = UIConfig()

    def to_dict(self) -> Dict[str, Any]:
        """Convert configuration to dictionary.

        Returns:
            The result of ``dataclasses.asdict`` on this instance.
        """
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "FetchNewsConfig":
        """Create configuration from dictionary.

        Args:
            data: Mapping with optional ``"network"``, ``"processing"``,
                ``"logging"`` and ``"ui"`` entries. Each entry is unpacked
                as keyword arguments into the matching section class; a
                missing entry yields that section's defaults.

        Returns:
            A new configuration built from ``data``.
        """
        network_data = data.get("network", {})
        processing_data = data.get("processing", {})
        logging_data = data.get("logging", {})
        # The "ui" section must be read as well; otherwise a
        # to_dict()/from_dict() round trip resets all UI settings.
        ui_data = data.get("ui", {})
        return cls(
            network=NetworkConfig(**network_data),
            processing=ProcessingConfig(**processing_data),
            logging=LoggingConfig(**logging_data),
            ui=UIConfig(**ui_data),
        )
from core.config import FetchNewsConfig, NetworkConfig, ProcessingConfig
config = FetchNewsConfig(
network=NetworkConfig(connect_timeout=15),
processing=ProcessingConfig(max_workers=16)
)
# Convert to dictionary
config_dict = config.to_dict()
# Create from dictionary
loaded_config = FetchNewsConfig.from_dict(config_dict)
All configuration options can be overridden with environment variables using CAPCAT_ prefix.
CAPCAT_<SECTION>_<FIELD>
# Network settings
export CAPCAT_NETWORK_CONNECT_TIMEOUT=15
export CAPCAT_NETWORK_READ_TIMEOUT=45
export CAPCAT_NETWORK_USER_AGENT="MyBot/1.0"
export CAPCAT_NETWORK_MAX_RETRIES=5
export CAPCAT_NETWORK_RETRY_DELAY=2.0
# Processing settings
export CAPCAT_PROCESSING_MAX_WORKERS=16
export CAPCAT_PROCESSING_DOWNLOAD_VIDEOS=true
export CAPCAT_PROCESSING_MAX_FILENAME_LENGTH=150
# Logging settings
export CAPCAT_LOGGING_DEFAULT_LEVEL=DEBUG
export CAPCAT_LOGGING_USE_COLORS=false
export CAPCAT_LOGGING_FILE_LEVEL=INFO
# UI settings
export CAPCAT_UI_PROGRESS_SPINNER_STYLE=wave
export CAPCAT_UI_USE_EMOJIS=false
export CAPCAT_UI_SHOW_DETAILED_PROGRESS=true
CAPCAT_NETWORK_CONNECT_TIMEOUT=10
CAPCAT_NETWORK_READ_TIMEOUT=30
CAPCAT_NETWORK_MEDIA_DOWNLOAD_TIMEOUT=60
CAPCAT_NETWORK_HEAD_REQUEST_TIMEOUT=10
CAPCAT_NETWORK_POOL_CONNECTIONS=20
CAPCAT_NETWORK_POOL_MAXSIZE=20
CAPCAT_NETWORK_USER_AGENT="Capcat/2.0 (Personal news archiver)"
CAPCAT_NETWORK_MAX_RETRIES=3
CAPCAT_NETWORK_RETRY_DELAY=1.0
CAPCAT_PROCESSING_MAX_WORKERS=8
CAPCAT_PROCESSING_MAX_FILENAME_LENGTH=100
CAPCAT_PROCESSING_REMOVE_SCRIPT_TAGS=true
CAPCAT_PROCESSING_REMOVE_STYLE_TAGS=true
CAPCAT_PROCESSING_REMOVE_NAV_TAGS=true
CAPCAT_PROCESSING_DOWNLOAD_IMAGES=true
CAPCAT_PROCESSING_DOWNLOAD_VIDEOS=false
CAPCAT_PROCESSING_DOWNLOAD_AUDIO=false
CAPCAT_PROCESSING_DOWNLOAD_DOCUMENTS=false
CAPCAT_PROCESSING_CREATE_COMMENTS_FILE=true
CAPCAT_PROCESSING_MARKDOWN_LINE_BREAKS=true
CAPCAT_LOGGING_DEFAULT_LEVEL="INFO"
CAPCAT_LOGGING_FILE_LEVEL="DEBUG"
CAPCAT_LOGGING_CONSOLE_LEVEL="INFO"
CAPCAT_LOGGING_USE_COLORS=true
CAPCAT_LOGGING_INCLUDE_TIMESTAMPS=true
CAPCAT_LOGGING_INCLUDE_MODULE_NAMES=true
CAPCAT_LOGGING_AUTO_CREATE_LOG_DIR=true
CAPCAT_LOGGING_MAX_LOG_FILE_SIZE=10485760
CAPCAT_LOGGING_LOG_FILE_BACKUP_COUNT=5
CAPCAT_UI_PROGRESS_SPINNER_STYLE="dots"
CAPCAT_UI_BATCH_SPINNER_STYLE="activity"
CAPCAT_UI_PROGRESS_BAR_WIDTH=25
CAPCAT_UI_SHOW_PROGRESS_ANIMATIONS=true
CAPCAT_UI_USE_EMOJIS=true
CAPCAT_UI_USE_COLORS=true
CAPCAT_UI_SHOW_DETAILED_PROGRESS=false
# Override for single command
CAPCAT_PROCESSING_MAX_WORKERS=16 ./capcat bundle tech
# Override multiple settings
CAPCAT_LOGGING_DEFAULT_LEVEL=DEBUG \
CAPCAT_PROCESSING_MAX_WORKERS=16 \
./capcat fetch hn --count 20
# Set for current shell session
export CAPCAT_PROCESSING_MAX_WORKERS=16
export CAPCAT_LOGGING_DEFAULT_LEVEL=DEBUG
./capcat bundle tech
./capcat fetch hn
# Add to ~/.bashrc or ~/.zshrc
echo 'export CAPCAT_PROCESSING_MAX_WORKERS=16' >> ~/.bashrc
echo 'export CAPCAT_LOGGING_DEFAULT_LEVEL=INFO' >> ~/.bashrc
Source: Application/sources/active/bundles.yml
Complete bundle configuration:
bundles:
tech:
description: "Consumer technology news sources"
sources:
- ieee
- mashable
default_count: 30
techpro:
description: "Professional developer news sources"
sources:
- hn
- lb
- iq
default_count: 30
news:
description: "General news sources"
sources:
- bbc
- guardian
default_count: 25
science:
description: "Science and research sources"
sources:
- nature
- scientificamerican
default_count: 20
ai:
description: "AI, Machine Learning, and Rationality sources"
sources:
- mitnews
default_count: 20
sports:
description: "World sports news sources"
sources:
- bbcsport
default_count: 25
all:
description: "All available sources"
sources: [] # Populated dynamically from registry
default_count: 10
- description (str) - Human-readable bundle description
- sources (list) - List of source IDs in bundle
- default_count (int) - Default article count (overridden by --count flag)
Bundles automatically include sources with matching category:
# Bundle named "tech"
tech:
sources:
- ieee # Explicit
- mashable # Explicit
# All sources with category: tech are automatically included:
# - gizmodo (category: tech, not in bundles.yml)
# - futurism (category: tech, not in bundles.yml)
# Append every source the registry reports for this bundle's category,
# skipping IDs the bundle already contains.
category_sources = registry.get_sources_by_category(name)
for source_id in category_sources:
    if source_id in bundle_sources:
        continue
    bundle_sources.append(source_id)
# Edit bundles file
vim sources/active/bundles.yml
# Add new bundle
custom:
description: "My custom bundle"
sources:
- hn
- bbc
- nature
default_count: 15
./capcat catch
# Select: Manage Sources
# Select: Manage Bundles
# Select: Create New Bundle
Location: sources/active/config_driven/configs/*.yaml
# sources/active/config_driven/configs/example.yaml
display_name: "Example News"
base_url: "https://example.com/"
category: "tech"
timeout: 10.0
rate_limit: 1.0
article_selectors:
- ".headline a"
content_selectors:
- ".article-content"
# Complete config-driven source
display_name: "Complete Example"
base_url: "https://example.com/"
category: "tech"
timeout: 15.0
rate_limit: 2.0
supports_comments: false
# RSS Configuration
rss_config:
feed_url: "https://example.com/feed.xml"
use_rss_content: true
content_field: "description"
# Article Discovery
article_selectors:
- ".headline a"
- ".article-title a"
- "h2.title a"
# Content Extraction
content_selectors:
- ".article-content"
- ".post-body"
- "div.content"
# URL Filtering
skip_patterns:
- "/about"
- "/contact"
- "/advertising"
- "?utm_"
- "/sponsored"
# Image Processing
image_processing:
selectors:
- "img"
- ".content img"
- "article img"
url_patterns:
- "example.com/"
- "cdn.example.com/"
allow_extensionless: true
skip_selectors:
- ".sidebar img"
- ".navigation img"
- ".header img"
# Custom Headers
custom_config:
headers:
Accept: "text/html,application/xhtml+xml"
Accept-Language: "en-US,en;q=0.5"
user_agent: "Custom Bot 1.0"
# Metadata Extraction
meta_selectors:
author: ".byline .author"
date: ".publish-date"
tags: ".article-tags a"
# Content Cleaning
remove_selectors:
- ".advertisement"
- ".related-links"
- ".social-share"
# Template Configuration
template:
variant: "article-no-comments"
navigation:
back_to_news_url: "../../news.html"
back_to_news_text: "Back to News"
has_comments: false
Location: sources/active/custom/<source-name>/config.yaml
# sources/active/custom/hn/config.yaml
display_name: "Hacker News"
base_url: "https://news.ycombinator.com/"
category: "tech"
timeout: 10.0
rate_limit: 1.0
supports_comments: true
template:
variant: "article-with-comments"
navigation:
back_to_news_url: "../../news.html"
back_to_news_text: "Back to News"
has_comments: true
comments_url: "comments.html"
comments_text: "View Comments"
Location: Application/core/config.py:161
def __init__(self):
    """Set up the manager with a logger and a default configuration."""
    self.logger = get_logger(__name__)
    # Start from a default-constructed configuration.
    self._config = FetchNewsConfig()
    # Starts out False; nothing has been loaded at construction time.
    self._config_loaded = False
def load_config(
self, config_file: Optional[str] = None, load_env: bool = True
) -> FetchNewsConfig:
"""Load configuration from files and environment variables.
Args:
config_file: Path to config file (JSON or YAML)
load_env: Whether to load environment variables
Returns:
Loaded configuration instance
"""
from core.config import ConfigManager
manager = ConfigManager()
# Load from default locations
config = manager.load_config()
# Load from specific file
config = manager.load_config(config_file="custom.yml")
# Load without environment variables
config = manager.load_config(load_env=False)
def get_config() -> FetchNewsConfig:
    """Return the configuration from the shared ConfigManager.

    The module-level manager is created on first use; every call then
    delegates to its ``load_config()`` with no arguments.
    """
    global _config_manager
    manager = _config_manager
    if manager is None:
        # First use: build the shared manager and remember it.
        manager = _config_manager = ConfigManager()
    return manager.load_config()
def load_config(config_file: Optional[str] = None) -> FetchNewsConfig:
    """Load configuration through the shared ConfigManager.

    Args:
        config_file: Optional path forwarded to the manager's
            ``load_config``; ``None`` is forwarded unchanged.

    Returns:
        Whatever the manager's ``load_config`` returns.
    """
    global _config_manager
    manager = _config_manager
    if manager is None:
        # First use: build the shared manager and remember it.
        manager = _config_manager = ConfigManager()
    return manager.load_config(config_file)
from core.config import get_config, load_config
# Get global config
config = get_config()
# Load specific config
config = load_config("custom.yml")
network:
connect_timeout: 15
read_timeout: 60
media_download_timeout: 120
max_retries: 5
processing:
max_workers: 16
download_videos: true
download_audio: true
download_documents: true
logging:
default_level: "WARNING"
use_colors: true
network:
connect_timeout: 30
max_retries: 1
processing:
max_workers: 4
logging:
default_level: "DEBUG"
file_level: "DEBUG"
console_level: "DEBUG"
include_timestamps: true
include_module_names: true
ui:
show_detailed_progress: true
processing:
download_videos: false
download_audio: false
download_documents: false
logging:
default_level: "ERROR"
use_colors: false
ui:
show_progress_animations: false
use_emojis: false
Configuration classes:
- NetworkConfig - Application/core/config.py:18
- ProcessingConfig - Application/core/config.py:42
- UIConfig - Application/core/config.py:68
- LoggingConfig - Application/core/config.py:88
- FetchNewsConfig - Application/core/config.py:108
- ConfigManager - Application/core/config.py:161
Helper functions:
- get_config() - Application/core/config.py
- load_config() - Application/core/config.py