Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/fetch/src/mcp_server_fetch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,15 @@ def main():
help="Ignore robots.txt restrictions",
)
parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")
parser.add_argument(
"--timeout",
type=int,
default=None,
help="Default request timeout in seconds (default: 30, or FETCH_TIMEOUT_MS env var)",
)

args = parser.parse_args()
asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))
asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url, args.timeout))


if __name__ == "__main__":
Expand Down
35 changes: 31 additions & 4 deletions src/fetch/src/mcp_server_fetch/server.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from typing import Annotated, Tuple
from urllib.parse import urlparse, urlunparse

Expand All @@ -22,6 +23,7 @@

# User-Agent sent for tool-call fetches when no custom user agent is supplied to serve().
DEFAULT_USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"
# User-Agent sent for prompt-initiated fetches when no custom user agent is supplied to serve().
DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"
# Request timeout in seconds: the default for fetch_url() and the final fallback
# when neither the CLI flag nor the FETCH_TIMEOUT_MS environment variable is set.
DEFAULT_TIMEOUT_SECS = 30


def extract_content_from_html(html: str) -> str:
Expand Down Expand Up @@ -109,7 +111,8 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url:


async def fetch_url(
url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None,
timeout: int = DEFAULT_TIMEOUT_SECS,
) -> Tuple[str, str]:
"""
Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
Expand All @@ -122,7 +125,7 @@ async def fetch_url(
url,
follow_redirects=True,
headers={"User-Agent": user_agent},
timeout=30,
timeout=timeout,
)
except HTTPError as e:
raise McpError(ErrorData(code=INTERNAL_ERROR, message=f"Failed to fetch {url}: {e!r}"))
Expand Down Expand Up @@ -176,23 +179,45 @@ class Fetch(BaseModel):
description="Get the actual HTML content of the requested page, without simplification.",
),
]
timeout: Annotated[
int | None,
Field(
default=None,
description="Request timeout in seconds. Overrides server default when provided.",
gt=0,
lt=600,
),
]


def _get_default_timeout(cli_timeout: int | None = None) -> int:
"""Resolve the server-wide default timeout (CLI flag > env var > hardcoded default)."""
if cli_timeout is not None:
return cli_timeout
env_val = os.environ.get("FETCH_TIMEOUT_MS")
if env_val:
return max(1, int(env_val) // 1000)
return DEFAULT_TIMEOUT_SECS


async def serve(
custom_user_agent: str | None = None,
ignore_robots_txt: bool = False,
proxy_url: str | None = None,
timeout: int | None = None,
) -> None:
"""Run the fetch MCP server.

Args:
custom_user_agent: Optional custom User-Agent string to use for requests
ignore_robots_txt: Whether to ignore robots.txt restrictions
proxy_url: Optional proxy URL to use for requests
timeout: Optional default timeout in seconds for all requests
"""
server = Server("mcp-fetch")
user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
user_agent_manual = custom_user_agent or DEFAULT_USER_AGENT_MANUAL
default_timeout = _get_default_timeout(timeout)

@server.list_tools()
async def list_tools() -> list[Tool]:
Expand Down Expand Up @@ -234,8 +259,10 @@ async def call_tool(name, arguments: dict) -> list[TextContent]:
if not ignore_robots_txt:
await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)

request_timeout = args.timeout if args.timeout is not None else default_timeout
content, prefix = await fetch_url(
url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url,
timeout=request_timeout,
)
original_length = len(content)
if args.start_index >= original_length:
Expand All @@ -262,7 +289,7 @@ async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult:
url = arguments["url"]

try:
content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url, timeout=default_timeout)
# TODO: after SDK bug is addressed, don't catch the exception
except McpError as e:
return GetPromptResult(
Expand Down
79 changes: 79 additions & 0 deletions src/fetch/tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
get_robots_txt_url,
check_may_autonomously_fetch_url,
fetch_url,
_get_default_timeout,
DEFAULT_USER_AGENT_AUTONOMOUS,
DEFAULT_TIMEOUT_SECS,
)


Expand Down Expand Up @@ -324,3 +326,80 @@ async def test_fetch_with_proxy(self):

# Verify AsyncClient was called with proxy
mock_client_class.assert_called_once_with(proxy="http://proxy.example.com:8080")


class TestTimeout:
    """Tests for the configurable request timeout.

    Covers the precedence rules of ``_get_default_timeout`` (CLI value, then
    ``FETCH_TIMEOUT_MS``, then ``DEFAULT_TIMEOUT_SECS``) and checks that
    ``fetch_url`` forwards its ``timeout`` argument to the client's ``get`` call.
    """

    def test_default_timeout_no_overrides(self):
        """Default timeout is 30s when no CLI or env override."""
        # Run against an emptied environment so that a FETCH_TIMEOUT_MS exported
        # in the developer's shell or CI job cannot change the expected result.
        with patch.dict("os.environ", clear=True):
            assert _get_default_timeout(None) == DEFAULT_TIMEOUT_SECS

    def test_cli_timeout_overrides_default(self):
        """CLI --timeout flag takes highest priority."""
        assert _get_default_timeout(60) == 60

    def test_env_var_overrides_default(self):
        """FETCH_TIMEOUT_MS env var overrides the hardcoded default."""
        with patch.dict("os.environ", {"FETCH_TIMEOUT_MS": "120000"}):
            assert _get_default_timeout(None) == 120

    def test_cli_timeout_overrides_env_var(self):
        """CLI flag takes priority over env var."""
        with patch.dict("os.environ", {"FETCH_TIMEOUT_MS": "120000"}):
            assert _get_default_timeout(10) == 10

    def test_env_var_ms_to_seconds_conversion(self):
        """FETCH_TIMEOUT_MS is correctly converted from ms to seconds."""
        with patch.dict("os.environ", {"FETCH_TIMEOUT_MS": "5000"}):
            assert _get_default_timeout(None) == 5

    def test_env_var_minimum_is_one_second(self):
        """Timeout cannot go below 1 second even with low env var."""
        with patch.dict("os.environ", {"FETCH_TIMEOUT_MS": "100"}):
            assert _get_default_timeout(None) == 1

    @pytest.mark.asyncio
    async def test_fetch_url_uses_custom_timeout(self):
        """Test that fetch_url passes the timeout to httpx."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.text = '{"data": "test"}'
        mock_response.headers = {"content-type": "application/json"}

        with patch("httpx.AsyncClient") as mock_client_class:
            # The patched class is used as an async context manager; make
            # entering it yield a client whose get() returns the canned response.
            mock_client = AsyncMock()
            mock_client.get = AsyncMock(return_value=mock_response)
            mock_client_class.return_value.__aenter__ = AsyncMock(return_value=mock_client)
            mock_client_class.return_value.__aexit__ = AsyncMock(return_value=None)

            await fetch_url(
                "https://example.com/slow",
                DEFAULT_USER_AGENT_AUTONOMOUS,
                timeout=120,
            )

            call_kwargs = mock_client.get.call_args.kwargs
            assert call_kwargs["timeout"] == 120

    @pytest.mark.asyncio
    async def test_fetch_url_default_timeout(self):
        """Test that fetch_url uses DEFAULT_TIMEOUT_SECS when not specified."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.text = "ok"
        mock_response.headers = {"content-type": "text/plain"}

        with patch("httpx.AsyncClient") as mock_client_class:
            # Same async-context-manager wiring as above.
            mock_client = AsyncMock()
            mock_client.get = AsyncMock(return_value=mock_response)
            mock_client_class.return_value.__aenter__ = AsyncMock(return_value=mock_client)
            mock_client_class.return_value.__aexit__ = AsyncMock(return_value=None)

            await fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS,
            )

            call_kwargs = mock_client.get.call_args.kwargs
            assert call_kwargs["timeout"] == DEFAULT_TIMEOUT_SECS
Loading