LangChain Integration
Build AI agents that can search, scrape, and research the web in real-time. Connect your LangChain applications directly to live web data with SearchHive tools.
Real-time Data
Your agents access current web information, not stale training data
Drop-in Tools
Pre-built LangChain tools that work with any agent framework
Easy Setup
Add web capabilities to existing agents in minutes
What you'll build
Three LangChain tools backed by SearchHive APIs: a web search tool (SwiftSearch), a web scraper (ScrapeForge), and a multi-source research tool (DeepDive), plus a conversational agent and a Bitcoin price bot that put all three to work.
Installation
Install required packages
# Using pip
pip install langchain searchhive-python
# Using conda
conda install -c conda-forge langchain
pip install searchhive-python
Prerequisites
You'll need a SearchHive API key and an OpenAI API key (or credentials for another LLM provider); both are read from environment variables in the examples below. Note that these examples target the classic LangChain agent API (initialize_agent, langchain.pydantic_v1), so you may need an older langchain release (pre-0.2) if your environment is newer.
1. Web Search Tool
Create a LangChain tool that gives your agents real-time web search capabilities using SwiftSearch.
SwiftSearch LangChain Tool
from langchain.tools import BaseTool
from langchain.pydantic_v1 import BaseModel, Field
from typing import Optional, Type
import requests


class SwiftSearchInput(BaseModel):
    """Input for SwiftSearch tool."""
    query: str = Field(description="Search query")
    max_results: Optional[int] = Field(default=5, description="Maximum results to return")


class SwiftSearchTool(BaseTool):
    """Tool for real-time web search using the SearchHive SwiftSearch API."""

    name: str = "web_search"
    description: str = (
        "Search the web for current information on any topic. Use this when you "
        "need up-to-date information that might not be in your training data."
    )
    args_schema: Type[BaseModel] = SwiftSearchInput
    api_key: str  # declared as a pydantic field so BaseTool accepts it at construction

    def _run(self, query: str, max_results: int = 5) -> str:
        """Execute the search."""
        try:
            response = requests.post(
                "https://www.searchhive.dev/api/v1/swiftsearch",
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                },
                json={"query": query, "max_results": max_results},
            )
            response.raise_for_status()
            data = response.json()

            # Format results for LLM consumption
            results = []
            for result in data.get("search_results", []):
                results.append(
                    f"Title: {result['title']}\n"
                    f"URL: {result['link']}\n"
                    f"Summary: {result['snippet']}\n"
                    f"Date: {result.get('date', 'N/A')}"
                )
            return f"Found {len(results)} results for '{query}':\n\n" + "\n---\n".join(results)
        except Exception as e:
            return f"Search failed: {str(e)}"

    async def _arun(self, query: str, max_results: int = 5) -> str:
        """Async version - for now, just call the sync version."""
        return self._run(query, max_results)
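You can smoke-test the tool on its own before handing it to an agent. A minimal sketch, assuming SEARCHHIVE_API_KEY is exported in your shell:
import os

search = SwiftSearchTool(api_key=os.environ["SEARCHHIVE_API_KEY"])
# BaseTool.run validates the input against SwiftSearchInput before calling _run
print(search.run("latest LangChain release"))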
Tool customization
The name and description fields are what the LLM reads when deciding which tool to call, so tune them to your use case: a precise description reduces misfired tool calls.
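For example, one way to specialize the search tool is to subclass it and override those defaults; the query logic stays identical. The subclass below is a hypothetical illustration, not part of the SDK:
# Hypothetical subclass: same API call, but the description steers the agent
# toward using this tool only for news lookups.
class NewsSearchTool(SwiftSearchTool):
    name: str = "news_search"
    description: str = "Search the web for recent news articles on a topic."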
2. Web Scraping Tool
Enable your agents to extract content from specific web pages using ScrapeForge.
ScrapeForge LangChain Tool
from langchain.tools import BaseTool
from langchain.pydantic_v1 import BaseModel, Field
from typing import List, Optional, Type
import requests


class ScrapeForgeInput(BaseModel):
    """Input for ScrapeForge tool."""
    url: str = Field(description="URL to scrape")
    extract_options: Optional[List[str]] = Field(
        default=["title", "text"],
        description="What to extract: title, text, links, images, metadata",
    )


class ScrapeToolLangChain(BaseTool):
    """Tool for web scraping using the SearchHive ScrapeForge API."""

    name: str = "web_scraper"
    description: str = "Extract content from any webpage. Use this to get detailed content from specific URLs."
    args_schema: Type[BaseModel] = ScrapeForgeInput
    api_key: str  # declared as a pydantic field so BaseTool accepts it at construction

    def _run(self, url: str, extract_options: Optional[List[str]] = None) -> str:
        """Scrape the webpage."""
        # Avoid a mutable default argument; fall back to title + text
        extract_options = extract_options or ["title", "text"]
        try:
            response = requests.post(
                "https://www.searchhive.dev/api/v1/scrapeforge",
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                },
                json={"url": url, "extract_options": extract_options},
            )
            response.raise_for_status()
            data = response.json()

            content = data.get("primary_content", {})
            if content.get("error"):
                return f"Failed to scrape {url}: {content['error']}"

            result = f"Content from {url}:\n\n"
            if "title" in extract_options and content.get("title"):
                result += f"Title: {content['title']}\n\n"
            if "text" in extract_options and content.get("text"):
                # Truncate very long content so it fits in the LLM context
                text = content["text"][:2000]
                if len(content["text"]) > 2000:
                    text += "... [truncated]"
                result += f"Content: {text}\n\n"
            return result
        except Exception as e:
            return f"Scraping failed: {str(e)}"

    async def _arun(self, url: str, extract_options: Optional[List[str]] = None) -> str:
        """Async version - for now, just call the sync version."""
        return self._run(url, extract_options)
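As before, the tool can be exercised directly. A quick sketch, assuming the same environment variable; dict input maps onto the ScrapeForgeInput schema:
import os

scraper = ScrapeToolLangChain(api_key=os.environ["SEARCHHIVE_API_KEY"])
print(scraper.run({"url": "https://example.com", "extract_options": ["title", "text"]}))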
3. AI Research Tool
Add comprehensive research capabilities using DeepDive for multi-source analysis.
DeepDive LangChain Research Tool
from langchain.tools import BaseTool
from langchain.pydantic_v1 import BaseModel, Field
from typing import Optional, Type
import requests


class DeepDiveInput(BaseModel):
    """Input for DeepDive research tool."""
    topic: str = Field(description="Research topic")
    max_sources: Optional[int] = Field(default=5, description="Maximum sources to analyze")


class ResearchTool(BaseTool):
    """Tool for AI-powered research using the SearchHive DeepDive API."""

    name: str = "research_topic"
    description: str = (
        "Conduct comprehensive research on any topic by analyzing multiple sources. "
        "Use this for in-depth analysis and when you need comprehensive information."
    )
    args_schema: Type[BaseModel] = DeepDiveInput
    api_key: str  # declared as a pydantic field so BaseTool accepts it at construction

    def _run(self, topic: str, max_sources: int = 5) -> str:
        """Research the topic."""
        try:
            response = requests.post(
                "https://www.searchhive.dev/api/v1/deepdive",
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                },
                json={"topic": topic, "max_sources": max_sources},
            )
            response.raise_for_status()
            data = response.json()

            result = f"Research results for '{topic}':\n\n"
            result += f"Sources analyzed: {data.get('sources_analyzed', 0)}\n"
            result += f"Research depth: {data.get('research_depth', 'N/A')}\n\n"

            # Include search results
            if data.get("search_results"):
                result += "Key Sources:\n"
                for i, source in enumerate(data["search_results"][:3], 1):
                    result += f"{i}. {source['title']} ({source['link']})\n"
                result += "\n"

            # Include scraped content summaries
            if data.get("scraped_content"):
                result += "Content Analysis:\n"
                for content in data["scraped_content"][:2]:
                    if not content.get("error") and content.get("text"):
                        # Use the first 200 characters as a summary
                        summary = content["text"][:200] + "..." if len(content["text"]) > 200 else content["text"]
                        result += f"• {summary}\n\n"
            return result
        except Exception as e:
            return f"Research failed: {str(e)}"

    async def _arun(self, topic: str, max_sources: int = 5) -> str:
        """Async version - for now, just call the sync version."""
        return self._run(topic, max_sources)
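A standalone call, for completeness; the topic and max_sources here are just illustrative values:
import os

research = ResearchTool(api_key=os.environ["SEARCHHIVE_API_KEY"])
print(research.run({"topic": "state of open-source LLMs", "max_sources": 3}))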
Creating Your Web-Enabled Agent
Now let's put it all together and create an AI agent with web superpowers.
Web-enabled LangChain Agent
import os

from langchain.agents import AgentType, initialize_agent
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory

# Initialize your SearchHive tools
api_key = os.getenv("SEARCHHIVE_API_KEY")
search_tool = SwiftSearchTool(api_key=api_key)
scrape_tool = ScrapeToolLangChain(api_key=api_key)
research_tool = ResearchTool(api_key=api_key)

# Initialize LLM
llm = OpenAI(temperature=0)

# Create memory for conversation context
memory = ConversationBufferMemory(memory_key="chat_history")

# Initialize agent with tools
agent = initialize_agent(
    tools=[search_tool, scrape_tool, research_tool],
    llm=llm,
    agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
    memory=memory,
    verbose=True,
)

# Now your agent can use real-time web data!
result = agent.run(
    "What are the latest developments in AI? "
    "Then find a specific article and analyze it in detail."
)
print(result)
Environment setup
• SEARCHHIVE_API_KEY - Your SearchHive API key
• OPENAI_API_KEY - Your OpenAI API key (or use another LLM)
Real Example: Bitcoin Price Bot
Here's a complete example of a Bitcoin price monitoring bot that demonstrates real-world usage.
Bitcoin Price Analysis Bot
from langchain.agents import AgentType, initialize_agent
from langchain.llms import OpenAI


class BitcoinPriceBot:
    def __init__(self, searchhive_api_key: str, openai_api_key: str):
        # Initialize tools
        self.search_tool = SwiftSearchTool(api_key=searchhive_api_key)
        self.scrape_tool = ScrapeToolLangChain(api_key=searchhive_api_key)

        # Initialize LLM
        self.llm = OpenAI(openai_api_key=openai_api_key, temperature=0.3)

        # Create specialized agent
        self.agent = initialize_agent(
            tools=[self.search_tool, self.scrape_tool],
            llm=self.llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
        )

    def get_price_analysis(self) -> str:
        """Get comprehensive Bitcoin price analysis."""
        prompt = """
        I need a comprehensive Bitcoin price analysis. Please:
        1. Search for the current Bitcoin price and recent news
        2. Find expert predictions for 2025
        3. Scrape detailed analysis from major crypto news sites
        4. Provide a summary with key price drivers and predictions
        """
        return self.agent.run(prompt)

    def monitor_specific_sources(self, sources: list) -> str:
        """Monitor specific sources for Bitcoin news."""
        results = []
        for source_url in sources:
            try:
                content = self.scrape_tool._run(source_url)
                results.append(f"Content from {source_url}:\n{content}\n---\n")
            except Exception as e:
                results.append(f"Failed to scrape {source_url}: {e}\n---\n")

        # Ask the LLM to analyze all collected content
        analysis_prompt = f"""
        Analyze the following Bitcoin-related content and provide insights:

        {' '.join(results)}

        Please summarize key insights, price predictions, and market sentiment.
        """
        return self.llm(analysis_prompt)


# Usage example
bot = BitcoinPriceBot(
    searchhive_api_key="sk_live_your_key_here",
    openai_api_key="your_openai_key_here",
)

# Get current analysis
analysis = bot.get_price_analysis()
print(analysis)

# Monitor specific sources
sources = [
    "https://coindesk.com/price/bitcoin",
    "https://cointelegraph.com/bitcoin-price-prediction",
]
monitoring_result = bot.monitor_specific_sources(sources)
print(monitoring_result)
What this bot does:
• Searches for the current Bitcoin price and news
• Finds expert predictions for 2025
• Scrapes detailed analysis from crypto news sites
• Provides AI-generated insights and summaries
Advanced Patterns
Create specialized agents for different tasks: one for search, one for analysis, one for reporting (see the sketch below).
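A minimal sketch of that split, reusing the tools and llm defined earlier; the hard-coded two-step pipeline here stands in for whatever routing logic your application actually needs:
# Hypothetical two-stage pipeline: a search agent gathers sources,
# then an analysis agent scrapes and summarizes them.
search_agent = initialize_agent(
    tools=[search_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
)
analysis_agent = initialize_agent(
    tools=[scrape_tool, research_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
)

def research_pipeline(question: str) -> str:
    sources = search_agent.run(f"Find the three most relevant sources for: {question}")
    return analysis_agent.run(
        f"Scrape and analyze these sources, then answer '{question}':\n{sources}"
    )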
Handle real-time data streams by implementing the tools' _arun methods with a non-blocking HTTP client, so an agent can issue requests concurrently for better performance (see the sketch below).
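A sketch of a truly asynchronous variant, assuming httpx is installed (pip install httpx); it overrides _arun on the search tool from section 1:
import httpx


class AsyncSwiftSearchTool(SwiftSearchTool):
    """SwiftSearchTool with a non-blocking _arun."""

    async def _arun(self, query: str, max_results: int = 5) -> str:
        try:
            async with httpx.AsyncClient(timeout=30) as client:
                response = await client.post(
                    "https://www.searchhive.dev/api/v1/swiftsearch",
                    headers={"Authorization": f"Bearer {self.api_key}"},
                    json={"query": query, "max_results": max_results},
                )
                response.raise_for_status()
                data = response.json()
        except httpx.HTTPError as e:
            return f"Search failed: {e}"
        lines = [f"{r['title']} - {r['link']}" for r in data.get("search_results", [])]
        return "\n".join(lines) or f"No results for '{query}'"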
Error Handling & Best Practices
Robust error handling
def _run(self, query: str) -> str:
    try:
        response = requests.post(...)  # same request as in the tools above
        response.raise_for_status()
        return self._format_results(response.json())
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 401:
            return "Error: Invalid API key. Please check your SearchHive configuration."
        elif e.response.status_code == 402:
            return "Error: Insufficient credits. Please add more credits to your account."
        elif e.response.status_code == 429:
            return "Error: Rate limit exceeded. Please wait before making more requests."
        else:
            return f"API Error: {e.response.status_code} - {e.response.text}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"