import random
class WebBrowsingAgent:
    """Tabular Q-learning agent with epsilon-greedy action selection.

    Q-values live in ``q_table``, a dict keyed by ``(state, action)``; pairs
    that were never updated are treated as having value 0. Past transitions
    are kept in ``memory`` (a list, oldest first, bounded by ``memory_size``)
    so that ``replay`` can re-learn from a random mini-batch.

    Args:
        epsilon: Probability of picking a uniformly random action.
        learning_rate: Step size of each Q-value update.
        discount_factor: Weight given to the best next-state value.
        memory_size: Maximum number of experiences kept in ``memory``.
        batch_size: Number of experiences re-learned per ``replay`` call.

    Raises:
        ValueError: If ``memory_size`` or ``batch_size`` is smaller than 1.
    """

    def __init__(self, epsilon: float = 0.1, learning_rate: float = 0.05,
                 discount_factor: float = 0.9, memory_size: int = 1000,
                 batch_size: int = 32):
        if memory_size < 1:
            raise ValueError("memory_size must be at least 1")
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        self.memory = []
        self.epsilon = epsilon  # Exploration rate
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.memory_size = memory_size
        self.batch_size = batch_size
        self.q_table = {}

    def get_state(self, page_content):
        """Return a state key: the hash of the first 100 items of the content.

        NOTE: Python salts ``hash()`` of ``str``/``bytes`` per process unless
        ``PYTHONHASHSEED`` is fixed, so keys derived from text are only
        stable within a single run; a saved ``q_table`` will not match the
        keys produced by a later process.
        """
        return hash(page_content[:100])

    def choose_action(self, state, actions):
        """Pick one of ``actions`` for ``state`` using epsilon-greedy.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise one of the actions with the highest known Q-value is
        returned, ties being broken at random.

        Raises:
            ValueError: If ``actions`` is empty.
        """
        # Fail the same way on both branches; otherwise the exception type
        # would depend on the random explore/exploit draw.
        if len(actions) == 0:
            raise ValueError("choose_action() requires at least one action")
        if random.random() < self.epsilon:
            return random.choice(actions)  # Explore
        q_values = [self.q_table.get((state, a), 0) for a in actions]
        best_q = max(q_values)
        best_actions = [a for a, q in zip(actions, q_values) if q == best_q]
        return random.choice(best_actions)  # Exploit

    def learn(self, state, action, reward, next_state, next_actions):
        """Apply one Q-learning update to ``q_table[(state, action)]``.

        The target is ``reward + discount_factor * max_a Q(next_state, a)``
        taken over ``next_actions``; an empty ``next_actions`` contributes 0,
        which is how a terminal transition is expressed.
        """
        old_value = self.q_table.get((state, action), 0)
        next_max = max(
            (self.q_table.get((next_state, a), 0) for a in next_actions),
            default=0,
        )
        target = reward + self.discount_factor * next_max
        self.q_table[(state, action)] = (
            old_value + self.learning_rate * (target - old_value)
        )

    def update_memory(self, experience):
        """Append ``experience`` and drop the oldest entries beyond the bound.

        ``replay`` expects each experience to be a 5-tuple
        ``(state, action, reward, next_state, next_actions)``.
        """
        self.memory.append(experience)
        overflow = len(self.memory) - self.memory_size
        if overflow > 0:
            # Trim in one slice so the bound also holds if memory_size was
            # lowered after experiences were stored.
            del self.memory[:overflow]

    def replay(self):
        """Re-learn from ``batch_size`` experiences sampled without replacement.

        Does nothing until ``memory`` holds at least ``batch_size`` entries.
        """
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        for state, action, reward, next_state, next_actions in batch:
            self.learn(state, action, reward, next_state, next_actions)
# Simulated environment interaction
def simulate_task(agent, *, max_steps: int = 20, learn: bool = True):
    """Run one simulated browsing episode and return ``(total_reward, steps)``.

    The episode starts on "home". At every step the agent may jump to any of
    the four pages; the episode ends when it lands on "checkout" or after
    ``max_steps`` steps, whichever comes first.

    Args:
        agent: Object providing ``get_state(page)``,
            ``choose_action(state, actions)``, ``learn(state, action, reward,
            next_state, next_actions)`` and ``update_memory(experience)``.
        max_steps: Upper bound on the number of steps in the episode.
        learn: When true (the default) every transition is passed to
            ``agent.learn`` and ``agent.update_memory``. Pass ``False`` to
            roll out the agent without updating it.

    Returns:
        A ``(total_reward, steps)`` tuple.

    Raises:
        ValueError: If the agent returns an action that is not a known page.
    """
    pages = ["home", "search", "product", "checkout"]
    goal = pages[-1]
    current_page = "home"
    current_rank = pages.index(current_page)
    total_reward = 0
    steps = 0
    while steps < max_steps:
        state = agent.get_state(current_page)
        action = agent.choose_action(state, pages)
        action_rank = pages.index(action)
        # Reward: +10 if the action moves to a later page in the funnel,
        # else -1 (staying put or going back).
        # NOTE(review): with this shaping, going back and moving forward
        # again earns more discounted reward than finishing, so a
        # return-maximising agent is pushed towards looping; consider
        # rewarding only the arrival at checkout.
        reward = 10 if action_rank > current_rank else -1
        done = action == goal
        if learn:
            next_state = agent.get_state(action)
            # Checkout is terminal: offer no follow-up actions so the update
            # does not bootstrap from a state the episode never leaves.
            next_actions = [] if done else pages
            agent.learn(state, action, reward, next_state, next_actions)
            agent.update_memory(
                (state, action, reward, next_state, next_actions)
            )
        current_page, current_rank = action, action_rank
        total_reward += reward
        steps += 1
        if done:
            break
    return total_reward, steps
# Training: create a fresh agent, then alternate one simulated browsing
# episode with one experience-replay pass.
NUM_TRAINING_EPISODES = 500
agent = WebBrowsingAgent()
for episode in range(NUM_TRAINING_EPISODES):
    # The per-episode result is not used during training.
    reward, steps = simulate_task(agent)
    agent.replay()
# Evaluation: run further episodes with the trained agent and report how
# often an episode ends with a positive total reward in fewer than 15 steps,
# plus the mean episode duration.
EVAL_EPISODES = 100
SECONDS_PER_STEP = 6  # assuming 6 seconds per step
successes = 0
total_steps = 0
for _ in range(EVAL_EPISODES):
    reward, steps = simulate_task(agent)
    total_steps += steps
    if steps < 15 and reward > 0:
        successes += 1
accuracy = successes / EVAL_EPISODES * 100  # percentage of successful episodes
avg_time = total_steps / EVAL_EPISODES * SECONDS_PER_STEP
print(f"Task completion accuracy: {accuracy:.2f}%")
print(f"Average task completion time: {avg_time:.2f} seconds")