import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
def greedy_regression(X, y, *, max_features=None, min_improvement=None,
                      verbose=True):
    """Greedy forward feature selection for multivariate linear regression.

    Starting from an empty set, each round fits a ``LinearRegression`` for
    every remaining candidate column (together with the columns already
    chosen) and keeps the candidate with the highest R² on the data it was
    fitted on. Ties go to the lowest column index.

    Args:
        X: 2-D array-like of shape (n_samples, n_features). Dense inputs
            such as nested lists are converted with ``np.asarray``.
        y: Target values, passed unchanged to ``LinearRegression.fit``.
        max_features: Optional upper bound on how many features to select.
            ``None`` (default) keeps going until every column is added.
        min_improvement: Optional minimum R² gain a round must achieve over
            the previous round for its feature to be kept; selection stops
            at the first round that falls short. The first feature is
            always accepted so that a model can be fitted. ``None``
            (default) disables the check.
        verbose: When true (default), print one progress line per round.

    Returns:
        A ``(model, selected)`` tuple: a ``LinearRegression`` fitted on
        ``X[:, selected]`` and the list of column indices in the order
        they were added.

    Raises:
        ValueError: If ``X`` is not 2-dimensional, has no columns, or
            ``max_features`` is smaller than 1.
    """
    # Objects exposing ``tocsr`` are taken to be SciPy sparse matrices, which
    # already support ``X[:, cols]``; ``np.asarray`` would wrap them in a
    # 0-d object array, so they are left as they are.
    if not hasattr(X, "tocsr"):
        X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            f"X must be 2-dimensional, got {X.ndim} dimension(s)"
        )
    n_features = X.shape[1]
    if n_features == 0:
        raise ValueError("X must contain at least one feature column")
    if max_features is not None and max_features < 1:
        raise ValueError("max_features must be at least 1")

    limit = n_features if max_features is None else min(max_features, n_features)
    selected = []
    remaining = list(range(n_features))
    previous_score = None  # best R² of the previous round, once there is one

    if verbose:
        print("Greedy Feature Selection:")

    while remaining and len(selected) < limit:
        best_score = -np.inf
        best_feature = None

        # Try adding each remaining feature to the current selection.
        for feature in remaining:
            columns = selected + [feature]
            candidate = LinearRegression()
            candidate.fit(X[:, columns], y)
            score = r2_score(y, candidate.predict(X[:, columns]))
            # Strict comparison keeps the earliest column on ties.
            if score > best_score:
                best_score = score
                best_feature = feature

        # Stop before adding a feature whose gain is below the threshold;
        # never applied to the very first feature.
        if (
            min_improvement is not None
            and previous_score is not None
            and best_score - previous_score < min_improvement
        ):
            break

        selected.append(best_feature)
        remaining.remove(best_feature)
        previous_score = best_score
        if verbose:
            print(f"Added feature {best_feature}: R² = {best_score:.4f}")

    # Final model refitted on exactly the selected columns, in selection order.
    model = LinearRegression()
    model.fit(X[:, selected], y)
    return model, selected
# Example usage
if __name__ == "__main__":
    # Reproducible synthetic data: 100 samples with 5 features, of which
    # only columns 0, 2 and 4 enter the target (plus small Gaussian noise).
    np.random.seed(42)
    X = np.random.randn(100, 5)
    signal = 3*X[:, 0] + 2*X[:, 2] + 0.5*X[:, 4]
    noise = np.random.randn(100)*0.1
    y = signal + noise

    # Run the greedy selection and report the resulting fit.
    fitted, order = greedy_regression(X, y)
    print(f"\nSelected feature order: {order}")
    final_r2 = r2_score(y, fitted.predict(X[:, order]))
    print(f"Final R²: {final_r2:.4f}")
    print(f"Coefficients: {fitted.coef_}")