[{"id":"5fGQt4VbkDnr3Yp8PXPr","number":"1464522124090870782","begin":"2026-06-09T18:22:00+00:00","created":"2026-06-10T01:13:50+00:00","modified":"2026-06-12T22:50:20+00:00","external_desc":"Network traffic to Google Cloud originating from Delhi, Chennai, Mumbai and surrounding areas is experiencing intermittent periods of elevated latency and possible packet loss.","updates":[{"created":"2026-06-12T22:50:20+00:00","modified":"2026-06-12T22:50:20+00:00","when":"2026-06-12T22:50:20+00:00","text":"**Summary**\nNetwork traffic to Google Cloud originating from Delhi, Chennai, Mumbai and surrounding areas is experiencing intermittent periods of elevated latency and possible packet loss.\n**Description**\nA fire at a third-party data center facility required an emergency power shutdown of networking equipment, isolating a non-compute local Point of Presence (POP) in Delhi and reducing available network capacity in the metro area.\nWe rerouted significant traffic from the impacted facility in Delhi to address reduced local serving capabilities. As a result, a subset of Hybrid Connectivity and Virtual Private Cloud (VPC) customers may be impacted by the routing changes made to address reduced local, latency-optimized serving capabilities in Delhi. Affected customers may experience intermittent latency spikes due to demand exceeding capacity across Indian metros and regional ISPs.\nInitial traffic mitigations have yielded positive results for some Cloud customers. In parallel, we are pursuing additional Internet Edge peering capacity to reduce latency in the local Delhi metropolitan area. Further, we are augmenting out-of-region Internet Edge regional peering capacity in Chennai to provide additional load-balancing and redundancy to large ISPs in India (expected to be done by Wednesday, 2026-06-17 PDT). We have optimized capacity across network backbones to increase available headroom. 
Additionally, we are further augmenting our Delhi backbone capacity (expected to be complete by Monday, 2026-06-15 PDT). We will continue to closely monitor latency deviations and packet drops.\nWe will provide our next update by Monday, 2026-06-15 17:00 PDT.\n**Symptoms**\nCustomers may experience slightly elevated latency and non-optimal network routing into Google Cloud until the affected facility is fully restored.\n**Workaround**\nThere is no workaround at this time.","status":"SERVICE_DISRUPTION","affected_locations":[{"title":"Delhi (asia-south2)","id":"asia-south2"}]},{"created":"2026-06-11T23:25:15+00:00","modified":"2026-06-12T22:50:20+00:00","when":"2026-06-11T23:25:15+00:00","text":"**Summary**\nNetwork traffic to Google Cloud originating from Delhi, Chennai, Mumbai and surrounding areas is experiencing intermittent periods of elevated latency and possible packet loss.\n**Description**\nA fire at a third-party data center facility required an emergency power shutdown of networking equipment, isolating a non-compute local Point of Presence (POP) in Delhi and reducing available network capacity in the metro area.\nWe rerouted significant traffic from the impacted facility in Delhi to address reduced local serving capabilities. As a result, a subset of Hybrid Connectivity and Virtual Private Cloud (VPC) customers may be impacted by the routing changes made to address reduced local, latency-optimized serving capabilities in Delhi. Affected customers may experience intermittent latency spikes due to demand exceeding capacity across Indian metros and regional ISPs.\nInitial traffic mitigations have yielded positive results for some Cloud customers. In parallel, we are pursuing additional Internet Edge peering capacity to reduce existing fragility in the metro. We are also optimizing capacity across network backbones to increase additional headroom for VPC customers in the affected region. 
Additionally, we are planning to augment our local Delhi POP and migrating select peering partners to further increase regional capacity. We will continue to closely monitor latency deviations and packet drops.\nWe will provide our next update by Friday, 2026-06-12 at 17:00 PDT.\n**Symptoms**\nCustomers may experience slightly elevated latency and non-optimal network routing into Google Cloud until the affected facility is fully restored.\n**Workaround**\nThere is no workaround at this time.","status":"SERVICE_DISRUPTION","affected_locations":[{"title":"Delhi (asia-south2)","id":"asia-south2"}]},{"created":"2026-06-10T22:25:38+00:00","modified":"2026-06-11T23:25:15+00:00","when":"2026-06-10T22:25:38+00:00","text":"**Summary**\nNetwork traffic to Google Cloud originating from Delhi, Chennai, Mumbai and surrounding areas is experiencing intermittent periods of elevated latency and possible packet loss.\n**Description**\nA fire at a third-party data center facility required an emergency power shutdown of networking equipment, isolating a non-compute local Point of Presence (POP) in Delhi and reducing available network capacity in the metro area.\nWe rerouted significant traffic from the impacted facility in Delhi to address reduced local serving capabilities. As a result, a subset of Hybrid Connectivity and Virtual Private Cloud (VPC) customers may be impacted by the routing changes made to address reduced local, latency-optimized serving capabilities in Delhi. Affected customers may experience intermittent latency spikes due to demand exceeding capacity across Indian metros and regional ISPs.\nWe are investigating additional traffic mitigations and Internet Edge peering augmentation to alleviate the latency issues affecting our customers. 
We are continuing to work with the ISP partners in the region to mitigate any additional impact from unplanned failures.\nWe will provide our next update by Thursday, 2026-06-11 17:00 PDT.\n**Symptoms**\nCustomers may experience slightly elevated latency and non-optimal network routing into Google Cloud until the affected facility is fully restored.\n**Workaround**\nThere is no workaround at this time.","status":"SERVICE_DISRUPTION","affected_locations":[{"title":"Delhi (asia-south2)","id":"asia-south2"}]},{"created":"2026-06-10T01:13:50+00:00","modified":"2026-06-10T22:25:38+00:00","when":"2026-06-10T01:13:50+00:00","text":"**Summary**\nNetwork traffic to Google Cloud originating from Delhi, Chennai, Mumbai and surrounding areas is experiencing intermittent periods of elevated latency and possible packet loss.\n**Description**\nA fire at a third-party data center facility required an emergency power shutdown of networking equipment, isolating a non-compute local Point of Presence (POP) in Delhi and reducing available network capacity in the metro area.\nWe rerouted significant traffic from the impacted facility in Delhi to address reduced local serving capabilities. As a result, a subset of Hybrid Connectivity and Virtual Private Cloud (VPC) customers may be impacted by the routing changes made to address reduced local, latency-optimized serving capabilities in Delhi. 
Affected customers may experience intermittent latency spikes due to demand exceeding capacity across Indian metros and regional ISPs.\nWe are investigating additional traffic mitigations and Internet Edge peering augmentation to alleviate the latency issues affecting our customers.\nWe will provide our next update by Wednesday, 2026-06-10 17:00 PDT.\n**Symptoms**\nCustomers may experience slightly elevated latency and non-optimal network routing into Google Cloud until the affected facility is fully restored.\n**Workaround**\nThere is no workaround at this time.","status":"SERVICE_DISRUPTION","affected_locations":[{"title":"Delhi (asia-south2)","id":"asia-south2"}]}],"most_recent_update":{"created":"2026-06-12T22:50:20+00:00","modified":"2026-06-12T22:50:20+00:00","when":"2026-06-12T22:50:20+00:00","text":"**Summary**\nNetwork traffic to Google Cloud originating from Delhi, Chennai, Mumbai and surrounding areas is experiencing intermittent periods of elevated latency and possible packet loss.\n**Description**\nA fire at a third-party data center facility required an emergency power shutdown of networking equipment, isolating a non-compute local Point of Presence (POP) in Delhi and reducing available network capacity in the metro area.\nWe rerouted significant traffic from the impacted facility in Delhi to address reduced local serving capabilities. As a result, a subset of Hybrid Connectivity and Virtual Private Cloud (VPC) customers may be impacted by the routing changes made to address reduced local, latency-optimized serving capabilities in Delhi. Affected customers may experience intermittent latency spikes due to demand exceeding capacity across Indian metros and regional ISPs.\nInitial traffic mitigations have yielded positive results for some Cloud customers. In parallel, we are pursuing additional Internet Edge peering capacity to reduce latency in the local Delhi metropolitan area. 
Further, we are augmenting out-of-region Internet Edge regional peering capacity in Chennai to provide additional load-balancing and redundancy to large ISPs in India (expected to be done by Wednesday, 2026-06-17 PDT). We have optimized capacity across network backbones to increase available headroom. Additionally, we are further augmenting our Delhi backbone capacity (expected to be complete by Monday, 2026-06-15 PDT). We will continue to closely monitor latency deviations and packet drops.\nWe will provide our next update by Monday, 2026-06-15 17:00 PDT.\n**Symptoms**\nCustomers may experience slightly elevated latency and non-optimal network routing into Google Cloud until the affected facility is fully restored.\n**Workaround**\nThere is no workaround at this time.","status":"SERVICE_DISRUPTION","affected_locations":[{"title":"Delhi (asia-south2)","id":"asia-south2"}]},"status_impact":"SERVICE_DISRUPTION","severity":"medium","service_key":"zall","service_name":"Multiple Products","affected_products":[{"title":"Hybrid Connectivity","id":"5x6CGnZvSHQZ26KtxpK1"},{"title":"Virtual Private Cloud (VPC)","id":"BSGtCUnz6ZmyajsjgTKv"}],"uri":"incidents/5fGQt4VbkDnr3Yp8PXPr","currently_affected_locations":[{"title":"Delhi (asia-south2)","id":"asia-south2"}],"previously_affected_locations":[]},{"id":"41E5S3mkTGDfkZuJZH5k","number":"6876619551109882402","begin":"2026-02-27T12:37:00+00:00","created":"2026-02-27T16:12:30+00:00","end":"2026-02-27T14:35:00+00:00","modified":"2026-03-09T05:25:43+00:00","external_desc":"Vertex AI Gemini API customers experienced increased error rates when accessing the global endpoint.","updates":[{"created":"2026-03-09T05:25:43+00:00","modified":"2026-03-09T05:25:43+00:00","when":"2026-03-09T05:25:43+00:00","text":"# Incident Report\n## Summary\nOn Friday, 27 February 2026 at 04:37 US/Pacific, customers using Vertex AI Gemini API models experienced increased error rates. 
Impacted services included Google Cloud Support, Agent Assist, Vertex Gemini API and Dialogflow CX in US regions and the global endpoint. The issue persisted for a duration of 1 hour and 58 minutes.\nThis is not the level of quality and reliability we strive to offer you, and we have taken immediate steps to improve the platform’s performance and availability.\n## Root Cause\nThis incident was caused by a configuration change to a safety filtering service that supports all Gemini models. For some specific requests, this created code paths that eventually led to service disruptions and capacity loss for the safety filtering service. Consequently, customers encountered overload (429 and 503) errors for their queries, with some users reporting elevated error rates for specific models in US regions.\n## Remediation and Prevention\nGoogle engineers were alerted to the issue via our automated monitoring system on Friday, 27 February 2026 04:54 US/Pacific and immediately started an investigation.\nEngineers identified the faulty configuration change and initiated a rollback to restore the previous stable configuration. Engineers also added more capacity to the service to stabilize it. Full service restoration was confirmed by 06:35 US/Pacific as the rollback propagated and servers became healthy. 
\\ \\\nGoogle is committed to preventing a repeat of this issue and is taking the following actions:\n* Reinforcing rollout processes to include mandatory validation checkpoints.\n* Improving alerting systems to monitor critical dependencies more closely.\n## ## Detailed Description of Impact\nOn Friday, 27 February 2026 between 04:37 and 06:35 US/Pacific, customers accessing Vertex Gemini APIs may have experienced the following:\n* **Affected Models:** All Vertex AI Gemini API models were affected, including gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.5-pro, gemini-3.0-flash-preview, gemini-3.0-pro-preview, gemini-2.0-flash, gemini-2.0-flash-lite.\n* **Error Experience:** * **PayGo Customers:** Experienced primarily 429 Resource Exhausted errors. * **Provisioned Throughput (PT) Customers:** Received 503 Service Unavailable errors. * For PT customers, most errors stopped at **06:00**. For PayGo customers, most errors stopped at **06:20**.\n* **Geographic Scope:** Global endpoint, us-central1, us-east4, and other US regions were impacted.","status":"AVAILABLE","affected_locations":[]},{"created":"2026-03-04T23:23:18+00:00","modified":"2026-03-09T05:25:43+00:00","when":"2026-03-04T23:23:18+00:00","text":"# Preliminary Incident Report\nWe apologize for the inconvenience this service disruption may have caused. We would like to provide some information about this incident below. Please note, this information is based on our best knowledge at the time of posting and is subject to change as our investigation continues. A final Incident Report with preventative actions will be posted once our investigation is complete. 
If you have experienced impact outside of what is listed below, please reach out to Google Cloud Support using https://cloud.google.com/support.\n## Date/Time of the Issue (All time US/Pacific)\nIncident Start: 27 February 2026 04:37\nIncident End: 27 February 2026 06:35\nDuration: 1 hour, 58 minutes\n## Summary\nOn Friday, 27 February 2026 at 04:37 US/Pacific, customers using Vertex AI Gemini API models (including Gemini 2.0, 2.5, and 3.0 previews) experienced increased error rates. Impacted services included Google Cloud Support, Agent Assist, the Vertex Gemini API and Dialogflow CX in US regions and the global endpoint for a duration of 1 hour and 58 minutes.\nThis is not the level of quality and reliability we strive to offer you, and we are taking immediate steps to improve the platform’s performance and availability.\n## Preliminary Root Cause\nThis incident was caused by a configuration change to a safety filtering service that supports all Gemini models. This configuration change enabled a code path that interacted poorly with specific requests, leading to service disruption for the safety filtering service. This in turn led to customers seeing overload (429 and 503) errors for their queries.\nGoogle engineers have begun a full root cause analysis and will provide additional information once it is available.\n## Remediation\nGoogle engineers were alerted to the service disruption via automated alert on Friday, 27 February 2026 04:54 US/Pacific and immediately started an investigation.\nEngineers identified the faulty configuration change for a safety filtering service and initiated a rollback to restore the previous stable configuration. Additionally, engineers added more capacity to the service. 
Full service restoration was confirmed by 06:35 US/Pacific as the rollback propagated and servers became healthy.\n## Description of Impact\nOn Friday, 27 February 2026 between 04:37 and 06:35 US/Pacific, customers accessing Vertex Gemini APIs may have experienced the following:\n- Affected Models: All Gemini versions were affected, including gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.5-pro, gemini-3.0-flash-preview, gemini-3.0-pro-preview, gemini-2.0-flash, gemini-2.0-flash-lite.\n- Error Experience: - PayGo Customers: Experienced primarily 429 Resource Exhausted errors. - Provisioned Throughput (PT) Customers: Received 503 Service Unavailable errors. - For PT customers, most errors stopped at 06:00. For PayGo customers, most errors stopped at 06:20.\n- Geographic Scope: Global endpoint, us-central1, us-east4, and other US regions were impacted.","status":"AVAILABLE","affected_locations":[]},{"created":"2026-02-27T16:12:30+00:00","modified":"2026-03-04T23:23:18+00:00","when":"2026-02-27T16:12:30+00:00","text":"**Description** \\\nBetween Friday, 2026-02-27, 04:36 and 06:45 PST, customers experienced increased error rates when accessing the Vertex Gemini API Global endpoint. The issue impacted API requests to multiple Gemini models.\nThe incident also caused downstream impact to Dialogflow CX, Agent Assist, Google Cloud Support AI agent, and Customer Experience Agent Studio, which rely on Gemini APIs.\nPreliminary analysis indicates the issue was triggered by a recent configuration change. 
Service was fully restored after the configuration change was rolled back.\nWe thank you for your patience while we worked on resolving the issue.\n**Symptom**\n\\\nCustomers experienced increased error rates when sending API requests to multiple impacted Gemini models through the global endpoint.","status":"SERVICE_INFORMATION","affected_locations":[{"title":"Montréal (northamerica-northeast1)","id":"northamerica-northeast1"},{"title":"São Paulo (southamerica-east1)","id":"southamerica-east1"},{"title":"Iowa (us-central1)","id":"us-central1"},{"title":"South Carolina (us-east1)","id":"us-east1"},{"title":"Northern Virginia (us-east4)","id":"us-east4"},{"title":"Columbus (us-east5)","id":"us-east5"},{"title":"Oregon (us-west1)","id":"us-west1"}]}],"most_recent_update":{"created":"2026-03-09T05:25:43+00:00","modified":"2026-03-09T05:25:43+00:00","when":"2026-03-09T05:25:43+00:00","text":"# Incident Report\n## Summary\nOn Friday, 27 February 2026 at 04:37 US/Pacific, customers using Vertex AI Gemini API models experienced increased error rates. Impacted services included Google Cloud Support, Agent Assist, Vertex Gemini API and Dialogflow CX in US regions and the global endpoint. The issue persisted for a duration of 1 hour and 58 minutes.\nThis is not the level of quality and reliability we strive to offer you, and we have taken immediate steps to improve the platform’s performance and availability.\n## Root Cause\nThis incident was caused by a configuration change to a safety filtering service that supports all Gemini models. For some specific requests, this created code paths that eventually led to service disruptions and capacity loss for the safety filtering service. 
Consequently, customers encountered overload (429 and 503) errors for their queries, with some users reporting elevated error rates for specific models in US regions.\n## Remediation and Prevention\nGoogle engineers were alerted to the issue via our automated monitoring system on Friday, 27 February 2026 04:54 US/Pacific and immediately started an investigation.\nEngineers identified the faulty configuration change and initiated a rollback to restore the previous stable configuration. Engineers also added more capacity to the service to stabilize it. Full service restoration was confirmed by 06:35 US/Pacific as the rollback propagated and servers became healthy. \\ \\\nGoogle is committed to preventing a repeat of this issue and is taking the following actions:\n* Reinforcing rollout processes to include mandatory validation checkpoints.\n* Improving alerting systems to monitor critical dependencies more closely.\n## ## Detailed Description of Impact\nOn Friday, 27 February 2026 between 04:37 and 06:35 US/Pacific, customers accessing Vertex Gemini APIs may have experienced the following:\n* **Affected Models:** All Vertex AI Gemini API models were affected, including gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.5-pro, gemini-3.0-flash-preview, gemini-3.0-pro-preview, gemini-2.0-flash, gemini-2.0-flash-lite.\n* **Error Experience:** * **PayGo Customers:** Experienced primarily 429 Resource Exhausted errors. * **Provisioned Throughput (PT) Customers:** Received 503 Service Unavailable errors. * For PT customers, most errors stopped at **06:00**. 
For PayGo customers, most errors stopped at **06:20**.\n* **Geographic Scope:** Global endpoint, us-central1, us-east4, and other US regions were impacted.","status":"AVAILABLE","affected_locations":[]},"status_impact":"SERVICE_INFORMATION","severity":"low","service_key":"zall","service_name":"Multiple Products","affected_products":[{"title":"Agent Assist","id":"eUntUKqUrHdbBLNcVVXq"},{"title":"Dialogflow CX","id":"BnCicQdHSdxaCv8Ya6Vm"},{"title":"Google Cloud Support","id":"bGThzF7oEGP5jcuDdMuk"},{"title":"Vertex Gemini API","id":"Z0FZJAMvEB4j3NbCJs6B"}],"uri":"incidents/41E5S3mkTGDfkZuJZH5k","currently_affected_locations":[],"previously_affected_locations":[{"title":"Global","id":"global"},{"title":"Montréal (northamerica-northeast1)","id":"northamerica-northeast1"},{"title":"São Paulo (southamerica-east1)","id":"southamerica-east1"},{"title":"Iowa (us-central1)","id":"us-central1"},{"title":"South Carolina (us-east1)","id":"us-east1"},{"title":"Northern Virginia (us-east4)","id":"us-east4"},{"title":"Columbus (us-east5)","id":"us-east5"},{"title":"Oregon (us-west1)","id":"us-west1"}]},{"id":"8cY8jdUpEGGbsSMSQk7J","number":"15787347096705530732","begin":"2025-07-18T14:42:00+00:00","created":"2025-07-18T15:54:23+00:00","end":"2025-07-18T16:47:00+00:00","modified":"2025-07-23T09:26:58+00:00","external_desc":"We are investigating elevated error rates with multiple products in us-east1","updates":[{"created":"2025-07-22T13:42:49+00:00","modified":"2025-07-23T09:26:58+00:00","when":"2025-07-22T13:42:49+00:00","text":"## \\# Incident Report\n## \\#\\# Summary\nOn Friday, 18 July 2025 07:50 US/Pacific, several Google Cloud Platform (GCP) and Google Workspace (GWS) products experienced elevated latencies and error rates in the us-east1 region for a duration of up to 1 hour and 57 minutes.\n**GCP Impact Duration:** 18 July 2025 07:50 \\- 09:47 US/Pacific : 1 hour 57 minutes\n**GWS Impact Duration:** 18 July 2025 07:50 \\- 08:40 US/Pacific : 50 minutes\nWe sincerely 
apologize for this incident, which does not reflect the level of quality and reliability we strive to offer. We are taking immediate steps to improve the platform’s performance and availability.\n##\n## \\#\\# Root Cause\nThe service interruption was triggered by a procedural error during a planned hardware replacement in our datacenter. An incorrect physical disconnection was made to the active network switch serving our control plane, rather than the redundant unit scheduled for removal. The redundant unit had been properly de-configured as part of the procedure, and the combination of these two events led to partitioning of the network control plane. Our network is designed to withstand this type of control plane failure by failing open, continuing operation.\nHowever, an operational topology change while the network control plane was in a failed open state caused our network fabric's topology information to become stale. This led to packet loss and service disruption until services were moved away from the fabric and control plane connectivity was restored.\n## \\#\\# Remediation and Prevention\nGoogle engineers were alerted to the outage by our monitoring system on 18 July 2025 07:06 US/Pacific and immediately started an investigation. The following timeline details the remediation and restoration efforts:\n* **07:39 US/Pacific**: The underlying root cause (device disconnect) was identified and onsite technicians were engaged to reconnect the control plane device and restore control plane connectivity. At that moment, network failure open mechanisms worked as expected and no impact was observed.\n* **07:50 US/Pacific**: A topology change led to traffic being routed suboptimally, due to the network being in a fail open state. This caused congestion on the subset of links, packet loss, and latency to customer traffic. 
Engineers made a decision to move traffic away from the affected fabric, which mitigated the impact for the majority of the services.\n* **08:40 US/Pacific**: Engineers mitigated Workspace impact by shifting traffic away from the affected region.\n* **09:47 US/Pacific**: Onsite technicians reconnected the device, control plane connectivity was fully restored and all services were back to stable state.\nGoogle is committed to preventing a repeat of the issue in the future, and is completing the following actions:\n* Pause non-critical workflows until safety controls are implemented (complete).\n* Strengthen safety controls for hardware upgrade workflows by end of Q3 2025\\.\n* Design and implement a mechanism to prevent control plane partitioning in case of dual failure of upstream routers by end of Q4 2025\\.\n## \\#\\# Detailed Description of Impact\n\\#\\#\\# GCP Impact:\nMultiple products in us-east1 were affected by the loss of network connectivity, with the most significant impacts seen in us-east1-b. Other regions were not affected.\nThe outage caused a range of issues for customers with zonal resources in the region, including packet loss across VPC networks, increased error rates and latency, service unavailable (503) errors, and slow or stuck operations up to loss of networking connectivity. 
While regional products were briefly impacted, they recovered quickly by failing over to unaffected zones.\nA small number (0.1%) of Persistent Disks in us-east1-b were unavailable for the duration of the outage: these disks became available once the outage was mitigated, with no customer data loss.\n\\#\\#\\# GWS Impact:\nA small subset of Workspace users, primarily around the Southeast US, experienced varying degrees of unavailability and increased delays across multiple products, including Gmail, Google Meet, Google Drive, Google Chat, Google Calendar, Google Groups, Google Doc/Editors, and Google Voice.","status":"AVAILABLE","affected_locations":[]},{"created":"2025-07-18T22:08:16+00:00","modified":"2025-07-22T13:42:49+00:00","when":"2025-07-18T22:08:16+00:00","text":"# Mini Incident Report\nWe apologize for the inconvenience this service disruption/outage may have caused. We would like to provide some information about this incident below. Please note, this information is based on our best knowledge at the time of posting and is subject to change as our investigation continues. If you have experienced impact outside of what is listed below, please reach out to Google Cloud Support using https://cloud.google.com/support or to Google Workspace Support using help article https://support.google.com/a/answer/1047213.\n(All Times US/Pacific)\n**GCP Impact start and end time:** 18 July 2025 08:10 - 09:47\n**Duration:** 1 hour 37 minutes\n**GWS Impact start and end time:** 18 July 2025 08:10 - 08:40\n**Duration:** 30 minutes\n**Regions/Zones:** us-east1\n**Description:**\nOn Friday, 18 July 2025 08:10 US/Pacific multiple GCP and GWS products experienced elevated latencies and error rates in the us-east1 region for a duration of up to 1 hour and 37 minutes.\nBased on the preliminary analysis, the root cause of the issue is a procedural error during a planned hardware maintenance in one of our data centers in the us-east1 region. 
Our engineering team mitigated the issue by draining traffic away from the clusters and then restoring the affected hardware.\nGoogle will be completing a full incident report in the following days that will provide a full root cause and preventive actions.\n**Customer Impact:**\nThe affected GCP and GWS products experienced elevated latencies and error rates in the us-east1 region.\n**Affected Products:**\n**GCP :**\nAlloyDB for PostgreSQL, Apigee, Artifact Registry, Cloud Armor, Cloud Billing, Cloud Build, Cloud External Key Manager, Cloud Filestore, Cloud HSM, Cloud Key Management Service, Cloud Load Balancing, Cloud Monitoring, Cloud Run, Cloud Spanner, Cloud Storage for Firebase, Cloud Workflows, Database Migration Service, Dialogflow CX, Dialogflow ES, Google BigQuery, Google Cloud Dataflow, Google Cloud Dataproc, Google Cloud Storage, Google Cloud Support, Google Cloud Tasks, Google Compute Engine, Hybrid Connectivity, Media CDN, Network Telemetry, Private Service Connect, Secret Manager, Service Directory, Vertex AI Online Prediction, Virtual Private Cloud (VPC)\n**Workspace :**\nGmail, Google Meet, Google Drive, Google Chat, Google Calendar, Google Groups, Google Doc/Editors, Google Voice\n**Google SecOps:**\nGoogle SecOps SOAR \u0026 Google SecOps","status":"AVAILABLE","affected_locations":[]},{"created":"2025-07-18T18:03:11+00:00","modified":"2025-07-18T22:08:16+00:00","when":"2025-07-18T18:03:11+00:00","text":"The issue has been resolved for all affected products as of 2025-07-18 09:47 US/Pacific.\nFrom preliminary analysis, during a routine maintenance of our network in us-east1-b, we experienced elevated packet loss, causing service disruption in the zone.\nWe will publish a full Incident Report with root cause once we have completed our internal investigations.\nWe thank you for your patience while we worked on resolving the 
issue.","status":"AVAILABLE","affected_locations":[]},{"created":"2025-07-18T17:32:00+00:00","modified":"2025-07-18T18:03:11+00:00","when":"2025-07-18T17:32:00+00:00","text":"Our engineers have successfully recovered the network control plane in the affected us-east1 zones.\nWe're seeing multiple services reporting full recovery, and product engineers continue to validate the remaining services.\nWe'll provide another update with more details by 11:00 AM US/Pacific, July 18, 2025.","status":"SERVICE_DISRUPTION","affected_locations":[{"title":"South Carolina (us-east1)","id":"us-east1"}]},{"created":"2025-07-18T16:58:34+00:00","modified":"2025-07-18T17:32:00+00:00","when":"2025-07-18T16:58:34+00:00","text":"Our engineers have successfully recovered the network control plane in the affected us-east1 zones. We're seeing multiple services reporting full recovery, and product engineers are now validating the remaining services.\nWe'll provide another update with more details by 10:30 AM US/Pacific, July 18, 2025.","status":"SERVICE_DISRUPTION","affected_locations":[{"title":"South Carolina (us-east1)","id":"us-east1"}]},{"created":"2025-07-18T16:29:02+00:00","modified":"2025-07-18T16:58:34+00:00","when":"2025-07-18T16:29:02+00:00","text":"Our engineers have confirmed that us-east1-b is partially affected. All other zones in us-east1 are currently operating normally.\nOur engineers have recovered the failed hardware and are currently recovering the network control plane in the affected zones.\nWe'll provide another update by 10:00 AM US/Pacific, July 18, 2025.","status":"SERVICE_DISRUPTION","affected_locations":[{"title":"South Carolina (us-east1)","id":"us-east1"}]},{"created":"2025-07-18T15:54:23+00:00","modified":"2025-07-18T16:29:02+00:00","when":"2025-07-18T15:54:23+00:00","text":"We're currently experiencing elevated latency and error rates for several Cloud services in the us-east1 region, beginning at 7:06 AM PDT today, July 18, 2025. 
Our initial investigation points to a hardware infrastructure failure as the likely cause.\nWe apologize for any disruption this may be causing. We'll provide an update with more details by 9:15 AM PDT today.","status":"SERVICE_DISRUPTION","affected_locations":[{"title":"South Carolina (us-east1)","id":"us-east1"}]}],"most_recent_update":{"created":"2025-07-22T13:42:49+00:00","modified":"2025-07-23T09:26:58+00:00","when":"2025-07-22T13:42:49+00:00","text":"## \\# Incident Report\n## \\#\\# Summary\nOn Friday, 18 July 2025 07:50 US/Pacific, several Google Cloud Platform (GCP) and Google Workspace (GWS) products experienced elevated latencies and error rates in the us-east1 region for a duration of up to 1 hour and 57 minutes.\n**GCP Impact Duration:** 18 July 2025 07:50 \\- 09:47 US/Pacific : 1 hour 57 minutes\n**GWS Impact Duration:** 18 July 2025 07:50 \\- 08:40 US/Pacific : 50 minutes\nWe sincerely apologize for this incident, which does not reflect the level of quality and reliability we strive to offer. We are taking immediate steps to improve the platform’s performance and availability.\n##\n## \\#\\# Root Cause\nThe service interruption was triggered by a procedural error during a planned hardware replacement in our datacenter. An incorrect physical disconnection was made to the active network switch serving our control plane, rather than the redundant unit scheduled for removal. The redundant unit had been properly de-configured as part of the procedure, and the combination of these two events led to partitioning of the network control plane. Our network is designed to withstand this type of control plane failure by failing open, continuing operation.\nHowever, an operational topology change while the network control plane was in a failed open state caused our network fabric's topology information to become stale. 
This led to packet loss and service disruption until services were moved away from the fabric and control plane connectivity was restored.\n## \\#\\# Remediation and Prevention\nGoogle engineers were alerted to the outage by our monitoring system on 18 July 2025 07:06 US/Pacific and immediately started an investigation. The following timeline details the remediation and restoration efforts:\n* **07:39 US/Pacific**: The underlying root cause (device disconnect) was identified and onsite technicians were engaged to reconnect the control plane device and restore control plane connectivity. At that moment, network failure open mechanisms worked as expected and no impact was observed.\n* **07:50 US/Pacific**: A topology change led to traffic being routed suboptimally, due to the network being in a fail open state. This caused congestion on the subset of links, packet loss, and latency to customer traffic. Engineers made a decision to move traffic away from the affected fabric, which mitigated the impact for the majority of the services.\n* **08:40 US/Pacific**: Engineers mitigated Workspace impact by shifting traffic away from the affected region.\n* **09:47 US/Pacific**: Onsite technicians reconnected the device, control plane connectivity was fully restored and all services were back to stable state.\nGoogle is committed to preventing a repeat of the issue in the future, and is completing the following actions:\n* Pause non-critical workflows until safety controls are implemented (complete).\n* Strengthen safety controls for hardware upgrade workflows by end of Q3 2025\\.\n* Design and implement a mechanism to prevent control plane partitioning in case of dual failure of upstream routers by end of Q4 2025\\.\n## \\#\\# Detailed Description of Impact\n\\#\\#\\# GCP Impact:\nMultiple products in us-east1 were affected by the loss of network connectivity, with the most significant impacts seen in us-east1-b. 
Other regions were not affected.\nThe outage caused a range of issues for customers with zonal resources in the region, including packet loss across VPC networks, increased error rates and latency, service unavailable (503) errors, and slow or stuck operations up to loss of networking connectivity. While regional products were briefly impacted, they recovered quickly by failing over to unaffected zones.\nA small number (0.1%) of Persistent Disks in us-east1-b were unavailable for the duration of the outage: these disks became available once the outage was mitigated, with no customer data loss.\n\\#\\#\\# GWS Impact:\nA small subset of Workspace users, primarily around the Southeast US, experienced varying degrees of unavailability and increased delays across multiple products, including Gmail, Google Meet, Google Drive, Google Chat, Google Calendar, Google Groups, Google Doc/Editors, and Google Voice.","status":"AVAILABLE","affected_locations":[]},"status_impact":"SERVICE_DISRUPTION","severity":"medium","service_key":"zall","service_name":"Multiple Products","affected_products":[{"title":"AlloyDB for PostgreSQL","id":"fPovtKbaWN9UTepMm3kJ"},{"title":"Apigee","id":"9Y13BNFy4fJydvjdsN3X"},{"title":"Artifact Registry","id":"QbBuuiRdsLpMr9WmGwm5"},{"title":"Certificate Authority Service","id":"PvdE3tt1VdxKXzSyd8WF"},{"title":"Cloud Armor","id":"Kakg69gTC3xFyeJCY2va"},{"title":"Cloud Billing","id":"oLCqDYkE9NFWQVgctQTL"},{"title":"Cloud Build","id":"fw8GzBdZdqy4THau7e1y"},{"title":"Cloud External Key Manager","id":"GXALzYBgpi3XpsLLxLgu"},{"title":"Cloud Firestore","id":"CETSkT92V21G6A1x28me"},{"title":"Cloud HSM","id":"R3HPPUbVeFrApLaqQB4B"},{"title":"Cloud Key Management Service","id":"67cSySTL7dwJZo9JWUGU"},{"title":"Cloud Load Balancing","id":"ix7u9beT8ivBdjApTif3"},{"title":"Cloud Memorystore","id":"LGPLu3M5pcUAKU1z6eP3"},{"title":"Cloud Monitoring","id":"3zaaDb7antc73BM1UAVT"},{"title":"Cloud Run","id":"9D7d2iNBQWN24zc1VamE"},{"title":"Cloud 
Spanner","id":"EcNGGUgBtBLrtm4mWvqC"},{"title":"Cloud Storage for Firebase","id":"aY6Fbgy6TV4YWoutjhfe"},{"title":"Cloud Workflows","id":"C4P62W9Xc2zZ1Sk52bbw"},{"title":"Database Migration Service","id":"vY4CRgRFNbqUXWWyYGFS"},{"title":"Dataproc Metastore","id":"PXZh68NPz9auRyo4tVfy"},{"title":"Dialogflow CX","id":"BnCicQdHSdxaCv8Ya6Vm"},{"title":"Eventarc","id":"YaFawoMaXnqgY4keUBnW"},{"title":"Google App Engine","id":"kchyUtnkMHJWaAva8aYc"},{"title":"Google BigQuery","id":"9CcrhHUcFevXPSVaSxkf"},{"title":"Google Cloud Bigtable","id":"LfZSuE3xdQU46YMFV5fy"},{"title":"Google Cloud Console","id":"Wdsr1n5vyDvCt78qEifm"},{"title":"Google Cloud Dataflow","id":"T9bFoXPqG8w8g1YbWTKY"},{"title":"Google Cloud Dataproc","id":"yjXrEg3Yvy26BauMwr69"},{"title":"Google Cloud Pub/Sub","id":"dFjdLh2v6zuES6t9ADCB"},{"title":"Google Cloud SQL","id":"hV87iK5DcEXKgWU2kDri"},{"title":"Google Cloud Storage","id":"UwaYoXQ5bHYHG6EdiPB8"},{"title":"Google Cloud Support","id":"bGThzF7oEGP5jcuDdMuk"},{"title":"Google Cloud Tasks","id":"tMWyzhyKK4rAzAf7x62h"},{"title":"Google Compute Engine","id":"L3ggmi3Jy4xJmgodFA9K"},{"title":"Google Kubernetes Engine","id":"LCSbT57h59oR4W98NHuz"},{"title":"Hybrid Connectivity","id":"5x6CGnZvSHQZ26KtxpK1"},{"title":"Identity and Access Management","id":"adnGEDEt9zWzs8uF1oKA"},{"title":"Media CDN","id":"FK8WX6iZ3FuQL6qUwski"},{"title":"Memorystore for Memcached","id":"paC6vmsvnjCHsBkp4Wva"},{"title":"Memorystore for Redis","id":"3yFciKa9NQH7pmbnUYUs"},{"title":"Memorystore for Redis Cluster","id":"pAQRwuhqRn7Y1E2we8ds"},{"title":"Persistent Disk","id":"SzESm2Ux129pjDGKWD68"},{"title":"Private Service Connect","id":"fbzQRKqPfxZ2DUScMGV2"},{"title":"Secret Manager","id":"kzGfErQK3HzkFhptoeHH"},{"title":"Service Directory","id":"vmq8TsEZwitKYM6V9BaM"},{"title":"Vertex AI Online Prediction","id":"sdXM79fz1FS6ekNpu37K"},{"title":"Virtual Private Cloud 
(VPC)","id":"BSGtCUnz6ZmyajsjgTKv"}],"uri":"incidents/8cY8jdUpEGGbsSMSQk7J","currently_affected_locations":[],"previously_affected_locations":[{"title":"South Carolina (us-east1)","id":"us-east1"}]}]